From 7c927939d3e28dbac4beeaeac1f2e144b925cee7 Mon Sep 17 00:00:00 2001
From: Shahar Bar
Date: Tue, 28 Jan 2025 22:51:41 +0200
Subject: [PATCH] Add zooming quantitative bandit model

### Changes:
* Added quantitative model support for continuous action spaces using a zooming algorithm.
* Added base model classes to separate single/multi-objective and cost control models.
* Refactored MAB classes to support both discrete and continuous action spaces.
* Added new test cases for quantitative models and refactored the existing test suite for robustness.
* Added serialization support for quantitative models.
* Removed redundant predict_actions_randomly from cMAB.
---
 .github/workflows/continuous_delivery.yml | 9 +-
 .github/workflows/continuous_integration.yml | 9 +-
 .gitignore | 3 +
 pybandits/base.py | 81 +-
 pybandits/base_model.py | 94 ++
 pybandits/cmab.py | 206 +--
 pybandits/mab.py | 288 +++-
 pybandits/model.py | 246 ++-
 pybandits/quantitative_mab.py | 49 +
 pybandits/quantitative_model.py | 715 +++++++++
 pybandits/simulator.py | 10 +-
 pybandits/smab.py | 107 +-
 pybandits/strategy.py | 84 +-
 pybandits/utils.py | 42 +-
 pyproject.toml | 4 +-
 tests/test_cmab.py | 1404 +++++++-----------
 tests/test_mab.py | 121 +-
 tests/test_model.py | 50 +-
 tests/test_quantitative_model.py | 227 +++
 tests/test_smab.py | 1274 ++++++----------
 tests/test_strategy.py | 18 +-
 21 files changed, 2919 insertions(+), 2122 deletions(-)
 create mode 100644 pybandits/base_model.py
 create mode 100644 pybandits/quantitative_mab.py
 create mode 100644 pybandits/quantitative_model.py
 create mode 100644 tests/test_quantitative_model.py

diff --git a/.github/workflows/continuous_delivery.yml b/.github/workflows/continuous_delivery.yml
index 7c2bc7b..389c90f 100644
--- a/.github/workflows/continuous_delivery.yml
+++ b/.github/workflows/continuous_delivery.yml
@@ -25,8 +25,13 @@ jobs:

       - name: Install Poetry
         run: |
-          curl -sSL https://install.python-poetry.org | python3 -
-          export PATH="$HOME/.poetry/bin:$PATH"
+          if [[ "${{ matrix.python-version }}" == "3.8" ]]; then
+            curl -sSL https://install.python-poetry.org | python3 - --version 1.8.0
+            export PATH="$HOME/.poetry/bin:$PATH"
+          else
+            curl -sSL https://install.python-poetry.org | python3 -
+            export PATH="$HOME/.poetry/bin:$PATH"
+          fi
       - name: Backup pyproject.toml
         run: cp pyproject.toml pyproject.toml.bak
       - name: Install project dependencies with Poetry
diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml
index 6c3cdac..8311113 100644
--- a/.github/workflows/continuous_integration.yml
+++ b/.github/workflows/continuous_integration.yml
@@ -33,8 +33,13 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install Poetry
         run: |
-          curl -sSL https://install.python-poetry.org | python3 -
-          export PATH="$HOME/.poetry/bin:$PATH"
+          if [[ "${{ matrix.python-version }}" == "3.8" ]]; then
+            curl -sSL https://install.python-poetry.org | python3 - --version 1.8.0
+            export PATH="$HOME/.poetry/bin:$PATH"
+          else
+            curl -sSL https://install.python-poetry.org | python3 -
+            export PATH="$HOME/.poetry/bin:$PATH"
+          fi
       - name: Install project dependencies with Poetry
         run: |
           poetry add pydantic@${{ matrix.pydantic-version }}
diff --git a/.gitignore b/.gitignore
index c206dc6..81e30ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -65,3 +65,6 @@ MANIFEST

 # poetry
 poetry.lock
+
+# qodo gen
+.qodo
diff --git a/pybandits/base.py b/pybandits/base.py
index 4cae4ad..7c7083d 100644
--- a/pybandits/base.py
+++ b/pybandits/base.py
@@ -21,7 +21,9 @@
 # SOFTWARE.
-from typing import Any, Dict, List, NewType, Tuple, Union
+from typing import Any, Dict, List, Mapping, NewType, Optional, Tuple, Union
+
+from typing_extensions import Self

 from pybandits.pydantic_version_compatibility import (
     PYDANTIC_VERSION_1,
@@ -34,24 +36,52 @@
 )

 ActionId = NewType("ActionId", constr(min_length=1))
+QuantitativeActionId = Tuple[ActionId, Tuple[float, ...]]
+UnifiedActionId = Union[ActionId, QuantitativeActionId]
 Float01 = NewType("Float_0_1", confloat(ge=0, le=1))
 Probability = NewType("Probability", Float01)
+ProbabilityWeight = Tuple[Probability, float]
+MOProbability = List[Probability]
+MOProbabilityWeight = List[ProbabilityWeight]
+# QuantitativeProbability generalizes probability to include both action quantities and their associated probability
+QuantitativeProbability = Dict[Tuple[float, ...], Probability]
+QuantitativeProbabilityWeight = Dict[Tuple[float, ...], ProbabilityWeight]
+QuantitativeMOProbability = Dict[Tuple[float, ...], List[Probability]]
+QuantitativeMOProbabilityWeight = Dict[Tuple[float, ...], List[ProbabilityWeight]]
+UnifiedProbability = Union[Probability, QuantitativeProbability]
+UnifiedProbabilityWeight = Union[ProbabilityWeight, QuantitativeProbabilityWeight]
+UnifiedMOProbability = Union[MOProbability, QuantitativeMOProbability]
+UnifiedMOProbabilityWeight = Union[MOProbabilityWeight, QuantitativeMOProbabilityWeight]
 # SmabPredictions is a tuple of two lists: the first list contains the selected action ids,
 # and the second list contains their associated probabilities
-SmabPredictions = NewType("SmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]])
+SmabPredictions = NewType(
+    "SmabPredictions",
+    Tuple[
+        List[UnifiedActionId],
+        Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, MOProbability]]],
+    ],
+)
 # CmabPredictions is a tuple of three lists: the first list contains the selected action ids,
 # the second list contains their associated probabilities,
 # and the third list contains their associated weighted sums
 CmabPredictions = NewType(
-    "CmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]], List[Dict[ActionId, float]]]
+    "CmabPredictions",
+    Union[
+        Tuple[List[UnifiedActionId], List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, float]]],
+        Tuple[
+            List[UnifiedActionId], List[Dict[UnifiedActionId, MOProbability]], List[Dict[UnifiedActionId, List[float]]]
+        ],
+    ],
)
 Predictions = NewType("Predictions", Union[SmabPredictions, CmabPredictions])
 BinaryReward = NewType("BinaryReward", conint(ge=0, le=1))
 ActionRewardLikelihood = NewType(
     "ActionRewardLikelihood",
-    Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]],
+    Union[Dict[UnifiedActionId, float], Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]],
 )
+Serializable = Union[str, int, float, bool, None, List["Serializable"], Dict[str, "Serializable"]]

 ACTION_IDS_PREFIX = "action_ids_"
+QUANTITATIVE_ACTION_IDS_PREFIX = f"quantitative_{ACTION_IDS_PREFIX}"


 class _classproperty(property):
@@ -109,3 +139,46 @@ def model_fields(cls) -> Dict[str, Any]:
             The model fields.
         """
         return cls.__fields__
+
+    def model_copy(self, *, update: Optional[Mapping[str, Any]] = None, deep: bool = False) -> Self:
+        """
+        Create a new instance of the model with the same field values.
+
+        Parameters
+        ----------
+        update : Mapping[str, Any], optional
+            The field values to update, by default None
+
+        deep : bool, optional
+            Whether to copy the field values deeply, by default False
+
+        Returns
+        -------
+        Self
+            The new instance of the model.
+        """
+        return self.copy(update=update, deep=deep)
+
+    @classmethod
+    def model_validate(
+        cls,
+        obj: Any,
+    ) -> Self:
+        """
+        Validate an object into a PyBandits BaseModel instance.
+
+        Parameters
+        ----------
+        obj : Any
+            The object to validate. Pass a state dictionary to generate a model from a previously saved state.
+
+        Raises
+        ------
+        ValidationError: If the object could not be validated.
+
+        Returns
+        -------
+        Self
+            The validated model instance.
+        """
+        return cls.parse_obj(obj)
diff --git a/pybandits/base_model.py b/pybandits/base_model.py
new file mode 100644
index 0000000..0ae8d25
--- /dev/null
+++ b/pybandits/base_model.py
@@ -0,0 +1,94 @@
+from abc import ABC, abstractmethod
+from typing import Callable, List, Union
+
+import numpy as np
+
+from pybandits.base import BinaryReward, Probability, PyBanditsBaseModel, QuantitativeProbability
+from pybandits.pydantic_version_compatibility import NonNegativeFloat
+
+
+class BaseModel(PyBanditsBaseModel, ABC):
+    """
+    Class to model the prior distributions of standard actions and quantitative actions.
+    """
+
+    @abstractmethod
+    def sample_proba(self) -> Union[Probability, QuantitativeProbability]:
+        """
+        Sample the probability of getting a positive reward.
+        """
+
+    @abstractmethod
+    def update(self, rewards: Union[List[BinaryReward], List[List[BinaryReward]]], **kwargs):
+        """
+        Update the model parameters.
+
+        Parameters
+        ----------
+        rewards : Union[List[BinaryReward], List[List[BinaryReward]]],
+            if nested list, len() should follow shape of (n_samples, n_objectives)
+            The binary reward for each sample.
+            If strategy is not MultiObjectiveBandit, rewards should be a list, e.g.
+                rewards = [1, 0, 1, 1, 1, ...]
+            If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2):
+                rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
+        """
+
+
+class BaseModelSO(BaseModel, ABC):
+    """
+    Class to model the prior distributions of standard actions and quantitative actions for single objective.
+    """
+
+    @abstractmethod
+    def update(self, rewards: List[BinaryReward], **kwargs):
+        """
+        Update the model parameters.
+
+        Parameters
+        ----------
+        rewards : List[BinaryReward],
+            The binary reward for each sample.
+        """
+
+
+class BaseModelMO(BaseModel, ABC):
+    """
+    Class to model the prior distributions of standard actions and quantitative actions for multi-objective.
+
+    Parameters
+    ----------
+    models : List[BaseModelSO]
+        The list of models for each objective.
+    """
+
+    models: List[BaseModelSO]
+
+    @abstractmethod
+    def update(self, rewards: List[List[BinaryReward]], **kwargs):
+        """
+        Update the model parameters.
+
+        Parameters
+        ----------
+        rewards : List[List[BinaryReward]]
+            The binary rewards for each sample, with shape (n_samples, n_objectives),
+            e.g. (with n_objectives=2):
+                rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
+        """
+
+
+class BaseModelCC(PyBanditsBaseModel, ABC):
+    """
+    Class to model action cost.
+
+    Parameters
+    ----------
+    cost: Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
+        Cost associated with the action: either a fixed non-negative cost, or a function
+        mapping action quantities to a non-negative cost.
+    """
+
+    cost: Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
diff --git a/pybandits/cmab.py b/pybandits/cmab.py
index 9b405a1..27d1476 100644
--- a/pybandits/cmab.py
+++ b/pybandits/cmab.py
@@ -23,13 +23,20 @@
 from typing import Dict, List, Optional, Set, Union

 from numpy import array
-from numpy.random import choice
 from numpy.typing import ArrayLike

-from pybandits.base import ActionId, BinaryReward, CmabPredictions
+from pybandits.base import (
+    ActionId,
+    BinaryReward,
+    CmabPredictions,
+    UnifiedActionId,
+    UnifiedMOProbabilityWeight,
+    UnifiedProbabilityWeight,
+)
 from pybandits.mab import BaseMab
-from pybandits.model import BayesianLogisticRegression, BayesianLogisticRegressionCC
+from pybandits.model import BaseBayesianLogisticRegression, BayesianLogisticRegression, BayesianLogisticRegressionCC
 from pybandits.pydantic_version_compatibility import field_validator, validate_call
+from pybandits.quantitative_model import BaseCmabZoomingModel, CmabZoomingModel, CmabZoomingModelCC
 from pybandits.strategy import (
     BestActionIdentificationBandit,
     ClassicBandit,
@@ -43,38 +50,62 @@ class BaseCmabBernoulli(BaseMab):

     Parameters
     ----------
-    actions: Dict[ActionId, BayesianLogisticRegression]
+    actions : Dict[ActionId, Union[BaseBayesianLogisticRegression, BaseCmabZoomingModel]]
         The list of possible actions, and their associated Model.
-    strategy: Strategy
+    strategy : Strategy
         The strategy used to select actions.
-    predict_with_proba: bool
-        If True predict with sampled probabilities, else predict with weighted sums.
-    predict_actions_randomly: bool
-        If True predict actions randomly (where each action has equal probability to be selected), else predict with the
-        bandit strategy.
""" - actions: Dict[ActionId, BayesianLogisticRegression] - predict_with_proba: bool - predict_actions_randomly: bool + actions: Dict[ActionId, Union[BaseBayesianLogisticRegression, BaseCmabZoomingModel]] + _predict_with_proba: bool + + @staticmethod + def _maybe_crawl_model(model: Union[BaseBayesianLogisticRegression, BaseCmabZoomingModel]): + return list(model.sub_actions.values())[0] if isinstance(model, BaseCmabZoomingModel) else model @field_validator("actions", mode="after") @classmethod - def check_bayesian_logistic_regression_models(cls, v): + def check_models(cls, v): action_models = list(v.values()) first_action = action_models[0] - first_action_type = type(first_action) + test_first_action = cls._maybe_crawl_model(first_action) for action in action_models[1:]: - if not isinstance(action, first_action_type): - raise AttributeError("All actions should follow the same type.") - if not len(action.betas) == len(first_action.betas): + test_action = cls._maybe_crawl_model(action) + if not len(test_action.betas) == len(test_first_action.betas): raise AttributeError("All actions should have the same number of betas.") - if not action.update_method == first_action.update_method: + if not test_action.update_method == test_first_action.update_method: raise AttributeError("All actions should have the same update method.") - if not action.update_kwargs == first_action.update_kwargs: + if not test_action.update_kwargs == test_first_action.update_kwargs: raise AttributeError("All actions should have the same update kwargs.") return v + def _inner_get_action_probabilities( + self, valid_actions: Set[ActionId], context: ArrayLike + ) -> Union[ + Dict[ActionId, List[UnifiedProbabilityWeight]], + Dict[ActionId, List[UnifiedMOProbabilityWeight]], + ]: + """ + Get the probability of getting a positive reward for each action. + + Parameters + ---------- + + valid_actions : Set[ActionId] + The list of valid (i.e. not forbidden) action IDs. + context: ArrayLike of shape (n_samples, n_features) + Matrix of contextual features. + + Returns + ------- + action_probabilities: Union[Dict[ActionId, List[Tuple[UnifiedProbability, float]]], Dict[ActionId, List[List[Tuple[UnifiedProbability, float]]]]] + The probability of getting a positive reward for each action and objective. + """ + action_probabilities = { + action: model.sample_proba(context) for action, model in self.actions.items() if action in valid_actions + } + return action_probabilities + @validate_call(config=dict(arbitrary_types_allowed=True)) def predict( self, @@ -95,14 +126,13 @@ def predict( Returns ------- - actions: List[ActionId] of shape (n_samples,) + actions: List[ActionId] The actions selected by the multi-armed bandit model. - probs: List[Dict[ActionId, Probability]] of shape (n_samples,) + probs: Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, MOProbability]]] The probabilities of getting a positive reward for each action. - ws : List[Dict[ActionId, float]] + ws : Union[List[Dict[UnifiedActionId, float]], List[Dict[UnifiedActionId, List[float]]]] The weighted sum of logistic regression logits. 
""" - valid_actions = self._get_valid_actions(forbidden_actions) # cast inputs to numpy arrays to facilitate their manipulation context = array(context) @@ -110,47 +140,35 @@ def predict( if len(context) < 1: raise AttributeError("Context must have at least one row") - if self.predict_actions_randomly: - # check that context has the expected number of columns - if context.shape[1] != len(list(self.actions.values())[0].betas): - raise AttributeError("Context must have {n_betas} columns") - - selected_actions = choice(list(valid_actions), size=len(context)).tolist() # predict actions randomly - probs = len(context) * [{k: 0.5 for k in valid_actions}] # all probs are set to 0.5 - weighted_sums = len(context) * [{k: 0 for k in valid_actions}] # all weighted sum are set to 1 - else: - # p is a dict of the sampled probability "prob" and weighted_sum "ws", e.g. - # - # p = {'a1': ([0.5, 0.2, 0.3], [200, 100, 130]), 'a2': ([0.4, 0.5, 0.6], [180, 200, 230]), ...} - # | | | | - # prob ws prob ws - p = { - action: model.sample_proba(context=context) # sample probabilities for the entire context matrix - for action, model in self.actions.items() - if action in valid_actions - } - - prob = {a: x[0] for a, x in p.items()} # e.g. prob = {'a1': [0.5, 0.4, ...], 'a2': [0.4, 0.3, ...], ...} - ws = {a: x[1] for a, x in p.items()} # e.g. ws = {'a1': [200, 100, ...], 'a2': [100, 50, ...], ...} - - # select either "prob" or "ws" to use as input argument in select_actions() - p_to_select_action = prob if self.predict_with_proba else ws - - # predict actions, probs, weighted_sums - selected_actions = [ - self._select_epsilon_greedy_action( - p={a: p_to_select_action[a][i] for a in p_to_select_action}, actions=self.actions - ) - for i in range(len(context)) - ] - probs = [{a: prob[a][i] for a in prob} for i in range(len(context))] - weighted_sums = [{a: ws[a][i] for a in ws} for i in range(len(context))] + # p is a dict of the sampled probability "prob" and weighted_sum "ws", e.g. + # + # p = {'a1': ([0.5, 0.2, 0.3], [200, 100, 130]), 'a2': ([0.4, 0.5, 0.6], [180, 200, 230]), ...} + # | | | | + # prob ws prob ws + probs_weights = self._get_action_probabilities(forbidden_actions=forbidden_actions, context=context) + + probs = [ + {a: x[0] for a, x in prob_weight.items()} for prob_weight in probs_weights + ] # e.g. prob = {'a1': [0.5, 0.4, ...], 'a2': [0.4, 0.3, ...], ...} + weighted_sums = [ + {a: x[1] for a, x in prob_weight.items()} for prob_weight in probs_weights + ] # e.g. ws = {'a1': [200, 100, ...], 'a2': [100, 50, ...], ...} + + # select either "prob" or "ws" to use as input argument in select_actions() + p_to_select_action = probs if self._predict_with_proba else weighted_sums + + # predict actions, probs, weighted_sums + selected_actions = [self._select_epsilon_greedy_action(p=p, actions=self.actions) for p in p_to_select_action] return selected_actions, probs, weighted_sums @validate_call(config=dict(arbitrary_types_allowed=True)) - def update( - self, context: ArrayLike, actions: List[ActionId], rewards: List[Union[BinaryReward, List[BinaryReward]]] + def _update( + self, + actions: List[UnifiedActionId], + rewards: List[Union[BinaryReward, List[BinaryReward]]], + quantities: Optional[List[Union[float, List[float], None]]], + context: ArrayLike, ): """ Update the contextual Bernoulli bandit given the list of selected actions and their corresponding binary @@ -158,9 +176,8 @@ def update( Parameters ---------- - context: ArrayLike of shape (n_samples, n_features) - Matrix of contextual features. 
-        actions : List[ActionId] of shape (n_samples,), e.g. ['a1', 'a2', 'a3', 'a4', 'a5']
+
+        actions : List[UnifiedActionId] of shape (n_samples,), e.g. ['a1', 'a2', 'a3', 'a4', 'a5']
             The selected action for each sample.
         rewards : List[Union[BinaryReward, List[BinaryReward]]] of shape (n_samples, n_objectives)
             The binary reward for each sample.
             If strategy is not MultiObjectiveBandit, rewards should be a list, e.g.
                 rewards = [1, 0, 1, 1, 1, ...]
             If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2):
                 rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
+        quantities : Optional[List[Union[float, List[float], None]]]
+            The quantity associated with each action. If None, the action is treated as a non-quantitative action.
+        context: ArrayLike of shape (n_samples, n_features)
+            Matrix of contextual features.
         """
-        self._validate_update_params(actions=actions, rewards=rewards)
-        if len(context) != len(rewards):
-            raise AttributeError(f"Shape mismatch: actions and rewards should have the same length {len(actions)}.")
-
         # cast inputs to numpy arrays to facilitate their manipulation
         context, actions, rewards = array(context), array(actions), array(rewards)
+        if quantities is not None:
+            quantities = array(quantities)

         for a in set(actions):
             # get context and rewards of the samples associated to action a
             context_of_a = context[actions == a]
             rewards_of_a = rewards[actions == a].tolist()
-            # update model associated to action a
-            self.actions[a].update(context=context_of_a, rewards=rewards_of_a)
-
-        # always set predict_actions_randomly after update
-        self.predict_actions_randomly = False
+            if quantities is not None and (
+                set(quantities_of_a := quantities[actions == a]) != {None}
+            ):  # quantitative action
+                # update model associated to action a with quantities
+                self.actions[a].update(context=context_of_a, quantities=quantities_of_a, rewards=rewards_of_a)
+            else:  # non-quantitative action
+                # update model associated to action a
+                self.actions[a].update(context=context_of_a, rewards=rewards_of_a)


 class CmabBernoulli(BaseCmabBernoulli):
     """
-    Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling.
+    Contextual Bernoulli Multi-Armed Bandit with Thompson Sampling.

     Reference: Thompson Sampling for Contextual Bandits with Linear Payoffs (Agrawal and Goyal, 2014)
                https://arxiv.org/pdf/1209.3352.pdf

     Parameters
     ----------
-    actions: Dict[ActionId, BayesianLogisticRegression]
+    actions: Dict[ActionId, Union[BayesianLogisticRegression, CmabZoomingModel]]
         The list of possible actions, and their associated Model.
     strategy: ClassicBandit
         The strategy used to select actions.
-    predict_with_proba: bool
-        If True predict with sampled probabilities, else predict with weighted sums
-    predict_actions_randomly: bool
-        If True predict actions randomly (where each action has equal probability to be selected), else predict with the
-        bandit strategy.
     """

-    actions: Dict[ActionId, BayesianLogisticRegression]
+    actions: Dict[ActionId, Union[BayesianLogisticRegression, CmabZoomingModel]]
     strategy: ClassicBandit
-    predict_with_proba: bool = False
-    predict_actions_randomly: bool = False
+    _predict_with_proba: bool = False


 class CmabBernoulliBAI(BaseCmabBernoulli):
@@ -223,21 +239,15 @@ class CmabBernoulliBAI(BaseCmabBernoulli):

     Parameters
     ----------
-    actions: Dict[ActionId, BayesianLogisticRegression]
+    actions: Dict[ActionId, Union[BayesianLogisticRegression, CmabZoomingModel]]
         The list of possible actions, and their associated Model.
     strategy: BestActionIdentificationBandit
         The strategy used to select actions.
- predict_with_proba: bool - If True predict with sampled probabilities, else predict with weighted sums - predict_actions_randomly: bool - If True predict actions randomly (where each action has equal probability to be selected), else predict with the - bandit strategy. """ - actions: Dict[ActionId, BayesianLogisticRegression] + actions: Dict[ActionId, Union[BayesianLogisticRegression, CmabZoomingModel]] strategy: BestActionIdentificationBandit - predict_with_proba: bool = False - predict_actions_randomly: bool = False + _predict_with_proba: bool = False class CmabBernoulliCC(BaseCmabBernoulli): @@ -257,18 +267,12 @@ class CmabBernoulliCC(BaseCmabBernoulli): Parameters ---------- - actions: Dict[ActionId, BayesianLogisticRegressionCC] + actions: Dict[ActionId, Union[BayesianLogisticRegressionCC, CmabZoomingModelCC]] The list of possible actions, and their associated Model. strategy: CostControlBandit The strategy used to select actions. - predict_with_proba: bool - If True predict with sampled probabilities, else predict with weighted sums - predict_actions_randomly: bool - If True predict actions randomly (where each action has equal probability to be selected), else predict with the - bandit strategy. """ - actions: Dict[ActionId, BayesianLogisticRegressionCC] + actions: Dict[ActionId, Union[BayesianLogisticRegressionCC, CmabZoomingModelCC]] strategy: CostControlBandit - predict_with_proba: bool = True - predict_actions_randomly: bool = False + _predict_with_proba: bool = True diff --git a/pybandits/mab.py b/pybandits/mab.py index 38c83b5..654d138 100644 --- a/pybandits/mab.py +++ b/pybandits/mab.py @@ -23,20 +23,33 @@ import warnings from abc import ABC, abstractmethod from collections import defaultdict -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, get_args +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union, get_args, get_origin import numpy as np +from numpy.typing import ArrayLike from pybandits.base import ( ACTION_IDS_PREFIX, + QUANTITATIVE_ACTION_IDS_PREFIX, ActionId, ActionRewardLikelihood, BinaryReward, Float01, + MOProbability, + MOProbabilityWeight, Predictions, + Probability, + ProbabilityWeight, PyBanditsBaseModel, + Serializable, + UnifiedActionId, + UnifiedMOProbability, + UnifiedMOProbabilityWeight, + UnifiedProbability, + UnifiedProbabilityWeight, ) -from pybandits.model import Model +from pybandits.base_model import BaseModel +from pybandits.model import Model, ModelMO from pybandits.pydantic_version_compatibility import ( PYDANTIC_VERSION_1, PYDANTIC_VERSION_2, @@ -45,8 +58,9 @@ pydantic_version, validate_call, ) +from pybandits.quantitative_model import QuantitativeModel from pybandits.strategy import Strategy -from pybandits.utils import extract_argument_names_from_function +from pybandits.utils import extract_argument_names class BaseMab(PyBanditsBaseModel, ABC): @@ -69,14 +83,14 @@ class BaseMab(PyBanditsBaseModel, ABC): which in turn will be used to instantiate the strategy. 
""" - actions: Dict[ActionId, Model] + actions: Dict[ActionId, BaseModel] strategy: Strategy epsilon: Optional[Float01] = None - default_action: Optional[ActionId] = None + default_action: Optional[UnifiedActionId] = None def __init__( self, - actions: Dict[ActionId, Model], + actions: Dict[ActionId, BaseModel], epsilon: Optional[Float01] = None, default_action: Optional[ActionId] = None, **strategy_kwargs, @@ -101,12 +115,6 @@ def at_least_one_action_is_defined(cls, v): raise AttributeError("At least one action should be defined.") elif len(v) == 1: warnings.warn("Only a single action was supplied. This MAB will be deterministic.") - # validate that all actions are of the same configuration - action_models = list(v.values()) - first_action = action_models[0] - first_action_type = type(first_action) - if any(not isinstance(action, first_action_type) for action in action_models[1:]): - raise AttributeError("All actions should follow the same type.") return v if pydantic_version == PYDANTIC_VERSION_1: @@ -165,7 +173,11 @@ def _get_valid_actions(self, forbidden_actions: Optional[Set[ActionId]]) -> Set[ return valid_actions def _validate_update_params( - self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]] + self, + actions: List[ActionId], + rewards: Union[List[BinaryReward], List[List[BinaryReward]]], + quantities: Optional[List[Union[float, List[float], None]]], + context: Optional[ArrayLike], ): """ Verify that the given list of action IDs is a subset of the currently defined actions and that @@ -177,19 +189,54 @@ def _validate_update_params( The selected action for each sample. rewards: List[Union[BinaryReward, List[BinaryReward]]] The reward for each sample. + quantities : Optional[List[Union[float, List[float], None]]] + The value associated with each action. If none, the value is not used, i.e. non-quantitative action. + context: Optional[ArrayLike] + The context for each sample. """ invalid = set(actions) - set(self.actions.keys()) if invalid: raise AttributeError(f"The following invalid action(s) were specified: {invalid}.") if len(actions) != len(rewards): raise AttributeError(f"Shape mismatch: actions and rewards should have the same length {len(actions)}.") + if quantities is not None and len(actions) != len(quantities): + raise AttributeError(f"Shape mismatch: actions and quantities should have the same length {len(actions)}.") + if context is not None and len(context) != len(rewards): + raise AttributeError(f"Shape mismatch: actions and rewards should have the same length {len(actions)}.") #################################################################################################################### - @abstractmethod - @validate_call + @validate_call(config=dict(arbitrary_types_allowed=True)) def update( - self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]], *args, **kwargs + self, + actions: List[ActionId], + rewards: Union[List[BinaryReward], List[List[BinaryReward]]], + quantities: Optional[List[Union[float, List[float], None]]] = None, + context: Optional[ArrayLike] = None, + ): + """ + Update the multi-armed bandit model. + + actions: List[ActionId] + The selected action for each sample. + rewards: List[Union[BinaryReward, List[BinaryReward]]] + The reward for each sample. + quantities: Optional[List[Union[float, List[float], None]]] + The value associated with each action. If none, the value is not used, i.e. non-quantitative action. 
+        context: Optional[ArrayLike]
+            The context for each sample.
+        """
+        self._validate_update_params(actions, rewards, quantities, context)
+        self._update(actions, rewards, quantities, context)
+
+    @abstractmethod
+    @validate_call(config=dict(arbitrary_types_allowed=True))
+    def _update(
+        self,
+        actions: List[ActionId],
+        rewards: Union[List[BinaryReward], List[List[BinaryReward]]],
+        quantities: Optional[List[Union[float, List[float], None]]],
+        context: Optional[ArrayLike],
     ):
         """
         Update the multi-armed bandit model.
@@ -198,11 +245,75 @@ def update(
             The selected action for each sample.
         rewards: List[Union[BinaryReward, List[BinaryReward]]]
             The reward for each sample.
+        quantities: Optional[List[Union[float, List[float], None]]]
+            The quantity associated with each action. If None, the action is treated as a non-quantitative action.
+        context: Optional[ArrayLike]
+            The context for each sample.
+        """
+
+    def _get_action_probabilities(
+        self, forbidden_actions: Optional[Set[ActionId]] = None, **kwargs
+    ) -> Union[
+        List[Dict[UnifiedActionId, Probability]],
+        List[Dict[UnifiedActionId, ProbabilityWeight]],
+        List[Dict[UnifiedActionId, MOProbability]],
+        List[Dict[UnifiedActionId, MOProbabilityWeight]],
+    ]:
+        """
+        Get the probability of getting a positive reward for each action.
+
+        Parameters
+        ----------
+        forbidden_actions : Optional[Set[ActionId]], default=None
+            Set of forbidden actions. If specified, the model will discard the forbidden_actions and it will only
+            consider the remaining allowed_actions. By default, the model considers all actions as allowed_actions.
+            Note that: actions = allowed_actions U forbidden_actions.
+
+        Returns
+        -------
+        action_probabilities: Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, ProbabilityWeight]], List[Dict[UnifiedActionId, MOProbability]], List[Dict[UnifiedActionId, MOProbabilityWeight]]]
+            The probability of getting a positive reward for each action.
+        """
+
+        valid_actions = self._get_valid_actions(forbidden_actions)
+        action_probabilities = self._inner_get_action_probabilities(valid_actions, **kwargs)
+        # transpose {action: [per-sample outputs]} into one {action: output} dict per sample
+        action_probabilities = [dict(zip(action_probabilities, t)) for t in zip(*action_probabilities.values())]
+        # flatten quantitative outputs ({quantities: value} dicts) into (action_id, quantities) keys
+        action_probabilities = [
+            {
+                (k, inner_k) if isinstance(v, dict) else k: inner_v
+                for k, v in p.items()
+                for inner_k, inner_v in (v.items() if isinstance(v, dict) else [("", v)])
+            }
+            for p in action_probabilities
+        ]
+        return action_probabilities
+
+    @abstractmethod
+    def _inner_get_action_probabilities(
+        self, valid_actions: Set[ActionId], **kwargs
+    ) -> Union[
+        Dict[ActionId, List[UnifiedProbability]],
+        Dict[ActionId, List[UnifiedProbabilityWeight]],
+        Dict[ActionId, List[UnifiedMOProbability]],
+        Dict[ActionId, List[UnifiedMOProbabilityWeight]],
+    ]:
+        """
+        Get the probability of getting a positive reward for each action.
+
+        Parameters
+        ----------
+        valid_actions : Set[ActionId]
+            The list of valid (i.e. not forbidden) action IDs.
+
+        Returns
+        -------
+        action_probabilities: Union[Dict[ActionId, List[UnifiedProbability]], Dict[ActionId, List[UnifiedProbabilityWeight]], Dict[ActionId, List[UnifiedMOProbability]], Dict[ActionId, List[UnifiedMOProbabilityWeight]]]
+            The probability of getting a positive reward for each action.
         """

     @abstractmethod
     @validate_call
-    def predict(self, forbidden_actions: Optional[Set[ActionId]] = None) -> Predictions:
+    def predict(self, forbidden_actions: Optional[Set[ActionId]] = None, **kwargs) -> Predictions:
         """
         Predict actions.
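To make the reshaping in `_get_action_probabilities` concrete, here is a minimal standalone sketch of the same two steps with illustrative values; `per_action` stands in for what `_inner_get_action_probabilities` returns (plain actions yield one output per sample, quantitative actions yield a `{quantity_tuple: output}` dict per sample):

    # What _inner_get_action_probabilities might return for 2 samples (illustrative values).
    per_action = {
        "a1": [0.5, 0.4],                      # plain action: one probability per sample
        "a2": [{(0.2,): 0.6}, {(0.7,): 0.3}],  # quantitative action: {quantity_tuple: probability} per sample
    }

    # Step 1: transpose {action: [per-sample outputs]} into one dict per sample.
    per_sample = [dict(zip(per_action, t)) for t in zip(*per_action.values())]
    # per_sample == [{'a1': 0.5, 'a2': {(0.2,): 0.6}}, {'a1': 0.4, 'a2': {(0.7,): 0.3}}]

    # Step 2: flatten quantitative entries into (action_id, quantity_tuple) keys.
    flat = [
        {
            (k, inner_k) if isinstance(v, dict) else k: inner_v
            for k, v in p.items()
            for inner_k, inner_v in (v.items() if isinstance(v, dict) else [("", v)])
        }
        for p in per_sample
    ]
    # flat == [{'a1': 0.5, ('a2', (0.2,)): 0.6}, {'a1': 0.4, ('a2', (0.7,)): 0.3}]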
@@ -241,7 +352,7 @@ def get_state(self) -> (str, dict): def _select_epsilon_greedy_action( self, p: ActionRewardLikelihood, - actions: Optional[Dict[ActionId, Model]] = None, + actions: Optional[Dict[ActionId, BaseModel]] = None, ) -> ActionId: """ Wraps self.strategy.select_action function with epsilon-greedy strategy, @@ -276,6 +387,8 @@ def _select_epsilon_greedy_action( raise KeyError(f"Default action {self.default_action} not in actions.") if np.random.binomial(1, self.epsilon): selected_action = self.default_action or np.random.choice(list(p.keys())) + if isinstance(selected_action, tuple): + selected_action = (selected_action[0], tuple(np.random.random(len(selected_action[1])))) else: selected_action = self.strategy.select_action(p=p, actions=actions) else: @@ -283,7 +396,7 @@ def _select_epsilon_greedy_action( return selected_action @classmethod - def from_state(cls, state: dict) -> "BaseMab": + def from_state(cls, state: Dict[str, Serializable]) -> "BaseMab": """ Create a new instance of the class from a given model state. The state can be obtained by applying get_state() to a model. @@ -299,16 +412,13 @@ def from_state(cls, state: dict) -> "BaseMab": The new model instance. """ - model_attributes = extract_argument_names_from_function(cls.__init__, True) - strategy_attributes = list(state["strategy"].keys()) - attributes_mapping = {k: state[k] for k in model_attributes if k not in strategy_attributes and k in state} - attributes_mapping.update({k: state["strategy"][k] for k in strategy_attributes}) - return cls(**attributes_mapping) + return cls.model_validate(state) @classmethod def cold_start( cls, action_ids: Optional[Set[ActionId]] = None, + quantitative_action_ids: Optional[Set[ActionId]] = None, epsilon: Optional[Float01] = None, default_action: Optional[ActionId] = None, **kwargs, @@ -319,14 +429,16 @@ def cold_start( Parameters ---------- - action_ids: Optional[Set[ActionId]] + action_ids : Optional[Set[ActionId]] The list of possible actions. - epsilon: Optional[Float01] + quantitative_action_ids : Optional[Set[ActionId]] + The list of quantitative actions. + epsilon : Optional[Float01] epsilon for epsilon-greedy approach. If None, epsilon-greedy is not used. - default_action: Optional[ActionId] + default_action : Optional[ActionId] The default action to select with a probability of epsilon when using the epsilon-greedy approach. If `default_action` is None, a random action from the action set will be selected with a probability of epsilon. - kwargs: Dict[str, Any] + kwargs : Dict[str, Any] Additional parameters for the mab and for the action model. Returns @@ -334,35 +446,43 @@ def cold_start( mab: BaseMab Multi-Armed Bandit """ - action_specific_kwargs, kwargs = cls._extract_action_specific_kwargs(**kwargs) + action_specific_kwargs, quantitative_action_specific_kwargs, kwargs = cls._extract_action_specific_kwargs( + **kwargs + ) # Extract inner_action_ids - inner_action_ids = action_ids or set(action_specific_kwargs.keys()) - if not inner_action_ids: - raise ValueError( - "inner_action_ids should be provided either directly or via keyword argument in the form of " - "action_id_{model argument name} = {action_id: value}." 
- ) + inner_action_ids = action_ids or set(action_specific_kwargs) + inner_quantitative_action_ids = quantitative_action_ids or set(quantitative_action_specific_kwargs) + if not inner_action_ids and not inner_quantitative_action_ids: + raise ValueError("At least one action should be defined.") # Assign model for each action - action_model_cold_start, action_general_kwargs = cls._extract_action_model_class_and_attributes(**kwargs) - actions = {} - for a in inner_action_ids: - actions[a] = action_model_cold_start(**action_general_kwargs, **action_specific_kwargs.get(a, {})) + ( + model_cold_start, + quantitative_model_cold_start, + action_general_kwargs, + quantitative_action_general_kwargs, + ) = cls._extract_action_model_class_and_attributes(kwargs) + + # Instantiate the actions + all_actions = {} + for action_ids, cold_start, general_kwargs, specific_kwargs in zip( + [inner_action_ids, inner_quantitative_action_ids], + [model_cold_start, quantitative_model_cold_start], + [action_general_kwargs, quantitative_action_general_kwargs], + [action_specific_kwargs, quantitative_action_specific_kwargs], + ): + for a in action_ids: + all_actions[a] = cold_start(**general_kwargs, **specific_kwargs.get(a, {})) # Instantiate the MAB - strategy_kwargs = {k: kwargs[k] for k in kwargs.keys() if k not in action_general_kwargs.keys()} strategy_class = cls.model_fields["strategy"].annotation - strategy = strategy_class(**strategy_kwargs) - mab = cls(actions=actions, strategy=strategy, epsilon=epsilon, default_action=default_action) - # For contextual multi-armed bandit, until the very first update the model will predict actions randomly, - # where each action has equal probability to be selected. - if hasattr(mab, "predict_actions_randomly"): - mab.predict_actions_randomly = True + strategy = strategy_class(**kwargs) + mab = cls(actions=all_actions, strategy=strategy, epsilon=epsilon, default_action=default_action) return mab @staticmethod - def _extract_action_specific_kwargs(**kwargs) -> Tuple[Dict[str, Dict], Dict[str, Any]]: + def _extract_action_specific_kwargs(**kwargs) -> Tuple[Dict[str, Dict], Dict[str, Dict], Dict[str, Any]]: """ Utility function to extract kwargs that are specific for each action when constructing the action model. @@ -375,21 +495,30 @@ def _extract_action_specific_kwargs(**kwargs) -> Tuple[Dict[str, Dict], Dict[str ------- action_specific_kwargs : Dict[str, Dict] Dictionary of actions and the parameters of their associated model. + quantitative_action_specific_kwargs : Dict[str, Dict] + Dictionary of quantitative actions and the parameters of their associated model. kwargs : Dict[str, Any] - Dictionary of parameters and their values, without the action_specific_kwargs. + Dictionary of parameters and their quantities, without the action_specific_kwargs. 
""" action_specific_kwargs = defaultdict(dict) + quantitative_action_specific_kwargs = defaultdict(dict) for keyword in list(kwargs): argument = kwargs[keyword] - if keyword.startswith(ACTION_IDS_PREFIX) and type(argument) is dict: - kwargs.pop(keyword) - inner_keyword = keyword.split(ACTION_IDS_PREFIX)[1] - for action_id, value in argument.items(): - action_specific_kwargs[action_id][inner_keyword] = value - return dict(action_specific_kwargs), kwargs + for prefix, target_kwargs in zip( + [ACTION_IDS_PREFIX, QUANTITATIVE_ACTION_IDS_PREFIX], + [action_specific_kwargs, quantitative_action_specific_kwargs], + ): + if keyword.startswith(prefix) and type(argument) is dict: + kwargs.pop(keyword) + inner_keyword = keyword.split(prefix)[1] + for action_id, value in argument.items(): + target_kwargs[action_id][inner_keyword] = value + return dict(action_specific_kwargs), dict(quantitative_action_specific_kwargs), kwargs @classmethod - def _extract_action_model_class_and_attributes(cls, **kwargs) -> Tuple[Callable, Dict[str, Dict]]: + def _extract_action_model_class_and_attributes( + cls, kwargs + ) -> Tuple[Callable, Callable, Dict[str, Dict], Dict[str, Dict]]: """ Utility function to extract kwargs that are specific for each action when constructing the action model. @@ -402,17 +531,44 @@ def _extract_action_model_class_and_attributes(cls, **kwargs) -> Tuple[Callable, ------- action_model_cold_start : Callable Function handle for factoring the required action model. + quantitative_action_model_cold_start : Callable + Function handle for factoring the required quantitative action model. action_general_kwargs : Dict[str, any] Dictionary of parameters and their values for the action model. + quantitative_action_general_kwargs : Dict[str, any] + Dictionary of parameters and their values for the quantitative action model. 
""" - action_model_class = get_args(cls.model_fields["actions"].annotation)[1] - if hasattr(action_model_class, "cold_start"): - action_model_cold_start_init = action_model_cold_start = action_model_class.cold_start - else: - action_model_cold_start_init = action_model_class.__init__ - action_model_cold_start = action_model_class - - action_model_attributes = extract_argument_names_from_function(action_model_cold_start_init, True) - - action_general_kwargs = {k: kwargs[k] for k in action_model_attributes if k in kwargs.keys()} - return action_model_cold_start, action_general_kwargs + action_model_type = get_args(cls.model_fields["actions"].annotation)[1] + action_model_classes = ( + get_args(action_model_type) if get_origin(action_model_type) is Union else (action_model_type,) + ) + if len(action_model_classes) > 2: + raise ValueError("Only up to two types of action models are supported.") + quantitative_model_cold_start = model_cold_start = lambda *args, **kwargs: None # dummy callable + action_general_kwargs = quantitative_action_general_kwargs = None + for action_model_class in action_model_classes: + if hasattr(action_model_class, "cold_start"): + action_model_cold_start = action_model_class.cold_start + action_model_attributes = extract_argument_names(action_model_cold_start) + # cover for cold_start kwargs + action_model_attributes = action_model_attributes + extract_argument_names(action_model_class) + else: + action_model_cold_start = action_model_class + action_model_attributes = extract_argument_names(action_model_cold_start) + general_kwargs = {k: kwargs.pop(k) for k in action_model_attributes if k in kwargs.keys()} + + if issubclass(action_model_class, (Model, ModelMO)): + model_cold_start = action_model_cold_start + action_general_kwargs = general_kwargs + elif issubclass(action_model_class, QuantitativeModel): + quantitative_model_cold_start = action_model_cold_start + quantitative_action_general_kwargs = general_kwargs + else: + raise ValueError(f"Unsupported action model class: {action_model_class}") + + return ( + model_cold_start, + quantitative_model_cold_start, + action_general_kwargs, + quantitative_action_general_kwargs, + ) diff --git a/pybandits/model.py b/pybandits/model.py index 2993645..f525cea 100644 --- a/pybandits/model.py +++ b/pybandits/model.py @@ -22,7 +22,7 @@ import warnings from abc import ABC, abstractmethod from random import betavariate -from typing import Any, List, Literal, Optional, Tuple, Union +from typing import List, Literal, Optional, Union import numpy as np import pymc.math as pmath @@ -33,8 +33,10 @@ from pymc import StudentT as PymcStudentT from pytensor.tensor import TensorVariable, dot from scipy.stats import t +from typing_extensions import Self -from pybandits.base import BinaryReward, Probability, PyBanditsBaseModel +from pybandits.base import BinaryReward, MOProbability, Probability, ProbabilityWeight, PyBanditsBaseModel +from pybandits.base_model import BaseModelCC, BaseModelMO, BaseModelSO from pybandits.pydantic_version_compatibility import ( PYDANTIC_VERSION_1, PYDANTIC_VERSION_2, @@ -47,26 +49,106 @@ validate_call, ) -UpdateMethods = Literal["MCMC", "VI"] +UpdateMethods = Literal["VI", "MCMC"] -class Model(PyBanditsBaseModel, ABC): +class Model(BaseModelSO, ABC): """ - Class to model the prior distributions. + Class to model the prior distributions for single objective. + + Parameters + ---------- + n_successes: PositiveInt = 1 + Counter of the number of successes. 
+    n_failures: PositiveInt = 1
+        Counter of the number of failures.
     """

+    n_successes: PositiveInt = 1
+    n_failures: PositiveInt = 1
+
     @abstractmethod
-    def sample_proba(self) -> Probability:
+    def sample_proba(
+        self, context: Optional[ArrayLike] = None
+    ) -> Union[Probability, MOProbability, List[ProbabilityWeight]]:
         """
         Sample the probability of getting a positive reward.
+
+        Parameters
+        ----------
+        context : Optional[ArrayLike]
+            Matrix of contextual features.
+        """
+
+    @validate_call(config=dict(arbitrary_types_allowed=True))
+    def update(self, rewards: List[BinaryReward], context: Optional[ArrayLike] = None):
         """
+        Update n_successes and n_failures.
+
+        Parameters
+        ----------
+        rewards: List[BinaryReward]
+            A list of binary rewards.
+        context : Optional[ArrayLike]
+            Matrix of contextual features.
+        """
+        self.n_successes += sum(rewards)
+        self.n_failures += len(rewards) - sum(rewards)
+        self._update(rewards=rewards, context=context)

     @abstractmethod
-    def update(self, rewards: List[Any]):
+    def _update(self, rewards: List[BinaryReward], context: Optional[ArrayLike] = None):
         """
         Update the model parameters.
+
+        Parameters
+        ----------
+        rewards: List[BinaryReward]
+            A list of binary rewards.
+        context : Optional[ArrayLike]
+            Matrix of contextual features.
         """

+    @property
+    def count(self) -> int:
+        """
+        The total amount of successes and failures collected.
+        """
+        return self.n_successes + self.n_failures
+
+    @property
+    def mean(self) -> float:
+        """
+        The success rate i.e. n_successes / (n_successes + n_failures).
+        """
+        return self.n_successes / self.count
+
+
+class ModelCC(BaseModelCC, ABC):
+    """
+    Class to model action cost.
+
+    Parameters
+    ----------
+    cost: NonNegativeFloat
+        Cost associated with the action.
+    """
+
+    cost: NonNegativeFloat
+
+
+class ModelMO(BaseModelMO, ABC):
+    """
+    Class to model the prior distributions for multi-objective.
+
+    Parameters
+    ----------
+    models : List[Model]
+        The list of models for each objective.
+    """
+
+    models: List[Model]
+

 class BaseBeta(Model):
     """
@@ -80,30 +162,13 @@ class BaseBeta(Model):
         Counter of the number of failures.
     """

-    n_successes: PositiveInt = 1
-    n_failures: PositiveInt = 1
-
     @model_validator(mode="before")
     @classmethod
     def both_or_neither_counters_are_defined(cls, values):
         if hasattr(values, "n_successes") != hasattr(values, "n_failures"):
             raise ValueError("Either both or neither n_successes and n_failures should be specified.")
         return values

-    @property
-    def count(self) -> int:
-        """
-        The total amount of successes and failures collected.
-        """
-        return self.n_successes + self.n_failures
-
-    @property
-    def mean(self) -> float:
-        """
-        The success rate i.e. n_successes / (n_successes + n_failures).
-        """
-        return self.n_successes / self.count
-
     @property
     def std(self) -> float:
         """
@@ -112,19 +177,18 @@ def std(self) -> float:
         return sqrt((self.n_successes * self.n_failures) / (self.count * (self.count - 1)))

     @validate_call
-    def update(self, rewards: List[BinaryReward]):
+    def _update(self, rewards: List[BinaryReward], context: None = None):
         """
-        Update n_successes and and n_failures.
+        No-op: n_successes and n_failures are already updated in Model.update.

         Parameters
         ----------
         rewards: List[BinaryReward]
             A list of binary rewards.
""" - self.n_successes += sum(rewards) - self.n_failures += len(rewards) - sum(rewards) + pass - def sample_proba(self) -> Probability: + def sample_proba(self, context: None = None) -> Probability: """ Sample the probability of getting a positive reward. @@ -139,36 +203,45 @@ def sample_proba(self) -> Probability: class Beta(BaseBeta): """ Beta Distribution model for Bernoulli multi-armed bandits. + + Parameters + ---------- + n_successes: PositiveInt = 1 + Counter of the number of successes. + n_failures: PositiveInt = 1 + Counter of the number of failures. """ -class BetaCC(BaseBeta): +class BetaCC(BaseBeta, ModelCC): """ Beta Distribution model for Bernoulli multi-armed bandits with cost control. Parameters ---------- - cost: NonNegativeFloat + n_successes : PositiveInt = 1 + Counter of the number of successes. + n_failures : PositiveInt = 1 + Counter of the number of failures. + cost : NonNegativeFloat Cost associated to the Beta distribution. """ - cost: NonNegativeFloat - -class BetaMO(Model): +class BetaMO(ModelMO): """ Beta Distribution model for Bernoulli multi-armed bandits with multi-objectives. Parameters ---------- - counters: List[Beta] of shape (n_objectives,) + models: List[Beta] of length (n_objectives,) List of Beta distributions. """ - counters: List[Beta] + models: List[Beta] @validate_call - def sample_proba(self) -> List[Probability]: + def sample_proba(self, context: None = None) -> MOProbability: """ Sample the probability of getting a positive reward. @@ -177,7 +250,7 @@ def sample_proba(self) -> List[Probability]: prob: List[Probability] Probabilities of getting a positive reward for each objective. """ - return [x.sample_proba() for x in self.counters] + return [x.sample_proba() for x in self.models] @validate_call def update(self, rewards: List[List[BinaryReward]]): @@ -191,11 +264,11 @@ def update(self, rewards: List[List[BinaryReward]]): associated to each objective. For example, `[[1, 1], [1, 0], [1, 1], [1, 0], [1, 1]]`. """ - if any(len(x) != len(self.counters) for x in rewards): + if any(len(x) != len(self.models) for x in rewards): raise AttributeError("The shape of rewards is incorrect") - for i, counter in enumerate(self.counters): - counter.update([r[i] for r in rewards]) + for i, model in enumerate(self.models): + model.update([r[i] for r in rewards]) @classmethod def cold_start(cls, n_objectives: PositiveInt, **kwargs) -> "BetaMO": @@ -222,25 +295,21 @@ def cold_start(cls, n_objectives: PositiveInt, **kwargs) -> "BetaMO": blr: BayesianLogisticRegrssion The Bayesian Logistic Regression model. """ - counters = n_objectives * [Beta()] - blr = cls(counters=counters, **kwargs) + models = n_objectives * [Beta()] + blr = cls(models=models, **kwargs) return blr -class BetaMOCC(BetaMO): +class BetaMOCC(BetaMO, ModelCC): """ Beta Distribution model for Bernoulli multi-armed bandits with multi-objectives and cost control. Parameters ---------- - counters: List[BetaCC] of shape (n_objectives,) + models: List[BetaCC] of shape (n_objectives,) List of Beta distributions. - cost: NonNegativeFloat - Cost associated to the Beta distribution. """ - cost: NonNegativeFloat - class StudentT(PyBanditsBaseModel): """ @@ -261,7 +330,7 @@ class StudentT(PyBanditsBaseModel): nu: confloat(allow_inf_nan=False) = 5.0 -class BayesianLogisticRegression(Model): +class BaseBayesianLogisticRegression(Model): """ Base Bayesian Logistic Regression model. 
@@ -281,7 +350,7 @@ class BayesianLogisticRegression(Model): The strategy for computing posterior quantities of the Bayesian models in the update function. Such as Markov chain Monte Carlo ("MCMC") or Variational Inference ("VI"). Check UpdateMethods in pybandits.model for the full list. - update_kwargs : Optional[dict], uses default values if not specified + update_kwargs : Optional[dict], uses default quantities if not specified Additional arguments to pass to the update method. """ @@ -352,12 +421,12 @@ def _stable_sigmoid(cls, x: Union[np.ndarray, TensorVariable]) -> Union[np.ndarr Parameters ---------- x : Union[np.ndarray, TensorVariable] - Input values. + Input quantities. Returns ------- prob : Union[np.ndarray, TensorVariable] - Sigmoid function applied to the input values. + Sigmoid function applied to the input quantities. """ backend = np if isinstance(x, np.ndarray) else pmath with warnings.catch_warnings(): @@ -388,7 +457,7 @@ def check_context_matrix(self, context: ArrayLike): raise AttributeError(f"Shape mismatch: context must have {len(self.betas)} columns.") @validate_call(config=dict(arbitrary_types_allowed=True)) - def sample_proba(self, context: ArrayLike) -> Tuple[Probability, float]: + def sample_proba(self, context: ArrayLike) -> List[ProbabilityWeight]: """ Compute the probability of getting a positive reward from the sampled regression coefficients and the context. @@ -402,7 +471,7 @@ def sample_proba(self, context: ArrayLike) -> Tuple[Probability, float]: prob: ndarray of shape (n_samples) Probability of getting a positive reward. weighted_sum: ndarray of shape (n_samples) - Weighted sums between contextual feature values and sampled coefficients. + Weighted sums between contextual feature quantities and sampled coefficients. """ # check input args @@ -411,7 +480,7 @@ def sample_proba(self, context: ArrayLike) -> Tuple[Probability, float]: # extend context with a column of 1 to handle the dot product with the intercept context_ext = c_[ones((len(context), 1)), context] - # sample alpha and beta coefficient values from student-t distributions once for each sample + # sample alpha and beta coefficient quantities from student-t distributions once for each sample alpha = t.rvs(df=self.alpha.nu, loc=self.alpha.mu, scale=self.alpha.sigma, size=len(context_ext)) betas = array( [ @@ -429,19 +498,19 @@ def sample_proba(self, context: ArrayLike) -> Tuple[Probability, float]: # compute the probability with the sigmoid function prob = self._stable_sigmoid(weighted_sum) - return prob, weighted_sum + return list(zip(prob, weighted_sum)) @validate_call(config=dict(arbitrary_types_allowed=True)) - def update(self, context: ArrayLike, rewards: List[BinaryReward]): + def _update(self, rewards: List[BinaryReward], context: ArrayLike): """ Update the model parameters. Parameters ---------- - context : ArrayLike - Context matrix of shape (n_samples, n_features). rewards: List[BinaryReward] A list of binary rewards. + context : ArrayLike + Context matrix of shape (n_samples, n_features). 
""" # check input args @@ -482,22 +551,26 @@ def update(self, context: ArrayLike, rewards: List[BinaryReward]): raise ValueError("Invalid update method.") # compute mean and std of the coefficients distributions - self.alpha.mu = mean(trace["alpha"]) - self.alpha.sigma = std(trace["alpha"], ddof=1) - betas_mu = mean(trace["betas"], axis=0) - betas_std = std(trace["betas"], axis=0, ddof=1) - self.betas = [ - StudentT(mu=mu, sigma=sigma, nu=beta.nu) for mu, sigma, beta in zip(betas_mu, betas_std, self.betas) - ] + if hasattr(trace, "alpha") and hasattr(trace, "betas"): + self.alpha.mu = mean(trace["alpha"]) + self.alpha.sigma = std(trace["alpha"], ddof=1) + betas_mu = mean(trace["betas"], axis=0) + betas_std = std(trace["betas"], axis=0, ddof=1) + self.betas = [ + StudentT(mu=mu, sigma=sigma, nu=beta.nu) for mu, sigma, beta in zip(betas_mu, betas_std, self.betas) + ] + else: + warnings.warn("Trace object missing vital keys. Model was not updated.") @classmethod + @validate_call def cold_start( cls, n_features: PositiveInt, update_method: UpdateMethods = "MCMC", update_kwargs: Optional[dict] = None, **kwargs, - ) -> "BayesianLogisticRegression": + ) -> Self: """ Utility function to create a Bayesian Logistic Regression model or child model with cost control, with default parameters. @@ -517,14 +590,14 @@ def cold_start( The strategy for computing posterior quantities of the Bayesian models in the update function. Such as Markov chain Monte Carlo ("MCMC") or Variational Inference ("VI"). Check UpdateMethods in pybandits.model for the full list. - update_kwargs : Optional[dict], uses default values if not specified + update_kwargs : Optional[dict], uses default quantities if not specified Additional arguments to pass to the update method. kwargs: Dict[str, Any] Additional arguments for the Bayesian Logistic Regression child model. Returns ------- - blr: BayesianLogisticRegrssion + blr: BayesianLogisticRegression The Bayesian Logistic Regression model. """ return cls( @@ -536,7 +609,32 @@ def cold_start( ) -class BayesianLogisticRegressionCC(BayesianLogisticRegression): +class BayesianLogisticRegression(BaseBayesianLogisticRegression): + """ + Base Bayesian Logistic Regression model. + + It is modeled as: + + y = sigmoid(alpha + beta1 * x1 + beta2 * x2 + ... + betaN * xN) + + where the alpha and betas coefficients are Student's t-distributions. + + Parameters + ---------- + alpha : StudentT + Student's t-distribution of the alpha coefficient. + betas : StudentT + Student's t-distributions of the betas coefficients. + update_method : UpdateMethods, defaults to "MCMC" + The strategy for computing posterior quantities of the Bayesian models in the update function. Such as Markov + chain Monte Carlo ("MCMC") or Variational Inference ("VI"). Check UpdateMethods in pybandits.model for the + full list. + update_kwargs : Optional[dict], uses default quantities if not specified + Additional arguments to pass to the update method. + """ + + +class BayesianLogisticRegressionCC(BaseBayesianLogisticRegression, ModelCC): """ Bayesian Logistic Regression model with cost control. @@ -556,10 +654,8 @@ class BayesianLogisticRegressionCC(BayesianLogisticRegression): The strategy for computing posterior quantities of the Bayesian models in the update function. Such as Markov chain Monte Carlo ("MCMC") or Variational Inference ("VI"). Check UpdateMethods in pybandits.model for the full list. 
     update_kwargs : Optional[dict], uses default values if not specified
         Additional arguments to pass to the update method.
     cost: NonNegativeFloat
         Cost associated to the Bayesian Logistic Regression model.
     """
-
-    cost: NonNegativeFloat
diff --git a/pybandits/quantitative_mab.py b/pybandits/quantitative_mab.py
new file mode 100644
index 0000000..8866219
--- /dev/null
+++ b/pybandits/quantitative_mab.py
@@ -0,0 +1,49 @@
+from abc import ABC, abstractmethod
+from typing import List, Optional, Set, Union
+
+from pybandits.base import BinaryReward, Predictions, QuantitativeActionId
+from pybandits.mab import BaseMab
+from pybandits.pydantic_version_compatibility import validate_call
+
+
+class QuantitativeMab(BaseMab, ABC):
+    @abstractmethod
+    @validate_call
+    def update(
+        self,
+        actions: List[QuantitativeActionId],
+        rewards: Union[List[BinaryReward], List[List[BinaryReward]]],
+        *args,
+        **kwargs,
+    ):
+        """
+        Update the multi-armed bandit model.
+
+        Parameters
+        ----------
+        actions: List[QuantitativeActionId]
+            The selected action for each sample.
+        rewards: Union[List[BinaryReward], List[List[BinaryReward]]]
+            The reward for each sample.
+        """
+
+    @abstractmethod
+    @validate_call
+    def predict(self, forbidden_actions: Optional[Set[QuantitativeActionId]] = None) -> Predictions:
+        """
+        Predict actions.
+
+        Parameters
+        ----------
+        forbidden_actions : Optional[Set[QuantitativeActionId]], default=None
+            Set of forbidden actions. If specified, the model will discard the forbidden_actions and it will only
+            consider the remaining allowed_actions. By default, the model considers all actions as allowed_actions.
+            Note that: actions = allowed_actions U forbidden_actions.
+
+        Returns
+        -------
+        actions: List[QuantitativeActionId] of shape (n_samples,)
+            The actions selected by the multi-armed bandit model.
+        probs: List[Dict[QuantitativeActionId, Probability]] of shape (n_samples,)
+            The probabilities of getting a positive reward for each action.
+        ws : List[Dict[QuantitativeActionId, float]], only relevant for some of the MABs
+            The weighted sum of logistic regression logits.
+        """
diff --git a/pybandits/quantitative_model.py b/pybandits/quantitative_model.py
new file mode 100644
index 0000000..f68ac44
--- /dev/null
+++ b/pybandits/quantitative_model.py
@@ -0,0 +1,715 @@
+from abc import ABC, abstractmethod
+from functools import cache, cached_property
+from itertools import product
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+from numpy.typing import ArrayLike
+from scipy.spatial.distance import jensenshannon
+from scipy.stats import beta
+from typing_extensions import Self
+
+from pybandits.base import BinaryReward, Probability, PyBanditsBaseModel, QuantitativeProbability
+from pybandits.base_model import BaseModel, BaseModelCC
+from pybandits.model import BayesianLogisticRegression, Beta, Model
+from pybandits.pydantic_version_compatibility import (
+    NonNegativeFloat,
+    PositiveInt,
+    PrivateAttr,
+    field_validator,
+    validate_call,
+)
+
+
+class QuantitativeModel(BaseModel, ABC):
+    """
+    Base class for quantitative models.
+
+    Parameters
+    ----------
+    dimension: PositiveInt
+        Number of parameters of the model.
+    """
+
+    dimension: PositiveInt
+
+    @abstractmethod
+    def sample_proba(self) -> QuantitativeProbability:
+        """
+        Sample the model.
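+
+        Concrete zooming models return a mapping from each sampled quantity to its sampled probability
+        (see ZoomingModel.sample_proba below).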
+        """
+
+    @validate_call(config=dict(arbitrary_types_allowed=True))
+    def update(
+        self,
+        quantities: List[Union[float, List[float]]],
+        rewards: Union[List[BinaryReward], List[List[BinaryReward]]],
+        context: Optional[ArrayLike],
+    ):
+        """
+        Update the model parameters.
+
+        Parameters
+        ----------
+        quantities : Optional[List[Union[float, List[float], None]]]
+            The value associated with each action. If None, the value is not used, i.e. non-quantitative action.
+        rewards: Union[List[BinaryReward], List[List[BinaryReward]]]
+            The reward for each sample.
+        context : Optional[ArrayLike]
+            Context for each sample.
+        """
+
+        self._validate_update_params(quantities, rewards, context)
+        if quantities:
+            self._update(quantities, rewards, context)
+
+    @abstractmethod
+    def _update(
+        self,
+        quantities: Optional[List[Union[float, List[float], None]]],
+        rewards: Union[List[BinaryReward], List[List[BinaryReward]]],
+        context: Optional[ArrayLike],
+    ):
+        """
+        Update the model parameters.
+
+        Parameters
+        ----------
+        quantities : Optional[List[Union[float, List[float], None]]]
+            The value associated with each action. If None, the value is not used, i.e. non-quantitative action.
+        rewards: Union[List[BinaryReward], List[List[BinaryReward]]]
+            The reward for each sample.
+        context : Optional[ArrayLike]
+            Context for each sample.
+        """
+
+    @staticmethod
+    def _validate_update_params(
+        quantities: Optional[List[Union[float, List[float], None]]],
+        rewards: Union[List[BinaryReward], List[List[BinaryReward]]],
+        context: Optional[ArrayLike],
+    ):
+        """
+        Verify that quantities, rewards, and context have consistent lengths.
+
+        Parameters
+        ----------
+        quantities : Optional[List[Union[float, List[float], None]]]
+            The value associated with each action. If None, the value is not used, i.e. non-quantitative action.
+        rewards: Union[List[BinaryReward], List[List[BinaryReward]]]
+            The reward for each sample.
+        context : Optional[ArrayLike]
+            Context for each sample.
+        """
+        if quantities is not None and len(quantities) != len(rewards):
+            raise AttributeError(
+                f"Shape mismatch: quantities and rewards should have the same length, "
+                f"got {len(quantities)} and {len(rewards)}."
+            )
+        if context is not None and len(context) != len(rewards):
+            raise AttributeError(
+                f"Shape mismatch: context and rewards should have the same length, "
+                f"got {len(context)} and {len(rewards)}."
+            )
+
+
+class QuantitativeModelCC(BaseModelCC, ABC):
+    """
+    Class to model quantitative action cost.
+
+    Parameters
+    ----------
+    cost: Callable[[Union[float, NonNegativeFloat]], NonNegativeFloat]
+        Cost function mapping an action quantity to its non-negative cost.
+    """
+
+    cost: Callable[[Union[float, NonNegativeFloat]], NonNegativeFloat]
+
+
+class Segment(PyBanditsBaseModel):
+    """
+    A segment of the unit hypercube, described by one (min, max) interval per dimension.
+
+    Parameters
+    ----------
+    intervals: Tuple[Tuple[Probability, Probability], ...]
+        Intervals of the segment.
+    """
+
+    intervals: Tuple[Tuple[Probability, Probability], ...]
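+    # For illustration: intervals=((0.0, 0.5),) describes the 1-D sub-range [0.0, 0.5), while
+    # ((0.0, 0.5), (0.5, 1.0)) describes a 2-D box; containment is half-open except at the upper bound 1
+    # (see __contains__ below).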
+
+    @cached_property
+    def mins(self) -> np.ndarray:
+        return self.intervals_array[:, 0]
+
+    @cached_property
+    def maxs(self) -> np.ndarray:
+        return self.intervals_array[:, 1]
+
+    @cached_property
+    def intervals_array(self) -> np.ndarray:
+        array_form = np.array(self.intervals)
+        if array_form.size == 0:
+            return np.array([[], []]).T
+        return array_form
+
+    @field_validator("intervals", mode="before")
+    @classmethod
+    def intervals_to_tuple(cls, value):
+        if isinstance(value, np.ndarray):
+            if value.shape[1] != 2:
+                raise ValueError("Intervals must have shape (n, 2).")
+            return tuple(tuple(v) for v in value)
+        return value
+
+    def split(self) -> Tuple["Segment", "Segment"]:
+        middles = (self.mins + self.maxs) / 2
+        # stack per-dimension (min, mid) and (mid, max) pairs into (n, 2) interval arrays
+        left_intervals = np.stack((self.mins, middles), axis=1)
+        right_intervals = np.stack((middles, self.maxs), axis=1)
+        return Segment(intervals=left_intervals), Segment(intervals=right_intervals)
+
+    def __add__(self, other: "Segment") -> "Segment":
+        """
+        Add two adjacent segments.
+
+        Parameters
+        ----------
+        other : Segment
+            Segment to add.
+
+        Returns
+        -------
+        Segment
+            The merged segment.
+        """
+        if not self.is_adjacent(other):
+            raise ValueError("Segments must be adjacent.")
+        to_concatenate = (self.mins, other.maxs) if np.all(self.maxs == other.mins) else (other.mins, self.maxs)
+        new_intervals = np.stack(to_concatenate, axis=1)
+        return Segment(intervals=new_intervals)
+
+    @cache
+    def __str__(self) -> str:
+        return str(list(zip(self.mins, self.maxs)))
+
+    def __hash__(self) -> int:
+        return tuple(self.intervals_array.flatten()).__hash__()
+
+    def __contains__(self, value: Union[float, np.ndarray]) -> bool:
+        """
+        Check if a value is contained in segment.
+
+        Parameters
+        ----------
+        value : Union[float, np.ndarray]
+            Value to check.
+
+        Returns
+        -------
+        bool
+            Whether the value is contained in the segment.
+        """
+        if (isinstance(value, np.ndarray) and value.shape != (self.intervals_array.shape[0],)) or (
+            isinstance(value, float) and len(self.intervals_array) != 1
+        ):
+            raise ValueError("Tested value must have the same shape as the intervals.")
+        return bool(
+            np.all(
+                np.logical_and(
+                    (self.mins <= value),
+                    np.logical_or((value < self.maxs), np.logical_and(value == self.maxs, self.maxs == 1)),
+                )
+            )
+        )
+
+    def __eq__(self, other) -> bool:
+        return np.all(self.intervals_array == other.intervals_array)
+
+    def is_adjacent(self, other: "Segment") -> bool:
+        """
+        Check if two segments are adjacent.
+
+        Parameters
+        ----------
+        other : Segment
+            Segment to check.
+
+        Returns
+        -------
+        bool
+            Whether the segments are adjacent.
+        """
+        if self.intervals_array.shape[0] != other.intervals_array.shape[0]:
+            raise ValueError("Segments must have the same shape.")
+        return np.all(self.maxs == other.mins) or np.all(self.mins == other.maxs)
+
+
+class ZoomingModel(QuantitativeModel, ABC):
+    """
+    Base class for zooming models.
+
+    Parameters
+    ----------
+    dimension: PositiveInt
+        Number of parameters of the model.
+    comparison_threshold: Probability
+        Comparison threshold.
+    n_comparison_points: PositiveInt
+        Number of comparison points.
+    n_max_segments: Optional[PositiveInt]
+        Maximum number of segments.
+    sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[Model]]
+        Mapping of segments to models.
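+
+    Example (illustrative sketch; a concrete subclass such as SmabZoomingModel below supplies the base model):
+
+        model = SmabZoomingModel.cold_start(dimension=1, n_1d_segments=2)
+        proba_per_quantity = model.sample_proba()  # one {(quantity,): probability} entry per segment
+        model.update(quantities=[0.2, 0.7], rewards=[1, 0], context=None)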
+    """
+
+    dimension: PositiveInt
+    comparison_threshold: Probability = 0.1
+    n_comparison_points: PositiveInt = 1000
+    n_max_segments: Optional[PositiveInt] = 32
+    sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[Model]]
+    _base_model: Model = PrivateAttr()
+
+    def _validate_segments(self):
+        if self.n_max_segments is not None and len(self.sub_actions) > self.n_max_segments:
+            raise ValueError("Number of segments must not exceed the maximum number of segments.")
+        dimensions = {len(segment) for segment in self.sub_actions.keys()}
+        if dimensions != {self.dimension}:
+            raise ValueError(f"All segments must have the same dimension {self.dimension}.")
+
+    def model_post_init(self, __context: Any) -> None:
+        self._validate_segments()
+        self._init_base_model()
+        segment_models_types = set(type(model) if model is not None else None for model in self.sub_actions.values())
+        if None in segment_models_types:
+            if len(segment_models_types) > 1:
+                raise ValueError("Segments must either all have models or all be missing models.")
+            # give each segment its own copy of the base model so that updates do not leak across segments
+            self.sub_actions = {segment: self._base_model.model_copy() for segment in self.sub_actions}
+
+    @property
+    def segmented_actions(self) -> Dict[Segment, Optional[Model]]:
+        return {Segment(intervals=segment): model for segment, model in self.sub_actions.items()}
+
+    @abstractmethod
+    def _init_base_model(self):
+        """
+        Initialize the base model.
+        """
+
+    @classmethod
+    @validate_call
+    def cold_start(
+        cls,
+        dimension: PositiveInt = 1,
+        n_1d_segments: PositiveInt = 2,
+        comparison_threshold: Probability = 0.1,
+        n_comparison_points: PositiveInt = 1000,
+        n_max_segments: Optional[PositiveInt] = 32,
+        **kwargs,
+    ) -> Self:
+        """
+        Create a cold start model.
+
+        Returns
+        -------
+        ZoomingModel
+            Cold start model.
+        """
+        interval_points = np.linspace(0, 1, n_1d_segments + 1)
+        intervals = [(interval_points[i], interval_points[i + 1]) for i in range(n_1d_segments)]
+        sub_actions = {tuple(segment): None for segment in product(intervals, repeat=dimension)}
+        return cls(
+            dimension=dimension,
+            comparison_threshold=comparison_threshold,
+            n_comparison_points=n_comparison_points,
+            n_max_segments=n_max_segments,
+            sub_actions=sub_actions,
+            **kwargs,
+        )
+
+    @validate_call(config=dict(arbitrary_types_allowed=True))
+    def sample_proba(self, context: Optional[ArrayLike] = None) -> QuantitativeProbability:
+        """
+        Sample a quantity uniformly from each segment and compute its sampled probability of a positive reward
+        under that segment's model.
+        """
+        result = {}
+        kwargs = {"context": context} if context is not None else {}
+        for segment, model in self.segmented_actions.items():
+            random_point = np.random.random(len(segment.intervals))
+            scaled_action_value = segment.mins + random_point * (segment.maxs - segment.mins)
+            result[tuple(scaled_action_value)] = model.sample_proba(**kwargs)
+        return result
+
+    def _update(
+        self, quantities: List[Union[float, np.ndarray]], rewards: List[BinaryReward], context: Optional[ArrayLike]
+    ):
+        """
+        Update the model parameters.
+
+        Parameters
+        ----------
+        quantities : List[Union[float, np.ndarray]]
+            The value associated with each action.
+        rewards: List[BinaryReward]
+            The reward for each sample.
+        context : Optional[ArrayLike]
+            Context for each sample.
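+
+        Notes
+        -----
+        The update proceeds in two steps: each sample is first mapped to its containing segment and that
+        segment's model is updated; the segmentation is then revised by merging adjacent segments with
+        similar reward distributions and splitting the best-performing segment.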
+ """ + + segments = self._map_and_update_segment_models(quantities, rewards, context) + self._update_segmentation(quantities, segments, rewards, context) + + def _map_and_update_segment_models( + self, quantities: List[Union[float, np.ndarray]], rewards: List[BinaryReward], context: Optional[ArrayLike] + ) -> List[Segment]: + """ + Map and update the segment models. + + Parameters + ---------- + quantities : List[Union[float, np.ndarray]] + The value associated with each action. + rewards: List[BinaryReward] + The reward for each sample. + context : Optional[ArrayLike] + Context for each sample. + + Returns + ------- + List[Segment] + Segments to update. + """ + segments = self._map_values_to_segments(quantities) + self._inner_update(segments, rewards, context) + return segments + + @abstractmethod + def _inner_update(self, segments: List[Segment], rewards: List[BinaryReward], context: Optional[ArrayLike] = None): + """ + Update the segments models. + + Parameters + ---------- + segments : List[Segment] + Segments to update. + rewards : List[BinaryReward] + Rewards for update. + context : Optional[ArrayLike] + Context for update. + """ + + def _map_values_to_segments(self, quantities: List[Union[float, np.ndarray]]) -> List[Segment]: + segments = [segment for value in quantities for segment in self.segmented_actions.keys() if value in segment] + return segments + + def _update_segmentation( + self, + quantities: List[Union[float, np.ndarray]], + segments: List[Segment], + rewards: List[BinaryReward], + context: Optional[ArrayLike] = None, + ): + segment_scores = {segment: model.mean for segment, model in self.segmented_actions.items()} + ordered_segments = sorted(segment_scores, key=segment_scores.get) + best_segment = ordered_segments[-1] + del self.sub_actions[best_segment.intervals] + + # Consider merging adjacent segments + worst_segments = ordered_segments[:-1] + i = 0 + while i < len(worst_segments) - 1: + segment = worst_segments[i] + j = i + 1 + while j < len(worst_segments): + other_segment = worst_segments[j] + if segment.is_adjacent(other_segment) and self.is_similar_performance(segment, other_segment): + del self.sub_actions[segment.intervals] + del self.sub_actions[other_segment.intervals] + worst_segments.remove(segment) + worst_segments.remove(other_segment) + merged_segment = segment + other_segment + self.sub_actions[merged_segment.intervals] = self._base_model.model_copy() + filtered_quantities, filtered_rewards, filtered_context = self._filter_by_segment( + merged_segment, quantities, segments, rewards, context + ) + self._map_and_update_segment_models(filtered_quantities, filtered_rewards, filtered_context) + break + j += 1 + i += 1 + + # Split best segment if possible + if self.n_max_segments is None or len(self.sub_actions) < self.n_max_segments: + sub_best_segments = best_segment.split() + self.sub_actions[sub_best_segments[0].intervals] = self._base_model.model_copy() + self.sub_actions[sub_best_segments[1].intervals] = self._base_model.model_copy() + filtered_quantities, filtered_rewards, filtered_context = self._filter_by_segment( + best_segment, quantities, segments, rewards, context + ) + self._map_and_update_segment_models(filtered_quantities, filtered_rewards, filtered_context) + + def is_similar_performance(self, segment1: Segment, segment2: Segment) -> bool: + """ + Check if two segments have similar performance. + + Parameters + ---------- + segment1 : Segment + First segment. + segment2 : Segment + Second segment. 
+
+        Returns
+        -------
+        bool
+            Whether the segments have similar performance.
+        """
+        x = np.linspace(0, 1, self.n_comparison_points)
+        model1 = self.sub_actions[segment1.intervals]
+        model2 = self.sub_actions[segment2.intervals]
+        p1 = beta.pdf(x, model1.n_successes, model1.n_failures)
+        p2 = beta.pdf(x, model2.n_successes, model2.n_failures)
+        return jensenshannon(p1, p2) < self.comparison_threshold
+
+    def _filter_by_segment(
+        self,
+        reference_segment: Segment,
+        quantities: List[Union[float, np.ndarray]],
+        segments: List[Segment],
+        rewards: List[BinaryReward],
+        context: Optional[ArrayLike],
+    ) -> Tuple[List[Union[float, np.ndarray]], List[BinaryReward], Optional[ArrayLike]]:
+        """
+        Filter the samples down to those whose segment matches the reference segment.
+
+        Parameters
+        ----------
+        reference_segment : Segment
+            Reference segment to filter upon.
+        quantities : List[Union[float, np.ndarray]]
+            Values to filter.
+        segments : List[Segment]
+            Segments to filter.
+        rewards : List[BinaryReward]
+            Rewards to filter.
+        context : Optional[ArrayLike]
+            Context to filter.
+
+        Returns
+        -------
+        filtered_values : List[Union[float, np.ndarray]]
+            Filtered quantities.
+        filtered_rewards : List[BinaryReward]
+            Filtered rewards.
+        filtered_context : Optional[ArrayLike]
+            Filtered context.
+        """
+        filtered_values_rewards_context = [
+            (value, reward, sample_context)
+            for (value, reward, sample_context, segment) in zip(
+                quantities, rewards, context if context is not None else [None] * len(segments), segments
+            )
+            if segment == reference_segment
+        ]
+        if filtered_values_rewards_context:
+            filtered_values, filtered_rewards, filtered_context = zip(*filtered_values_rewards_context)
+        else:
+            filtered_values, filtered_rewards, filtered_context = [], [], []
+        filtered_context = np.array(filtered_context) if context is not None else None
+        return filtered_values, filtered_rewards, filtered_context
+
+
+class BaseSmabZoomingModel(ZoomingModel, ABC):
+    """
+    Zooming model for sMAB.
+
+    Parameters
+    ----------
+    dimension: PositiveInt
+        Number of parameters of the model.
+    comparison_threshold: Probability
+        Comparison threshold.
+    n_comparison_points: PositiveInt
+        Number of comparison points.
+    n_max_segments: PositiveInt
+        Maximum number of segments.
+    sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[Beta]]
+        Mapping of segments to Beta models.
+    """
+
+    sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[Beta]]
+
+    def _init_base_model(self):
+        """
+        Initialize the base model.
+        """
+        self._base_model = Beta()
+
+    @validate_call(config=dict(arbitrary_types_allowed=True))
+    def update(
+        self,
+        quantities: Optional[List[Union[float, List[float], None]]],
+        rewards: Union[List[BinaryReward], List[List[BinaryReward]]],
+        context: Optional[Any] = None,
+    ):
+        """
+        Update the model parameters.
+
+        Parameters
+        ----------
+        quantities : Optional[List[Union[float, List[float], None]]]
+            The value associated with each action. If None, the value is not used, i.e. non-quantitative action.
+        rewards: Union[List[BinaryReward], List[List[BinaryReward]]]
+            The reward for each sample.
+        context : Optional[Any]
+            Placeholder for context.
+        """
+        super().update(quantities, rewards, context)
+
+    @validate_call(config=dict(arbitrary_types_allowed=True))
+    def _inner_update(self, segments: List[Segment], rewards: List[BinaryReward], context: Optional[Any] = None):
+        """
+        Update the segments models.
+
+        Parameters
+        ----------
+        segments : List[Segment]
+            Segments to update.
+        rewards : List[BinaryReward]
+            Rewards for update.
+        context : Optional[Any]
+            Placeholder for context.
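+
+        Notes
+        -----
+        Rewards are grouped by segment, and each segment's Beta model is updated with its own batch of
+        rewards; the context placeholder is ignored in the stochastic (sMAB) setting.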
+ """ + segments = np.array(segments) + rewards = np.array(rewards) + for segment in set(segments): + rewards_of_segment = rewards[segments == segment].tolist() + self.sub_actions[segment.intervals].update(rewards=rewards_of_segment) + + +class SmabZoomingModel(BaseSmabZoomingModel): + """ + Zooming model for sMAB. + + Parameters + ---------- + dimension: PositiveInt + Number of parameters of the model. + comparison_threshold: Probability + Comparison threshold. + n_comparison_points: PositiveInt + Number of comparison points. + n_max_segments: PositiveInt + Maximum number of segments. + sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[Beta]] + Mapping of segments to Beta models. + """ + + +class SmabZoomingModelCC(BaseSmabZoomingModel, QuantitativeModelCC): + """ + Zooming model for sMAB with cost control. + + Parameters + ---------- + comparison_threshold: Probability + Comparison threshold. + n_comparison_points: PositiveInt + Number of comparison points. + n_max_segments: PositiveInt + Maximum number of segments. + cost: Callable[[Union[float, NonNegativeFloat]], NonNegativeFloat] + Cost associated to the Beta distribution. + """ + + +class BaseCmabZoomingModel(ZoomingModel, ABC): + """ + Zooming model for CMAB. + + Parameters + ---------- + dimension: PositiveInt + Number of parameters of the model. + comparison_threshold: Probability + Comparison threshold. + n_comparison_points: PositiveInt + Number of comparison points. + n_max_segments: PositiveInt + Maximum number of segments. + sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[BayesianLogisticRegression]] + Mapping of segments to Bayesian Logistic Regression models. + base_model_cold_start_kwargs: Dict[str, Any] + Keyword arguments for the base model cold start. + """ + + sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[BayesianLogisticRegression]] + base_model_cold_start_kwargs: Dict[str, Any] + + @field_validator("base_model_cold_start_kwargs", mode="before") + @classmethod + def validate_n_features(cls, value): + if "n_features" not in value: + raise KeyError("n_features must be in base_model_cold_start_kwargs.") + return value + + def _init_base_model(self): + """ + Initialize the base model. + """ + self._base_model = BayesianLogisticRegression.cold_start(**self.base_model_cold_start_kwargs) + + @validate_call(config=dict(arbitrary_types_allowed=True)) + def _inner_update(self, segments: List[Segment], rewards: List[BinaryReward], context: ArrayLike): + """ + Update the segments models. + + Parameters + ---------- + segments : List[Segment] + Segments to update. + rewards : List[BinaryReward] + Rewards for update. + context : Optional[ArrayLike] + Context for update. + """ + segments = np.array(segments) + rewards = np.array(rewards) + context = np.array(context) + for segment in set(segments): + rewards_of_segment = rewards[segments == segment].tolist() + context_of_segment = context[segments == segment] + if rewards_of_segment: + self.sub_actions[segment.intervals].update(rewards=rewards_of_segment, context=context_of_segment) + + +class CmabZoomingModel(BaseCmabZoomingModel): + """ + Zooming model for CMAB. + + Parameters + ---------- + dimension: PositiveInt + Number of parameters of the model. + comparison_threshold: Probability + Comparison threshold. + n_comparison_points: PositiveInt + Number of comparison points. + n_max_segments: PositiveInt + Maximum number of segments. 
+    sub_actions: Dict[Tuple[Tuple[Probability, Probability], ...], Optional[BayesianLogisticRegression]]
+        Mapping of segments to Bayesian Logistic Regression models.
+    base_model_cold_start_kwargs: Dict[str, Any]
+        Keyword arguments for the base model cold start.
+    """
+
+
+class CmabZoomingModelCC(BaseCmabZoomingModel, QuantitativeModelCC):
+    """
+    Zooming model for CMAB with cost control.
+
+    Parameters
+    ----------
+    comparison_threshold: Probability
+        Comparison threshold.
+    n_comparison_points: PositiveInt
+        Number of comparison points.
+    n_max_segments: PositiveInt
+        Maximum number of segments.
+    base_model_cold_start_kwargs: Dict[str, Any]
+        Keyword arguments for the base model cold start.
+    cost: Callable[[Union[float, NonNegativeFloat]], NonNegativeFloat]
+        Cost function mapping an action quantity to its non-negative cost.
+    """
diff --git a/pybandits/simulator.py b/pybandits/simulator.py
index c9f143a..cf3cebb 100644
--- a/pybandits/simulator.py
+++ b/pybandits/simulator.py
@@ -68,7 +68,7 @@ class Simulator(PyBanditsBaseModel, ABC):
         The number of samples per batch.
     probs_reward : Optional[pd.DataFrame], default=None
         The reward probability for the different actions. If None probabilities are set to 0.5.
         The keys of the dict must match the mab actions_ids, and the values are float in the interval [0, 1].
         e.g. probs_reward=pd.DataFrame({"a1 A": [0.6], "a2 B": [0.5], "a3": [0.8]}).
         Note that currently only single-objective reward is supported.
     save : bool, defaults to False
@@ -120,9 +120,9 @@ class Config:
     def validate_probs_reward_values(cls, value):
         if value is not None:
             if not all(value.dtypes.apply(lambda x: x.kind == "f")):
                 raise ValueError("probs_reward values must be float.")
             if not value.applymap(lambda x: 0 <= x <= 1).all().all():
                 raise ValueError("probs_reward values must be in the interval [0, 1].")
         return value
 
     @field_validator("file_prefix", mode="before")
@@ -375,7 +375,7 @@ def selected_actions_count(self) -> pd.DataFrame:
         -------
         counts_df : pd.DataFrame
             Data frame with batch serial number as index (or total for all batches), actions as columns,
             and count of recommended actions as values
         """
         groupby_cols = [col for col in self._base_columns if col not in ["reward", "action"]]
         counts_df = self._results.groupby(groupby_cols)["action"].value_counts().unstack(fill_value=0).reset_index()
@@ -415,7 +415,7 @@ def positive_reward_proportion(self) -> pd.DataFrame:
         Returns
         -------
         proportion_df : pd.DataFrame
             Data frame with actions as index, and proportion of positive rewards as values
         """
         groupby_cols = [col for col in self._base_columns if col not in ["reward", "batch"]]
         proportion_df = self._results.groupby(groupby_cols)["reward"].mean().to_frame(name="proportion")
diff --git a/pybandits/smab.py b/pybandits/smab.py
index 614434c..975e853 100644
--- a/pybandits/smab.py
+++ b/pybandits/smab.py
@@ -27,12 +27,15 @@
 from pybandits.base import (
     ActionId,
     BinaryReward,
-    Probability,
     SmabPredictions,
+    UnifiedActionId,
+    UnifiedMOProbability,
+    UnifiedProbability,
 )
 from pybandits.mab import BaseMab
 from pybandits.model import BaseBeta, Beta, BetaCC, BetaMO, BetaMOCC
 from pybandits.pydantic_version_compatibility import
PositiveInt, field_validator, validate_call +from pybandits.quantitative_model import BaseSmabZoomingModel, SmabZoomingModel, SmabZoomingModelCC from pybandits.strategy import ( BestActionIdentificationBandit, ClassicBandit, @@ -49,13 +52,43 @@ class BaseSmabBernoulli(BaseMab): Parameters ---------- - actions: Dict[ActionId, BaseBeta] + actions: Dict[ActionId, Union[BaseBeta, BaseSmabZoomingModel]] The list of possible actions, and their associated Model. strategy: Strategy The strategy used to select actions. """ - actions: Dict[ActionId, BaseBeta] + actions: Dict[ActionId, Union[BaseBeta, BaseSmabZoomingModel]] + + def _inner_get_action_probabilities( + self, valid_actions: Set[ActionId], n_samples: PositiveInt = 1 + ) -> Union[ + Dict[ActionId, List[UnifiedProbability]], + Dict[ActionId, List[UnifiedMOProbability]], + ]: + """ + Get the probability of getting a positive reward for each action. + + Parameters + ---------- + + valid_actions : Set[ActionId] + The list of valid (i.e. not forbidden) action IDs. + n_samples : PositiveInt, default=1 + Number of samples to predict. + + Returns + ------- + action_probabilities: Union[Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]] + The probability of getting a positive reward for each action and objective. + """ + action_probabilities = { + action: [model.sample_proba() for _ in range(n_samples)] + for action, model in self.actions.items() + if action in valid_actions + } + + return action_probabilities @validate_call def predict( @@ -68,7 +101,7 @@ def predict( Parameters ---------- - n_samples : int > 0, default=1 + n_samples : PositiveInt, default=1 Number of samples to predict. forbidden_actions : Optional[Set[ActionId]], default=None Set of forbidden actions. If specified, the model will discard the forbidden_actions and it will only @@ -77,25 +110,25 @@ def predict( Returns ------- - actions: List[ActionId] of shape (n_samples,) + actions: List[UnifiedActionId] The actions selected by the multi-armed bandit model. - probs: List[Dict[ActionId, Probability]] of shape (n_samples,) + probs: Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, MOProbability]]] The probabilities of getting a positive reward for each action. """ - valid_actions = self._get_valid_actions(forbidden_actions) - selected_actions: List[ActionId] = [] - probs: List[Dict[ActionId, Probability]] = [] - - for _ in range(n_samples): - p = {action: model.sample_proba() for action, model in self.actions.items() if action in valid_actions} - selected_actions.append(self._select_epsilon_greedy_action(p=p, actions=self.actions)) - probs.append(p) + probs = self._get_action_probabilities(forbidden_actions=forbidden_actions, n_samples=n_samples) + selected_actions = [self._select_epsilon_greedy_action(p=prob, actions=self.actions) for prob in probs] return selected_actions, probs @validate_call - def update(self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]]): + def _update( + self, + actions: List[UnifiedActionId], + rewards: Union[List[BinaryReward], List[List[BinaryReward]]], + quantities: Optional[List[Union[float, List[float], None]]], + context: None = None, + ): """ Update the stochastic Bernoulli bandit given the list of selected actions and their corresponding binary rewards. @@ -104,23 +137,39 @@ def update(self, actions: List[ActionId], rewards: Union[List[BinaryReward], Lis ---------- actions : List[ActionId] of shape (n_samples,), e.g. 
['a1', 'a2', 'a3', 'a4', 'a5']
             The selected action for each sample.
-        rewards : List[Union[BinaryReward, List[BinaryReward]]] of shape (n_samples, n_objectives)
+        rewards : Union[List[BinaryReward], List[List[BinaryReward]]],
+            if a nested list, it should have shape (n_samples, n_objectives)
             The binary reward for each sample.
             If strategy is not MultiObjectiveBandit, rewards should be a list, e.g. rewards = [1, 0, 1, 1, 1, ...]
             If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2):
                 rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
+        quantities : Optional[List[Union[float, List[float], None]]]
+            The value associated with each action. If None, the value is not used, i.e. non-quantitative action.
+        context : None, default=None
+            The context associated with the action. Not used in this model.
         """
-        self._validate_update_params(actions=actions, rewards=rewards)
+        self._validate_update_params(actions=actions, quantities=quantities, rewards=rewards, context=None)
 
         rewards_dict = defaultdict(list)
-        for a, r in zip(actions, rewards):
-            rewards_dict[a].append(r)
-
-        for a in set(actions):
-            self.actions[a].update(rewards=rewards_dict[a])
+        if quantities is None:
+            for a, r in zip(actions, rewards):
+                rewards_dict[a].append(r)
+            for a in set(actions):
+                self.actions[a].update(rewards=rewards_dict[a])
+        else:
+            quantities_dict = defaultdict(list)
+            for a, v, r in zip(actions, quantities, rewards):
+                if v is not None:
+                    quantities_dict[a].append(v)
+                rewards_dict[a].append(r)
+            for a in set(actions):
+                if quantities_dict[a]:  # quantitative action
+                    self.actions[a].update(rewards=rewards_dict[a], quantities=quantities_dict[a])
+                else:  # non-quantitative action
+                    self.actions[a].update(rewards=rewards_dict[a])
 
 
 class SmabBernoulli(BaseSmabBernoulli):
@@ -132,13 +181,13 @@
 
     Parameters
     ----------
-    actions: Dict[ActionId, Beta]
+    actions: Dict[ActionId, Union[Beta, SmabZoomingModel]]
         The list of possible actions, and their associated Model.
     strategy: ClassicBandit
         The strategy used to select actions.
     """
 
-    actions: Dict[ActionId, Beta]
+    actions: Dict[ActionId, Union[Beta, SmabZoomingModel]]
     strategy: ClassicBandit
 
 
@@ -151,13 +200,13 @@
 
     Parameters
     ----------
-    actions: Dict[ActionId, Beta]
+    actions: Dict[ActionId, Union[Beta, SmabZoomingModel]]
         The list of possible actions, and their associated Model.
     strategy: BestActionIdentificationBandit
         The strategy used to select actions.
     """
 
-    actions: Dict[ActionId, Beta]
+    actions: Dict[ActionId, Union[Beta, SmabZoomingModel]]
     strategy: BestActionIdentificationBandit
 
 
@@ -178,13 +227,13 @@ class SmabBernoulliCC(BaseSmabBernoulli):
 
     Parameters
     ----------
-    actions: Dict[ActionId, BetaCC]
+    actions: Dict[ActionId, Union[BetaCC, SmabZoomingModelCC]]
         The list of possible actions, and their associated Model.
     strategy: CostControlBandit
         The strategy used to select actions.
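+
+    For a quantitative action the cost may depend on the chosen quantity, e.g. an illustrative
+    SmabZoomingModelCC(..., cost=lambda quantity: 0.5 * quantity), whereas a BetaCC action carries a
+    fixed NonNegativeFloat cost.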
""" - actions: Dict[ActionId, BetaCC] + actions: Dict[ActionId, Union[BetaCC, SmabZoomingModelCC]] strategy: CostControlBandit @@ -207,7 +256,7 @@ class BaseSmabBernoulliMO(BaseSmabBernoulli): @field_validator("actions", mode="after") @classmethod def all_actions_have_same_number_of_objectives(cls, actions: Dict[ActionId, BetaMO]): - n_objs_per_action = [len(beta.counters) for beta in actions.values()] + n_objs_per_action = [len(beta.models) for beta in actions.values()] if len(set(n_objs_per_action)) != 1: raise ValueError("All actions should have the same number of objectives") return actions diff --git a/pybandits/strategy.py b/pybandits/strategy.py index a67be09..0946935 100644 --- a/pybandits/strategy.py +++ b/pybandits/strategy.py @@ -28,8 +28,9 @@ from scipy.stats import ttest_ind_from_stats from typing_extensions import Self -from pybandits.base import ActionId, Float01, Probability, PyBanditsBaseModel -from pybandits.model import Beta, BetaMOCC, Model +from pybandits.base import ActionId, Float01, Probability, PyBanditsBaseModel, UnifiedActionId +from pybandits.base_model import BaseModel +from pybandits.model import Beta, BetaMOCC from pybandits.pydantic_version_compatibility import field_validator, validate_call @@ -60,7 +61,9 @@ def _with_argument(self, argument_name: str, argument_value: Any) -> Self: return mutated_strategy @abstractmethod - def select_action(self, p: Dict[ActionId, Probability], actions: Optional[Dict[ActionId, Model]]) -> ActionId: + def select_action( + self, p: Dict[UnifiedActionId, Probability], actions: Optional[Dict[ActionId, BaseModel]] + ) -> UnifiedActionId: """ Select the action. """ @@ -90,22 +93,22 @@ class ClassicBandit(Strategy): @validate_call def select_action( self, - p: Dict[ActionId, float], - actions: Optional[Dict[ActionId, Model]] = None, - ) -> ActionId: + p: Dict[UnifiedActionId, float], + actions: Optional[Dict[UnifiedActionId, BaseModel]] = None, + ) -> UnifiedActionId: """ Select the action with the highest probability of getting a positive reward. Parameters ---------- - p : Dict[ActionId, Probability] + p : Dict[UnifiedActionId, Probability] The dictionary of actions and their sampled probability of getting a positive reward. - actions : Optional[Dict[ActionId, Model]] + actions : Optional[Dict[UnifiedActionId, BaseModel]] The dictionary of actions and their associated model. Returns ------- - selected_action: ActionId + selected_action: UnifiedActionId The selected action. """ return max(p, key=p.get) @@ -162,9 +165,9 @@ def with_exploit_p(self, exploit_p: Optional[Float01]) -> Self: @validate_call def select_action( self, - p: Dict[ActionId, float], - actions: Optional[Dict[ActionId, Model]] = None, - ) -> ActionId: + p: Dict[UnifiedActionId, float], + actions: Optional[Dict[UnifiedActionId, BaseModel]] = None, + ) -> UnifiedActionId: """ Select with probability self.exploit_p the best action (i.e. the action with the highest probability of getting a positive reward), and with probability 1-self.exploit_p it returns the second best action (i.e. the action @@ -172,14 +175,14 @@ def select_action( Parameters ---------- - p : Dict[ActionId, Probability] + p : Dict[UnifiedActionId, Probability] The dictionary of actions and their sampled probability of getting a positive reward. - actions : Optional[Dict[ActionId, Model]] + actions : Optional[Dict[UnifiedActionId, BaseModel]] The dictionary of actions and their associated model. Returns ------- - selected_action: ActionId + selected_action: UnifiedActionId The selected action. 
""" p = p.copy() @@ -198,13 +201,13 @@ def select_action( return selected_action # TODO: WIP this is valid only for SmabBernoulli - def compare_best_actions(self, actions: Dict[ActionId, Beta]) -> float: + def compare_best_actions(self, actions: Dict[UnifiedActionId, Beta]) -> float: """ Compare the 2 best actions, hence the 2 actions with the highest expected means of getting a positive reward. Parameters ---------- - actions: Dict[ActionId, Beta] + actions: Dict[UnifiedActionId, Beta] Returns ---------- @@ -244,31 +247,34 @@ def _average(cls, p_of_action: Union[Probability, List[Probability]]): @validate_call def _evaluate_and_select( cls, - p: Union[Dict[ActionId, Probability], Dict[ActionId, List[Probability]]], - actions: Dict[ActionId, Model], - feasible_actions: List[ActionId], - ) -> ActionId: + p: Union[Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]], + actions: Dict[UnifiedActionId, BaseModel], + feasible_actions: List[UnifiedActionId], + ) -> UnifiedActionId: """ Evaluate the feasible actions and select the one with the minimum cost. Parameters ---------- - p: Union[Dict[ActionId, Probability], Dict[ActionId, List[Probability]]] + p: Union[Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]] The dictionary of actions and their sampled probability of getting a positive reward. - actions: Dict[ActionId, Model] + actions: Dict[UnifiedActionId, BaseModel] The dictionary of actions and their associated model. - feasible_actions: List[ActionId] + feasible_actions: List[UnifiedActionId] The list of feasible actions. Returns ------- - selected_action: ActionId + selected_action: UnifiedActionId The selected action. """ # feasible actions enriched with their characteristics (cost, np.mean(probabilities), action_id) - # the negative probability ensures that if we order the actions based on their minimum values the one with + # the negative probability ensures that if we order the actions based on their minimum quantities the one with # higher probability will be selected - sortable_actions = [(actions[a].cost, -cls._average(p[a]), a) for a in feasible_actions] + sortable_actions = [ + (actions[a[0]].cost(*a[1]) if isinstance(a, tuple) else actions[a].cost, -cls._average(p[a]), a) + for a in feasible_actions + ] # select the action with the min cost (and the highest mean of probabilities in case of cost equality) _, _, selected_action = sorted(sortable_actions)[0] @@ -331,7 +337,9 @@ def with_subsidy_factor(self, subsidy_factor: Optional[Float01]) -> Self: return mutated_cost_control_bandit @validate_call - def select_action(self, p: Dict[ActionId, Probability], actions: Dict[ActionId, Model]) -> ActionId: + def select_action( + self, p: Dict[UnifiedActionId, Probability], actions: Dict[UnifiedActionId, BaseModel] + ) -> UnifiedActionId: """ Select the action with the minimum cost among the set of feasible actions (the actions whose expected rewards are above a certain lower bound defined as [(1-subsidy_factor)*max_p, max_p], where max_p is the highest @@ -339,14 +347,14 @@ def select_action(self, p: Dict[ActionId, Probability], actions: Dict[ActionId, Parameters ---------- - p: Dict[ActionId, Probability] + p: Dict[UnifiedActionId, Probability] The dictionary or actions and their sampled probability of getting a positive reward. - actions: Dict[ActionId, BetaCC] + actions: Dict[UnifiedActionId, BetaCC] The dictionary or actions and their cost. 
Returns ------- - selected_action: ActionId + selected_action: UnifiedActionId The selected action. """ # get the highest expected reward sampled value @@ -366,14 +374,14 @@ class MultiObjectiveStrategy(Strategy, ABC): @classmethod @validate_call - def get_pareto_front(cls, p: Dict[ActionId, List[Probability]]) -> List[ActionId]: + def get_pareto_front(cls, p: Dict[UnifiedActionId, List[Probability]]) -> List[UnifiedActionId]: """ Create Pareto optimal set of actions (Pareto front) A* identified as actions that are not dominated by any action out of the set A*. Parameters: ----------- - p: Dict[ActionId, Probability] + p: Dict[UnifiedActionId, Probability] The dictionary or actions and their sampled probability of getting a positive reward for each objective. Return @@ -425,7 +433,7 @@ class MultiObjectiveBandit(MultiObjectiveStrategy): """ @validate_call - def select_action(self, p: Dict[ActionId, List[Probability]], **kwargs) -> ActionId: + def select_action(self, p: Dict[UnifiedActionId, List[Probability]], **kwargs) -> UnifiedActionId: """ Select an action at random from the Pareto optimal set of action. The Pareto optimal action set (Pareto front) A* is the set of actions not dominated by any other actions not in A*. Dominance relation is established based @@ -453,7 +461,9 @@ class MultiObjectiveCostControlBandit(MultiObjectiveStrategy, CostControlStrateg """ @validate_call - def select_action(self, p: Dict[ActionId, List[Probability]], actions: Dict[ActionId, BetaMOCC]) -> ActionId: + def select_action( + self, p: Dict[UnifiedActionId, List[Probability]], actions: Dict[UnifiedActionId, BetaMOCC] + ) -> UnifiedActionId: """ Select the action with the minimum cost among the Pareto optimal set of action. The Pareto optimal action set (Pareto front) A* is the set of actions not dominated by any other actions not in A*. Dominance @@ -461,12 +471,12 @@ def select_action(self, p: Dict[ActionId, List[Probability]], actions: Dict[Acti Parameters ---------- - p: Dict[ActionId, List[Probability]] + p: Dict[UnifiedActionId, List[Probability]] The dictionary of actions and their sampled probability of getting a positive reward for each objective. Returns ------- - selected_action: ActionId + selected_action: UnifiedActionId The selected action. """ pareto_set = self.get_pareto_front(p=p) diff --git a/pybandits/utils.py b/pybandits/utils.py index 3d8a62f..35b0685 100644 --- a/pybandits/utils.py +++ b/pybandits/utils.py @@ -19,9 +19,8 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-
-import json
-from typing import Any, Callable, Dict, List, Optional, Union
+import inspect
+from typing import Callable, List, Optional, Tuple
 
 from bokeh.io import curdoc, output_file, output_notebook, save, show
 from bokeh.models import InlineStyleSheet, TabPanel, Tabs
@@ -29,44 +28,31 @@
 
 from pybandits.pydantic_version_compatibility import validate_call
 
-JSONSerializable = Union[str, int, float, bool, None, List["JSONSerializable"], Dict[str, "JSONSerializable"]]
-
 
 @validate_call
-def to_serializable_dict(d: Dict[str, Any]) -> Dict[str, JSONSerializable]:
-    """
-    Convert a dictionary to a dictionary whose values are JSONSerializable Parameters
-
-    ----------
-    d: dictionary to convert
-
-    Returns
-    -------
-
-    """
-    return json.loads(json.dumps(d, default=dict))
-
-
-@validate_call
-def extract_argument_names_from_function(function_handle: Callable, is_class_method: bool = False) -> List[str]:
+def extract_argument_names(handle: Callable, ignore_arguments: Tuple = ("args", "kwargs")) -> List[str]:
     """
     Extract the argument names from a function or class handle.
 
     Parameters
     ----------
-    function_handle : Callable
-        Handle of a function to extract the argument names from
-
-    is_class_method : bool, defaults to False
-        Whether the function is a class method
+    handle : Callable
+        Handle of a function or class to extract the argument names from
+    ignore_arguments : Tuple
+        Tuple of argument names to ignore
 
     Returns
     -------
     argument_names : List[str]
         List of argument names
     """
-    start_index = int(is_class_method)
-    argument_names = function_handle.__code__.co_varnames[start_index : function_handle.__code__.co_argcount]
+
+    argument_names = list(
+        handle.model_fields.keys() if hasattr(handle, "model_fields") else inspect.signature(handle).parameters
+    )
+    for argument_name in ignore_arguments:
+        if argument_name in argument_names:
+            argument_names.remove(argument_name)
     return argument_names
diff --git a/pyproject.toml b/pyproject.toml
index fb3e0e8..c53b56a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pybandits"
-version = "1.1.0"
+version = "3.0.0"
 description = "Python Multi-Armed Bandit Library"
 authors = [
     "Dario d'Andrea ",
@@ -16,7 +16,7 @@ readme = "README.md"
 python = ">=3.8.1,<3.12"
 loguru = "^0.6"
 numpy = "^1.23"
-pydantic = "1.10.*"
+pydantic = ">=1.10,<3"
 scipy = "^1.9"
 pymc = "^5.3"
 scikit-learn = "^1.1"
diff --git a/tests/test_cmab.py b/tests/test_cmab.py
index 208f381..2266e7e 100644
--- a/tests/test_cmab.py
+++ b/tests/test_cmab.py
@@ -19,205 +19,579 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
- -from typing import get_args +from copy import deepcopy +from typing import Any, Dict, List, Optional, Tuple, Type, Union, get_args import numpy as np import pandas as pd import pytest from hypothesis import given, settings from hypothesis import strategies as st - -from pybandits.base import Float01 -from pybandits.cmab import CmabBernoulli, CmabBernoulliBAI, CmabBernoulliCC -from pybandits.model import BayesianLogisticRegression, BayesianLogisticRegressionCC, StudentT, UpdateMethods +from pydantic.dataclasses import dataclass + +from pybandits.base import ActionId, Float01 +from pybandits.base_model import BaseModel +from pybandits.cmab import BaseCmabBernoulli, CmabBernoulli, CmabBernoulliBAI, CmabBernoulliCC +from pybandits.model import ( + BaseBayesianLogisticRegression, + BayesianLogisticRegression, + BayesianLogisticRegressionCC, + StudentT, + UpdateMethods, +) from pybandits.pydantic_version_compatibility import ( - PYDANTIC_VERSION_1, - PYDANTIC_VERSION_2, - NonNegativeFloat, + PositiveInt, ValidationError, - pydantic_version, ) +from pybandits.quantitative_model import BaseCmabZoomingModel, CmabZoomingModel, CmabZoomingModelCC, QuantitativeModel from pybandits.strategy import BestActionIdentificationBandit, ClassicBandit, CostControlBandit -from pybandits.utils import to_serializable_dict -from tests.test_utils import is_serializable literal_update_methods = get_args(UpdateMethods) -def _apply_update_method_to_state(state, update_method): - for action in state["actions"]: - state["actions"][action]["update_method"] = update_method +@st.composite +def rewards_strategy(draw, n_samples=None): + return draw(st.lists(st.integers(min_value=0, max_value=1), min_size=n_samples or 1, max_size=n_samples or 10)) + + +@st.composite +def quantities_strategy(draw, n_samples=None): + return draw(st.lists(st.floats(min_value=0, max_value=1), min_size=n_samples or 1, max_size=n_samples or 10)) + + +@st.composite +def diff_strategy(draw): + return draw(st.floats(min_value=0.001, max_value=0.5)) + + +@st.composite +def cost_strategy(draw, n_actions): + return draw(st.lists(st.floats(min_value=0, max_value=2), min_size=n_actions, max_size=n_actions)) + + +@pytest.fixture(scope="module") +def monkeymodule(): + with pytest.MonkeyPatch.context() as mp: + yield mp + + +def mock_student_t( + field_value: StudentT, + diff: Any, + monkeymodule: Any, + label: Union[int, str], +) -> int: + """ + Update the mu and sigma fields of a StudentT object. 
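+
+    The perturbation is applied via monkeypatching, so tests can emulate the effect of a posterior
+    update on mu and sigma without running a real MCMC/VI fit.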
+
+    Args:
+        field_value: StudentT object to update
+        diff: Hypothesis diff object for drawing random values
+        monkeymodule: Module for monkey patching
+        label: Label for the diff draw
+
+    Returns:
+        Updated label value
+    """
+    for sub_field in ("mu", "sigma"):
+        try:
+            new_value = getattr(field_value, sub_field) + diff.draw(diff_strategy(), label=f"{label}")
+            monkeymodule.setattr(field_value, sub_field, new_value)
+            label = int(label) + 1 if isinstance(label, (int, str)) else label + 1
+        except AttributeError as e:
+            raise ValueError(f"Invalid StudentT field: {sub_field}") from e
+    return label
+
+
+def mock_update(models: Union[List[BaseModel], BaseModel], diff, monkeymodule, label=0):
+    model_list = [models] if isinstance(models, BaseModel) else models
+    for model in model_list:
+        for field in model.model_fields:
+            field_value = getattr(model, field)
+
+            # Handle StudentT field
+            if isinstance(field_value, StudentT):
+                label = mock_student_t(field_value, diff, monkeymodule, label)
+
+            # Handle list of StudentT objects
+            elif isinstance(field_value, list) and field_value and isinstance(field_value[0], StudentT):
+                for item in field_value:
+                    label = mock_student_t(item, diff, monkeymodule, label)
+
+            # Handle list of BaseModel objects
+            elif isinstance(field_value, list) and field_value and isinstance(field_value[0], BaseModel):
+                mock_update(field_value, diff, monkeymodule, label)
+
+
+@dataclass
+class ModelTestConfig:
+    cmab_class: Type
+    strategy_class: Type
+    model_types: List[Type[BaseModel]]
+
+    def _create_actions(
+        self,
+        action_ids: List[str],
+        costs: Optional[st.SearchStrategy],
+        n_features: PositiveInt,
+        update_method: UpdateMethods,
+        update_kwargs: Optional[Dict[str, Any]],
+    ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+        if len(self.model_types) < len(action_ids):
+            indices = np.random.randint(0, len(self.model_types), len(action_ids))
+            self.model_types = [self.model_types[i] for i in indices]
+        if all(model in [BayesianLogisticRegressionCC, CmabZoomingModelCC] for model in self.model_types):
+            # Generate random costs
+            costs = costs.draw(cost_strategy(n_actions=len(action_ids)))
+            costs = [
+                # bind cost at definition time; a bare closure would late-bind the comprehension variable
+                cost if model_type in [BayesianLogisticRegressionCC] else (lambda x, cost=cost: x**cost)
+                for cost, model_type in zip(costs, self.model_types)
+            ]
+        else:
+            costs = None
+
+        model_cold_start_kwargs = dict(update_method=update_method, update_kwargs=update_kwargs)
+        base_model_cold_start_kwargs = dict(n_features=n_features, **model_cold_start_kwargs)
+        if costs is not None:
+            return {
+                action_id: model_type(
+                    alpha=StudentT(),
+                    betas=[StudentT() for _ in range(n_features)],
+                    **model_cold_start_kwargs,
+                    cost=cost,
+                )
+                if issubclass(model_type, BayesianLogisticRegressionCC)
+                else model_type.cold_start(
+                    dimension=1,
+                    base_model_cold_start_kwargs=base_model_cold_start_kwargs,
+                    cost=cost,
+                )  # CmabZoomingModelCC
+                for action_id, model_type, cost in zip(action_ids, self.model_types, costs)
+            }, base_model_cold_start_kwargs
+        else:
+            return {
+                action_id: model_type(
+                    alpha=StudentT(), betas=[StudentT() for _ in range(n_features)], **model_cold_start_kwargs
+                )
+                if issubclass(model_type, BayesianLogisticRegression)
+                else model_type.cold_start(
+                    dimension=1,
+                    base_model_cold_start_kwargs=base_model_cold_start_kwargs,
+                )  # CmabZoomingModel
+                for action_id, model_type in zip(action_ids, self.model_types)
+            }, base_model_cold_start_kwargs
+
+    def create_cmab_and_actions(
+        self,
+        action_ids: List[str],
+        epsilon: Optional[Float01],
+        costs: st.SearchStrategy,
+        exploit_p:
Union[st.SearchStrategy[Optional[Float01]], Optional[float]], + subsidy_factor: Union[st.SearchStrategy[Optional[Float01]], Optional[float]], + n_features: PositiveInt, + update_method: UpdateMethods, + update_kwargs: Optional[Dict[str, Any]], + ) -> Tuple[BaseCmabBernoulli, Dict[ActionId, BaseModel], Dict[str, Any]]: + actions, base_model_cold_start_kwargs = self._create_actions( + action_ids, costs, n_features, update_method, update_kwargs + ) + default_action = action_ids[0] if epsilon else None + + kwargs = { + k: v + for k, v in { + "epsilon": epsilon, + "default_action": default_action, + }.items() + if v is not None + } + for param, classes in zip(["subsidy_factor", "exploit_p"], [[CmabBernoulliCC], [CmabBernoulliBAI]]): + if self.cmab_class in classes: + actual_param = eval(param) + if isinstance(actual_param, float) or actual_param is None: + kwargs[param] = actual_param + else: + kwargs[param] = actual_param.draw(st.floats(min_value=0, max_value=1)) + + cmab = self.cmab_class(actions=actions, **kwargs) + if any(isinstance(model, BaseCmabZoomingModel) for model in actions.values()): + kwargs["base_model_cold_start_kwargs"] = base_model_cold_start_kwargs + if any(isinstance(model, BaseBayesianLogisticRegression) for model in actions.values()): + kwargs.update(base_model_cold_start_kwargs) + + return cmab, actions, kwargs + + +TEST_CONFIGS = { + "cmab": ModelTestConfig(CmabBernoulli, ClassicBandit, [BayesianLogisticRegression, CmabZoomingModel]), + "cmab_bai": ModelTestConfig( + CmabBernoulliBAI, BestActionIdentificationBandit, [BayesianLogisticRegression, CmabZoomingModel] + ), + "cmab_cc": ModelTestConfig( + CmabBernoulliCC, + CostControlBandit, + [BayesianLogisticRegressionCC, CmabZoomingModelCC], + ), +} -######################################################################################################################## +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) +@given( + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_features=st.integers(min_value=1, max_value=5), + subsidy_factor=st.data(), + exploit_p=st.data(), + update_method=st.sampled_from(literal_update_methods), + update_kwargs=st.sampled_from([None, {"draws": 500}]), +) +def test_cold_start( + config: ModelTestConfig, + action_ids: List[str], + epsilon: Optional[float], + costs, + n_features, + exploit_p, + subsidy_factor, + update_method, + update_kwargs, +): + # Create CMAB instance + cmab, actions, kwargs = config.create_cmab_and_actions( + action_ids, epsilon, costs, exploit_p, subsidy_factor, n_features, update_method, update_kwargs + ) + # Cold start comparison logic (modified for different model types) + cold_start_kwargs = { + "action_ids": { + action + for action, model in zip(action_ids, config.model_types) + if issubclass(model, (BayesianLogisticRegression)) + }, + "quantitative_action_ids": { + action for action, model in zip(action_ids, config.model_types) if issubclass(model, QuantitativeModel) + }, + } + if all(model in [BayesianLogisticRegressionCC, CmabZoomingModelCC] for model in config.model_types): + cold_start_kwargs["action_ids_cost"] = { + action: model.cost for action, model in actions.items() if isinstance(model, (BayesianLogisticRegressionCC)) + } + cold_start_kwargs["quantitative_action_ids_cost"] = { + action: model.cost for action, model in actions.items() if isinstance(model, 
CmabZoomingModelCC) + } + cold_start_kwargs.update(kwargs) # Add exploit_p or subsidy_factor if needed + cold_start_kwargs = {k: v for k, v in cold_start_kwargs.items() if v is not None} + assert config.cmab_class.cold_start(**cold_start_kwargs) == cmab -# CmabBernoulli with strategy=ClassicBandit() +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) +@given( + action_ids=st.lists(st.text(min_size=1), min_size=2, max_size=5, unique=True), + n_features=st.integers(min_value=1, max_value=5), + costs=st.data(), + subsidy_factor=st.data(), + exploit_p=st.data(), + update_method=st.sampled_from(literal_update_methods), + update_kwargs=st.sampled_from([None, {"draws": 500}]), +) +def test_bad_initialization( + config: ModelTestConfig, + action_ids: List[str], + n_features: int, + costs, + exploit_p, + subsidy_factor, + update_method, + update_kwargs, +): + """Test various invalid initialization scenarios for CMAB models""" + kwargs = {"cost": 1} if config.cmab_class == CmabBernoulliCC else {} + # Test empty actions + with pytest.raises(AttributeError): + config.cmab_class(actions={}) -@settings(deadline=500) -@given(st.integers(max_value=100)) -def test_create_cmab_bernoulli_cold_start(a_int): - # n_features must be > 0 - if a_int <= 0: - with pytest.raises(ValidationError): - CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=a_int) - else: - mab1 = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=a_int) - mab2 = CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=a_int), - "a2": BayesianLogisticRegression.cold_start(n_features=a_int), - } - ) - mab2.predict_actions_randomly = True - assert mab1 == mab2 + # Test single action (should warn) + single_action = {action_ids[0]: config.model_types[0].cold_start(n_features=n_features, **kwargs)} + with pytest.warns(UserWarning): + config.cmab_class(actions=single_action) + # Test mismatched feature dimensions + actions_wrong_dims = { + action_ids[0]: config.model_types[0].cold_start(n_features=n_features, **kwargs), + action_ids[1]: config.model_types[0].cold_start(n_features=n_features + 1, **kwargs), + } + with pytest.raises(AttributeError): + config.cmab_class(actions=actions_wrong_dims) -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=10)) -def test_cmab_can_instantiate(n_features): - with pytest.raises(TypeError): - CmabBernoulli() + # Test mismatched update methods + actions_wrong_update = { + action_ids[0]: config.model_types[0].cold_start(n_features=n_features, update_method="VI", **kwargs), + action_ids[1]: config.model_types[0].cold_start(n_features=n_features, update_method="MCMC", **kwargs), + } with pytest.raises(AttributeError): - CmabBernoulli(actions={}) - with pytest.warns(UserWarning): - CmabBernoulli(actions={"a1": BayesianLogisticRegression.cold_start(n_features=n_features)}) - with pytest.raises(ValidationError): # predict_with_proba is not an argument of init - CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - predict_with_proba=True, - ) + config.cmab_class(actions=actions_wrong_update) + + # Test mismatched update kwargs + base_kwargs = {"draws": 500} if update_kwargs else {"draws": 1000} + actions_wrong_kwargs = { + action_ids[0]: config.model_types[0].cold_start( + n_features=n_features, update_method=update_method, update_kwargs=base_kwargs, **kwargs + ), + action_ids[1]: 
config.model_types[0].cold_start( + n_features=n_features, + update_method=update_method, + update_kwargs={"draws": base_kwargs["draws"] // 2}, + **kwargs, + ), + } + with pytest.raises(AttributeError): + config.cmab_class(actions=actions_wrong_kwargs) + + # Test invalid model types + actions_wrong_type = { + action_ids[0]: BayesianLogisticRegression.cold_start(n_features=n_features), + action_ids[1]: BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=1.0), + } with pytest.raises(ValidationError): - CmabBernoulli( - actions={ - "a1": None, - "a2": None, - }, - ) - CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - strategy=ClassicBandit(), - ) - mab = CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - } - ) + config.cmab_class(actions=actions_wrong_type) + + # Test None actions + with pytest.raises(ValidationError): + config.cmab_class(actions={aid: None for aid in action_ids}) - assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert not mab.predict_actions_randomly - assert not mab.predict_with_proba - mab.predict_with_proba = True - mab.predict_actions_randomly = True - assert mab.predict_actions_randomly - assert mab.predict_with_proba + # Test invalid strategy parameters + if config.cmab_class == CmabBernoulliBAI: + with pytest.raises(ValidationError): + config.create_cmab_and_actions( + action_ids, + None, + costs, + exploit_p.draw(st.sampled_from([-0.1, 1.1])), + subsidy_factor, + n_features, + update_method, + update_kwargs, + ) + elif config.cmab_class == CmabBernoulliCC: + with pytest.raises(ValidationError): + config.create_cmab_and_actions( + action_ids, + None, + costs, + exploit_p, + subsidy_factor.draw(st.sampled_from([-0.1, 1.1])), + n_features, + update_method, + update_kwargs, + ) -@settings(deadline=500) +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) @given( - st.integers(min_value=1, max_value=5), - st.integers(min_value=6, max_value=10), - st.integers(min_value=0, max_value=1), - st.just("draws"), - st.just(2), + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + n_samples=st.integers(min_value=1, max_value=100), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + rewards=st.data(), + quantities=st.data(), + costs=st.data(), + n_features=st.integers(min_value=1, max_value=5), + subsidy_factor=st.data(), + exploit_p=st.data(), + update_method=st.sampled_from(literal_update_methods), + update_kwargs=st.sampled_from([None, {"draws": 500}]), ) -def test_cmab_init_with_wrong_blr_models(n_features, other_n_features, update_method_index, kwarg_to_alter, factor): - with pytest.raises(AttributeError): - CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - "a3": BayesianLogisticRegression.cold_start(n_features=other_n_features), - } - ) - update_method = literal_update_methods[update_method_index] - other_update_method = literal_update_methods[1 - update_method_index] - with pytest.raises(AttributeError): - CmabBernoulli( - actions={ - "a1": 
BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features, update_method=other_update_method), - } - ) - model = BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method) - altered_kwarg = model.update_kwargs[kwarg_to_alter] // factor - with pytest.raises(AttributeError): - CmabBernoulli( - actions={ - "a1": model, - "a2": BayesianLogisticRegression.cold_start( - n_features=n_features, - update_method=update_method, - update_kwargs={kwarg_to_alter: altered_kwarg}, - ), - } - ) +def test_update( + config: ModelTestConfig, + action_ids: List[str], + n_samples: int, + epsilon: Optional[float], + rewards, + quantities, + costs, + n_features, + exploit_p, + subsidy_factor, + update_method, + update_kwargs, +): + # Create CMAB instance + cmab, _, kwargs = config.create_cmab_and_actions( + action_ids, epsilon, costs, exploit_p, subsidy_factor, n_features, update_method, update_kwargs + ) + context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) + # Generate random rewards + reward_data = rewards.draw(rewards_strategy(n_samples=n_samples)) + # Test updates with generated data + actions_to_update = np.random.choice(np.array(action_ids, dtype=np.object_), size=n_samples, replace=True).tolist() + # Generate quantities only if there are any QuantitativeModel actions + for_update_kwargs = {"actions": actions_to_update, "rewards": reward_data} + if any(isinstance(model, CmabZoomingModel) for model in cmab.actions.values()): + quantity_data = quantities.draw(quantities_strategy(n_samples=n_samples)) + quantity_data = [ + q if isinstance(cmab.actions[action], QuantitativeModel) else None + for q, action in zip(quantity_data, actions_to_update) + ] + for_update_kwargs["quantities"] = quantity_data + for transform in [lambda x: x, list, pd.DataFrame]: + old_cmab = deepcopy(cmab) + cmab.update(context=transform(context), **for_update_kwargs) + assert cmab != old_cmab -@settings(deadline=60000) -@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods)) -def test_cmab_update(n_samples, n_features, update_method): - actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() - rewards = np.random.choice([0, 1], size=n_samples).tolist() - def run_update(context): - mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features, update_method=update_method) - assert all( - [ - mab.actions[a] - == BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method) - for a in set(actions) - ] - ) - mab.update(context=context, actions=actions, rewards=rewards) - assert all( - [ - mab.actions[a] - != BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method) - for a in set(actions) - ] - ) - assert not mab.predict_actions_randomly - - # context is numpy array +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) +@given( + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + n_samples=st.integers(min_value=1, max_value=100), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_features=st.integers(min_value=1, max_value=5), + subsidy_factor=st.data(), + exploit_p=st.data(), + update_method=st.sampled_from(literal_update_methods), + update_kwargs=st.sampled_from([None, {"draws": 500}]), + diff=st.data(), +) +def test_predict( + config: 
ModelTestConfig, + action_ids: List[str], + n_samples: int, + epsilon: Optional[float], + costs, + n_features, + exploit_p, + subsidy_factor, + update_method, + update_kwargs, + diff, + monkeymodule, +): + # Create CMAB instance + cmab = config.create_cmab_and_actions( + action_ids, epsilon, costs, exploit_p, subsidy_factor, n_features, update_method, update_kwargs + )[0] context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - assert type(context) is np.ndarray - run_update(context=context) + # Test predictions with random forbidden actions + forbidden = ( + set(np.random.choice(np.array(action_ids, dtype=np.object_), size=len(action_ids) // 2, replace=False)) + if len(action_ids) > 2 + else None + ) + if cmab.default_action is not None and forbidden is not None and cmab.default_action in forbidden: + forbidden.remove(cmab.default_action) + + mock_update(list(cmab.actions.values()), diff, monkeymodule) + best_actions, probs, weights = cmab.predict(context=context, forbidden_actions=forbidden) + assert len(best_actions) == n_samples + assert len(probs) == n_samples + assert len(weights) == n_samples + + if forbidden: + assert all(len(prob) == len(action_ids) - len(forbidden) for prob in probs) + assert all(action not in forbidden for action in best_actions) + assert all(action not in forbidden for prob in probs for action in prob.keys()) + assert all(action not in forbidden for weight in weights for action in weight.keys()) + else: + assert all(len(prob) == len(action_ids) for prob in probs) + assert all(action in action_ids for action in best_actions) + if isinstance(cmab, CmabBernoulli) and not epsilon: + assert all(prob[best_action] == max(prob.values()) for best_action, prob in zip(best_actions, probs)) - # context is python list - context = context.tolist() - assert type(context) is list - run_update(context=context) - # context is pandas DataFrame - context = pd.DataFrame(context) - assert type(context) is pd.DataFrame - run_update(context=context) +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) +@given( + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_features=st.integers(min_value=1, max_value=5), + subsidy_factor=st.data(), + exploit_p=st.data(), + update_method=st.sampled_from(literal_update_methods), + update_kwargs=st.sampled_from([None, {"draws": 500}]), + diff=st.data(), +) +def test_serialization( + config: ModelTestConfig, + action_ids: List[str], + epsilon: Optional[float], + costs, + n_features, + exploit_p, + subsidy_factor, + update_method, + update_kwargs, + diff, + monkeymodule, +): + # Create CMAB instance + cmab = config.create_cmab_and_actions( + action_ids, epsilon, costs, exploit_p, subsidy_factor, n_features, update_method, update_kwargs + )[0] + + pre_update_state = cmab.get_state() + mock_update(list(cmab.actions.values()), diff, monkeymodule) + post_update_state = cmab.get_state() + # Verify model updates + assert pre_update_state != post_update_state + + # Test serialization + restored_cmab = config.cmab_class.from_state(post_update_state[1]) + assert restored_cmab == cmab + + +def test_cmab_actions_matching(n_features=2): + blr_model = BayesianLogisticRegression.cold_start(n_features=n_features) + blr_model2 = BayesianLogisticRegression.cold_start(n_features=n_features + 1) + zoom_model = 
CmabZoomingModel.cold_start(base_model_cold_start_kwargs={"n_features": n_features}) + zoom_model2 = CmabZoomingModel.cold_start(base_model_cold_start_kwargs={"n_features": n_features + 1}) + actions = {"a1": blr_model, "a2": blr_model2} + with pytest.raises(AttributeError): + BaseCmabBernoulli.check_models(actions) + actions = {"a1": zoom_model, "a2": zoom_model2} + with pytest.raises(AttributeError): + BaseCmabBernoulli.check_models(actions) -@settings(deadline=10000) -@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods)) -def test_cmab_update_not_all_actions(n_samples, n_feat, update_method): - actions = np.random.choice(["a3", "a4"], size=n_samples).tolist() - rewards = np.random.choice([0, 1], size=n_samples).tolist() - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_feat)) - mab = CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4"}, n_features=n_feat, update_method=update_method) + actions = {"a1": blr_model, "a2": zoom_model2} + with pytest.raises(AttributeError): + BaseCmabBernoulli.check_models(actions) - mab.update(context=context, actions=actions, rewards=rewards) - assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_feat, update_method=update_method) - assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_feat, update_method=update_method) - assert mab.actions["a3"] != BayesianLogisticRegression.cold_start(n_features=n_feat, update_method=update_method) - assert mab.actions["a4"] != BayesianLogisticRegression.cold_start(n_features=n_feat, update_method=update_method) + actions = {"a1": blr_model, "a2": zoom_model} + BaseCmabBernoulli.check_models(actions) @settings(deadline=500) @@ -244,64 +618,6 @@ def test_cmab_update_shape_mismatch(n_samples, n_features, update_method): mab.update(context=[], actions=actions, rewards=rewards) -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100)) -def test_cmab_predict_cold_start(n_samples, n_features): - def run_predict(context): - mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features) - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] - - # context is numpy array - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - assert type(context) is np.ndarray - run_predict(context=context) - - # context is python list - context = context.tolist() - assert type(context) is list - run_predict(context=context) - - # context is pandas DataFrame - context = pd.DataFrame(context) - assert type(context) is pd.DataFrame - run_predict(context=context) - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=100), st.integers(min_value=1, max_value=3)) -def test_cmab_predict_not_cold_start(n_samples, n_features): - def run_predict(context): - mab = CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression(alpha=StudentT(mu=1, sigma=2), betas=n_features * [StudentT()]), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - ) - assert not mab.predict_actions_randomly - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples - - # context is numpy 
array - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - assert type(context) is np.ndarray - run_predict(context=context) - - # context is python list - context = context.tolist() - assert type(context) is list - run_predict(context=context) - - # context is pandas DataFrame - context = pd.DataFrame(context) - assert type(context) is pd.DataFrame - run_predict(context=context) - - @settings(deadline=500) @given(st.integers(min_value=1, max_value=10)) def test_cmab_predict_shape_mismatch(a_int): @@ -311,653 +627,3 @@ def test_cmab_predict_shape_mismatch(a_int): mab.predict(context=context) with pytest.raises(AttributeError): mab.predict(context=[]) - - -def test_cmab_predict_with_forbidden_actions(n_features=3): - def run_predict(mab): - context = np.random.uniform(low=-1.0, high=1.0, size=(1000, n_features)) - assert set(mab.predict(context=context, forbidden_actions={"a2", "a3", "a4", "a5"})[0]) == {"a1"} - assert set(mab.predict(context=context, forbidden_actions={"a1", "a3"})[0]) == {"a2", "a4", "a5"} - assert set(mab.predict(context=context, forbidden_actions={"a1"})[0]) == {"a2", "a3", "a4", "a5"} - assert set(mab.predict(context=context, forbidden_actions=set())[0]) == {"a1", "a2", "a3", "a4", "a5"} - - if pydantic_version == PYDANTIC_VERSION_1: - expected_error_type = ValueError - elif pydantic_version == PYDANTIC_VERSION_2: - expected_error_type = ValidationError - else: - raise ValueError(f"Unsupported Pydantic version: {pydantic_version}") - with pytest.raises(expected_error_type): # not a set - assert set(mab.predict(context=context, forbidden_actions={1})[0]) - with pytest.raises(ValueError): # invalid action_ids - assert set(mab.predict(context=context, forbidden_actions={"a1", "a9999", "a", 5})[0]) - with pytest.raises(ValueError): # all actions forbidden - assert set(mab.predict(context=context, forbidden_actions={"a1", "a2", "a3", "a4", "a5"})[0]) - with pytest.raises(ValueError): # all actions forbidden (unordered) - assert set(mab.predict(n_samples=1000, forbidden_actions={"a5", "a4", "a2", "a3", "a1"})[0]) - - # cold start mab - mab = CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) - run_predict(mab=mab) - - # not cold start mab - mab = CmabBernoulli( - actions={ - "a1": BayesianLogisticRegression(alpha=StudentT(mu=1, sigma=2), betas=[StudentT(), StudentT(), StudentT()]), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - "a3": BayesianLogisticRegression.cold_start(n_features=n_features), - "a4": BayesianLogisticRegression(alpha=StudentT(mu=4, sigma=5), betas=[StudentT(), StudentT(), StudentT()]), - "a5": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - ) - assert mab != CmabBernoulli.cold_start(action_ids={"a1", "a2", "a3", "a4", "a5"}, n_features=n_features) - run_predict(mab=mab) - - -@settings(deadline=500) -@given(st.integers(min_value=1), st.integers(min_value=1), st.integers(min_value=2, max_value=100)) -def test_cmab_get_state(mu, sigma, n_features): - actions: dict = { - "a1": BayesianLogisticRegression(alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()]), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - } - - cmab = CmabBernoulli(actions=actions) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {}, - "predict_with_proba": False, - "predict_actions_randomly": False, - "epsilon": None, - "default_action": None, - } - ) - - class_name, cmab_state = cmab.get_state() - assert 
class_name == "CmabBernoulli" - assert cmab_state == expected_state - - assert is_serializable(cmab_state), "Internal state is not serializable" - - -@settings(deadline=500) -@given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "alpha": st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - "betas": st.lists( - st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - min_size=3, - max_size=3, - ), - }, - ), - min_size=2, - ), - "strategy": st.fixed_dictionaries({}), - } - ), - update_method=st.sampled_from(literal_update_methods), -) -def test_cmab_from_state(state, update_method): - _apply_update_method_to_state(state, update_method) - cmab = CmabBernoulli.from_state(state) - assert isinstance(cmab, CmabBernoulli) - - actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict - expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()} - assert expected_actions == actual_actions - - # Ensure get_state and from_state compatibility - new_cmab = globals()[cmab.get_state()[0]].from_state(state=cmab.get_state()[1]) - assert new_cmab == cmab - - -######################################################################################################################## - - -# CmabBernoulli with strategy=BestActionIdentificationBandit() - - -@settings(deadline=500) -@given(st.integers(max_value=100)) -def test_create_cmab_bernoulli_bai_cold_start(a_int): - # n_features must be > 0 - if a_int <= 0: - with pytest.raises(ValidationError): - CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int) - else: - # default exploit_p - mab1 = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int) - mab2 = CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=a_int), - "a2": BayesianLogisticRegression.cold_start(n_features=a_int), - } - ) - mab2.predict_actions_randomly = True - assert mab1 == mab2 - - # set exploit_p - mab1 = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=a_int, exploit_p=0.42) - mab2 = CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=a_int), - "a2": BayesianLogisticRegression.cold_start(n_features=a_int), - }, - exploit_p=0.42, - ) - mab2.predict_actions_randomly = True - assert mab1 == mab2 - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=10)) -def test_cmab_bai_can_instantiate(n_features): - with pytest.raises(TypeError): - CmabBernoulliBAI() - with pytest.raises(AttributeError): - CmabBernoulliBAI(actions={}) - with pytest.warns(UserWarning): - CmabBernoulliBAI(actions={"a1": BayesianLogisticRegression.cold_start(n_features=2)}) - with pytest.raises(ValidationError): # predict_with_proba is not an argument of init - CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - predict_with_proba=True, - ) - with pytest.raises(ValidationError): - CmabBernoulliBAI( - actions={ - "a1": None, - "a2": None, - }, - ) - CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": 
BayesianLogisticRegression.cold_start(n_features=n_features), - }, - strategy=BestActionIdentificationBandit(), - ) - mab = CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - } - ) - assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert not mab.predict_actions_randomly - assert not mab.predict_with_proba - assert mab.strategy == BestActionIdentificationBandit() - - mab = CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - exploit_p=0.42, - ) - assert mab.actions["a1"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert mab.actions["a2"] == BayesianLogisticRegression.cold_start(n_features=n_features) - assert not mab.predict_actions_randomly - assert not mab.predict_with_proba - assert mab.strategy == BestActionIdentificationBandit(exploit_p=0.42) - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=100), st.integers(min_value=1, max_value=3)) -def test_cmab_bai_predict(n_samples, n_features): - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - - # cold start - mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features) - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] - - # not cold start - mab = CmabBernoulliBAI( - actions={ - "a1": BayesianLogisticRegression.cold_start(n_features=n_features), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - }, - exploit_p=0.42, - ) - assert not mab.predict_actions_randomly - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples - - -@settings(deadline=10000) -@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods)) -def test_cmab_bai_update(n_samples, n_features, update_method): - actions = np.random.choice(["a1", "a2"], size=n_samples).tolist() - rewards = np.random.choice([0, 1], size=n_samples).tolist() - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features, update_method=update_method) - assert mab.predict_actions_randomly - assert all( - [ - mab.actions[a] == BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method) - for a in set(actions) - ] - ) - mab.update(context=context, actions=actions, rewards=rewards) - assert all( - [ - mab.actions[a] != BayesianLogisticRegression.cold_start(n_features=n_features, update_method=update_method) - for a in set(actions) - ] - ) - assert not mab.predict_actions_randomly - - -@settings(deadline=500) -@given( - st.integers(min_value=1), - st.integers(min_value=1), - st.integers(min_value=2, max_value=100), - st.floats(min_value=0, max_value=1), -) -def test_cmab_bai_get_state(mu, sigma, n_features, exploit_p: Float01): - actions: dict = { - "a1": BayesianLogisticRegression(alpha=StudentT(mu=mu, 
sigma=sigma), betas=n_features * [StudentT()]), - "a2": BayesianLogisticRegression.cold_start(n_features=n_features), - } - - cmab = CmabBernoulliBAI(actions=actions, exploit_p=exploit_p) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {"exploit_p": exploit_p}, - "predict_with_proba": False, - "predict_actions_randomly": False, - "epsilon": None, - "default_action": None, - } - ) - - class_name, cmab_state = cmab.get_state() - assert class_name == "CmabBernoulliBAI" - assert cmab_state == expected_state - - assert is_serializable(cmab_state), "Internal state is not serializable" - - -@settings(deadline=500) -@given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "alpha": st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - "betas": st.lists( - st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - min_size=3, - max_size=3, - ), - }, - ), - min_size=2, - ), - "strategy": st.one_of( - st.just({}), - st.just({"exploit_p": None}), - st.builds(lambda x: {"exploit_p": x}, st.floats(min_value=0, max_value=1)), - ), - } - ), - update_method=st.sampled_from(literal_update_methods), -) -def test_cmab_bai_from_state(state, update_method): - _apply_update_method_to_state(state, update_method) - cmab = CmabBernoulliBAI.from_state(state) - assert isinstance(cmab, CmabBernoulliBAI) - - actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict - expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()} - assert expected_actions == actual_actions - - expected_exploit_p = cmab.strategy.get_expected_value_from_state(state, "exploit_p") - actual_exploit_p = cmab.strategy.exploit_p - assert expected_exploit_p == actual_exploit_p - - # Ensure get_state and from_state compatibility - new_cmab = globals()[cmab.get_state()[0]].from_state(state=cmab.get_state()[1]) - assert new_cmab == cmab - - -######################################################################################################################## - - -# CmabBernoulli with strategy=CostControlBandit() - - -@settings(deadline=500) -@given(st.integers(max_value=100)) -def test_create_cmab_bernoulli_cc_cold_start(a_int): - action_ids_cost = {"a1": 10, "a2": 20.5} - # n_features must be > 0 - if a_int <= 0: - with pytest.raises(ValidationError): - CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int) - else: - # default subsidy_factor - mab1 = CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int) - mab2 = CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a1"]), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a2"]), - } - ) - mab2.predict_actions_randomly = True - assert mab1 == mab2 - - # set subsidy_factor - mab1 = CmabBernoulliCC.cold_start(action_ids_cost=action_ids_cost, n_features=a_int, subsidy_factor=0.42) - mab2 = CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a1"]), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=a_int, cost=action_ids_cost["a2"]), - }, - subsidy_factor=0.42, - ) - 
mab2.predict_actions_randomly = True - assert mab1 == mab2 - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=10)) -def test_cmab_cc_can_instantiate(n_features): - with pytest.raises(TypeError): - CmabBernoulliCC() - with pytest.raises(AttributeError): - CmabBernoulliCC(actions={}) - with pytest.warns(UserWarning): - CmabBernoulliCC(actions={"a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10)}) - with pytest.raises(ValidationError): # predict_with_proba is not an argument of init - CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - }, - predict_with_proba=True, - ) - with pytest.raises(ValidationError): - CmabBernoulliCC( - actions={ - "a1": None, - "a2": None, - }, - ) - CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - }, - strategy=CostControlBandit(), - ) - mab = CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - } - ) - assert mab.actions["a1"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) - assert mab.actions["a2"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) - assert not mab.predict_actions_randomly - assert mab.predict_with_proba - assert mab.strategy == CostControlBandit() - - mab = CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - }, - subsidy_factor=0.42, - ) - assert mab.actions["a1"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) - assert mab.actions["a2"] == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10) - assert not mab.predict_actions_randomly - assert mab.predict_with_proba - assert mab.strategy == CostControlBandit(subsidy_factor=0.42) - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=100), st.integers(min_value=1, max_value=3)) -def test_cmab_cc_predict(n_samples, n_features): - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - - # cold start - mab = CmabBernoulliCC.cold_start(action_ids_cost={"a1": 10, "a2": 20.5}, n_features=n_features) - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] - - # not cold start - mab = CmabBernoulliCC( - actions={ - "a1": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=20.5), - }, - subsidy_factor=0.42, - ) - assert not mab.predict_actions_randomly - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert len(selected_actions) == len(probs) == len(weighted_sums) == n_samples - - -@settings(deadline=None) -@given(st.just(100), st.just(3), st.sampled_from(literal_update_methods)) -def test_cmab_cc_update(n_samples, n_features, update_method): - actions = np.random.choice(["a1", 
"a2"], size=n_samples).tolist() - rewards = np.random.choice([0, 1], size=n_samples).tolist() - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - mab = CmabBernoulliCC.cold_start( - action_ids_cost={"a1": 10, "a2": 10}, n_features=n_features, update_method=update_method - ) - assert mab.predict_actions_randomly - assert all( - [ - mab.actions[a] - == BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10, update_method=update_method) - for a in set(actions) - ] - ) - mab.update(context=context, actions=actions, rewards=rewards) - assert all( - [ - mab.actions[a] - != BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=10, update_method=update_method) - for a in set(actions) - ] - ) - assert not mab.predict_actions_randomly - - -@settings(deadline=500) -@given( - st.integers(min_value=1), - st.integers(min_value=1), - st.integers(min_value=2, max_value=100), - st.floats(min_value=0), - st.floats(min_value=0), - st.floats(min_value=0, max_value=1), -) -def test_cmab_cc_get_state( - mu, sigma, n_features, cost_1: NonNegativeFloat, cost_2: NonNegativeFloat, subsidy_factor: Float01 -): - actions: dict = { - "a1": BayesianLogisticRegressionCC( - alpha=StudentT(mu=mu, sigma=sigma), betas=n_features * [StudentT()], cost=cost_1 - ), - "a2": BayesianLogisticRegressionCC.cold_start(n_features=n_features, cost=cost_2), - } - - cmab = CmabBernoulliCC(actions=actions, subsidy_factor=subsidy_factor) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {"subsidy_factor": subsidy_factor}, - "predict_with_proba": True, - "predict_actions_randomly": False, - "epsilon": None, - "default_action": None, - } - ) - - class_name, cmab_state = cmab.get_state() - assert class_name == "CmabBernoulliCC" - assert cmab_state == expected_state - - assert is_serializable(cmab_state), "Internal state is not serializable" - - -@settings(deadline=500) -@given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "alpha": st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - "betas": st.lists( - st.fixed_dictionaries( - { - "mu": st.floats(min_value=-100, max_value=100), - "nu": st.floats(min_value=0, max_value=100), - "sigma": st.floats(min_value=0, max_value=100), - } - ), - min_size=3, - max_size=3, - ), - "cost": st.floats(min_value=0), - }, - ), - min_size=2, - ), - "strategy": st.one_of( - st.just({}), - st.just({"subsidy_factor": None}), - st.builds(lambda x: {"subsidy_factor": x}, st.floats(min_value=0, max_value=1)), - ), - } - ), - update_method=st.sampled_from(literal_update_methods), -) -def test_cmab_cc_from_state(state, update_method): - _apply_update_method_to_state(state, update_method) - cmab = CmabBernoulliCC.from_state(state) - assert isinstance(cmab, CmabBernoulliCC) - - actual_actions = to_serializable_dict(cmab.actions) # Normalize the dict - expected_actions = {k: {**v, **state["actions"][k]} for k, v in actual_actions.items()} - assert expected_actions == actual_actions - - expected_subsidy_factor = cmab.strategy.get_expected_value_from_state(state, "subsidy_factor") - actual_subsidy_factor = cmab.strategy.subsidy_factor - assert expected_subsidy_factor == actual_subsidy_factor - - # Ensure get_state and from_state compatibility - new_cmab = 
globals()[cmab.get_state()[0]].from_state(state=cmab.get_state()[1]) - assert new_cmab == cmab - - -######################################################################################################################## - - -# Cmab with epsilon-greedy super strategy - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100)) -def test_epsilon_greedy_cmab_predict_cold_start(n_samples, n_features): - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - - mab = CmabBernoulli.cold_start(action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1") - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=100), st.integers(min_value=1, max_value=3)) -def test_epsilon_greedy_cmab_bai_predict(n_samples, n_features): - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - - mab = CmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, n_features=n_features, epsilon=0.1, default_action="a1") - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] - - -@settings(deadline=500) -@given(st.integers(min_value=1, max_value=100), st.integers(min_value=1, max_value=3)) -def test_epsilon_greedy_cmab_cc_predict(n_samples, n_features): - context = np.random.uniform(low=-1.0, high=1.0, size=(n_samples, n_features)) - - # cold start - mab = CmabBernoulliCC.cold_start( - action_ids_cost={"a1": 10, "a2": 20.5}, n_features=n_features, epsilon=0.1, default_action="a1" - ) - selected_actions, probs, weighted_sums = mab.predict(context=context) - assert mab.predict_actions_randomly - assert all([a in ["a1", "a2"] for a in selected_actions]) - assert len(selected_actions) == n_samples - assert probs == n_samples * [{"a1": 0.5, "a2": 0.5}] - assert weighted_sums == n_samples * [{"a1": 0, "a2": 0}] diff --git a/tests/test_mab.py b/tests/test_mab.py index 3668347..5c6fa3e 100644 --- a/tests/test_mab.py +++ b/tests/test_mab.py @@ -26,9 +26,21 @@ import numpy as np import pytest from hypothesis import given +from numpy._typing import ArrayLike from pytest_mock import MockerFixture -from pybandits.base import ACTION_IDS_PREFIX, ActionId, BinaryReward, Float01, Probability +from pybandits.base import ( + ACTION_IDS_PREFIX, + ActionId, + BinaryReward, + Float01, + Probability, + PyBanditsBaseModel, + UnifiedMOProbability, + UnifiedMOProbabilityWeight, + UnifiedProbability, + UnifiedProbabilityWeight, +) from pybandits.mab import BaseMab from pybandits.model import Beta, BetaCC from pybandits.pydantic_version_compatibility import ValidationError @@ -36,11 +48,28 @@ class DummyMab(BaseMab): + actions: Dict[ActionId, Beta] epsilon: Optional[Float01] = None default_action: Optional[ActionId] = None - def update(self, actions: List[ActionId], rewards: Union[List[BinaryReward], List[List[BinaryReward]]]): - self._validate_update_params(actions=actions, rewards=rewards) + def _update( + self, + 
actions: List[ActionId], + rewards: Union[List[BinaryReward], List[List[BinaryReward]]], + quantities: Optional[List[Union[float, List[float], None]]], + context: Optional[ArrayLike], + ): + pass + + def _inner_get_action_probabilities( + self, valid_actions: Set[ActionId], **kwargs + ) -> Union[ + Dict[ActionId, List[UnifiedProbability]], + Dict[ActionId, List[UnifiedProbabilityWeight]], + Dict[ActionId, List[UnifiedMOProbability]], + Dict[ActionId, List[UnifiedMOProbabilityWeight]], + ]: + return {} def predict( self, @@ -69,7 +98,7 @@ def test_base_mab_raise_on_bad_actions(cost=0.0): DummyMab(actions={"a1": None, "a2": None}, strategy=ClassicBandit()) with pytest.warns(UserWarning): DummyMab(actions={"a1": Beta()}, strategy=ClassicBandit()) - with pytest.raises(AttributeError): + with pytest.raises(ValidationError): DummyMab(actions={"a1": Beta(), "a2": BetaCC(cost=cost)}, strategy=ClassicBandit()) @@ -77,19 +106,27 @@ def test_base_mab_check_update_params(): dummy_mab = DummyMab(actions={"a1": Beta(), "a2": Beta()}, strategy=ClassicBandit()) with pytest.raises(AttributeError): # actionId doesn't exist - dummy_mab._validate_update_params(actions=["a1", "a3"], rewards=[1, 1]) + dummy_mab._validate_update_params(actions=["a1", "a3"], rewards=[1, 1], quantities=None, context=None) with pytest.raises(AttributeError): # actionId cannot be empty - dummy_mab._validate_update_params(actions=[""], rewards=[1]) + dummy_mab._validate_update_params(actions=[""], rewards=[1], quantities=None, context=None) + with pytest.raises(AttributeError): + dummy_mab._validate_update_params(actions=["a1", "a2"], rewards=[1], quantities=None, context=None) + + with pytest.raises(AttributeError): + # quantities of different length + dummy_mab._validate_update_params(actions=["a1", "a2"], rewards=[1, 1], quantities=[1], context=None) + with pytest.raises(AttributeError): - dummy_mab._validate_update_params(actions=["a1", "a2"], rewards=[1]) + # context of different length + dummy_mab._validate_update_params(actions=["a1", "a2"], rewards=[1, 1], quantities=None, context=[1]) @given(r1=st.integers(min_value=0, max_value=1), r2=st.integers(min_value=0, max_value=1)) def test_base_mab_update_ok(r1, r2): dummy_mab = DummyMab(actions={"a1": Beta(), "a2": Beta()}, strategy=ClassicBandit()) - dummy_mab.update(actions=["a1", "a2"], rewards=[r1, r2]) - dummy_mab.update(actions=["a1", "a1"], rewards=[r1, r2]) + dummy_mab.update(actions=["a1", "a2"], rewards=[r1, r2], quantities=None, context=None) + dummy_mab.update(actions=["a1", "a1"], rewards=[r1, r2], quantities=None, context=None) ######################################################################################################################## @@ -100,7 +137,7 @@ def test_base_mab_update_ok(r1, r2): def test_returns_empty_dict_when_no_action_specific_kwargs(): kwargs = {"param1": 1, "param2": 2} - result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + result, _, _ = BaseMab._extract_action_specific_kwargs(**kwargs) assert result == {} @@ -108,13 +145,13 @@ def test_processes_kwargs_with_non_dict_values(): kwargs = { f"{ACTION_IDS_PREFIX}param1": "not_a_dict", } - result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + result, _, _ = BaseMab._extract_action_specific_kwargs(**kwargs) assert result == {} def test_manages_kwargs_with_empty_dicts(): kwargs = {f"{ACTION_IDS_PREFIX}param1": {}, f"{ACTION_IDS_PREFIX}param2": {}} - result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + result, _, _ = 
BaseMab._extract_action_specific_kwargs(**kwargs) assert result == {} @@ -124,7 +161,7 @@ def test_extracts_action_specific_kwargs_with_valid_keys(): f"{ACTION_IDS_PREFIX}param2": {"action1": 3, "action2": 4}, } expected_output = {"action1": {"param1": 1, "param2": 3}, "action2": {"param1": 2, "param2": 4}} - result, _ = BaseMab._extract_action_specific_kwargs(**kwargs) + result, _, _ = BaseMab._extract_action_specific_kwargs(**kwargs) assert result == expected_output @@ -135,18 +172,25 @@ def test_extracts_action_specific_kwargs_with_valid_keys(): def test_extracts_action_model_class_and_attributes_with_valid_kwargs(mocker: MockerFixture): - class MockActionModel: - def __init__(self, param1, param2): - pass + class MockActionModel(PyBanditsBaseModel): + param1: int + param2: int mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) - mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=["param1", "param2"]) + mocker.patch("pybandits.mab.extract_argument_names", return_value=["param1", "param2"]) + mocker.patch("pybandits.mab.issubclass", return_value=True) kwargs = {"param1": 1, "param2": 2} - action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) + kwargs_backup = kwargs.copy() + ( + model_cold_start, + _, + action_general_kwargs, + _, + ) = BaseMab._extract_action_model_class_and_attributes(kwargs) - assert action_model_cold_start == MockActionModel - assert action_general_kwargs == {"param1": 1, "param2": 2} + assert model_cold_start == MockActionModel + assert action_general_kwargs == kwargs_backup def test_returns_callable_for_action_model_cold_start_instantiation(mocker: MockerFixture): @@ -156,12 +200,17 @@ def cold_start(cls): pass mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) - mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) - + mocker.patch("pybandits.mab.extract_argument_names", return_value=[]) + mocker.patch("pybandits.mab.issubclass", return_value=True) kwargs = {} - action_model_cold_start, _ = BaseMab._extract_action_model_class_and_attributes(**kwargs) + ( + model_cold_start, + _, + action_general_kwargs, + _, + ) = BaseMab._extract_action_model_class_and_attributes(kwargs) - assert callable(action_model_cold_start) + assert callable(model_cold_start) def test_handles_empty_kwargs_gracefully(mocker: MockerFixture): @@ -170,13 +219,18 @@ def __init__(self): pass mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) - mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) - + mocker.patch("pybandits.mab.extract_argument_names", return_value=[]) + mocker.patch("pybandits.mab.issubclass", return_value=True) kwargs = {} - action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) + ( + model_cold_start, + quantitative_model_cold_start, + action_general_kwargs, + quantitative_action_general_kwargs, + ) = BaseMab._extract_action_model_class_and_attributes(kwargs) - assert action_model_cold_start == MockActionModel - assert action_general_kwargs == {} + assert model_cold_start == MockActionModel + assert action_general_kwargs == kwargs def test_handles_kwargs_with_no_matching_action_model_attributes(mocker: MockerFixture): @@ -185,13 +239,10 @@ def __init__(self): pass mocker.patch("pybandits.mab.get_args", return_value=(None, MockActionModel)) - 
mocker.patch("pybandits.mab.extract_argument_names_from_function", return_value=[]) - + mocker.patch("pybandits.mab.extract_argument_names", return_value=[]) kwargs = {"irrelevant_param": 1} - action_model_cold_start, action_general_kwargs = BaseMab._extract_action_model_class_and_attributes(**kwargs) - - assert action_model_cold_start == MockActionModel - assert action_general_kwargs == {} + with pytest.raises(ValueError): + BaseMab._extract_action_model_class_and_attributes(kwargs) ######################################################################################################################## diff --git a/tests/test_model.py b/tests/test_model.py index b5ade18..8f0ff1f 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -55,7 +55,7 @@ def test_can_init_beta(success_counter, failure_counter): assert (b.n_successes, b.n_failures) == (1, 1) -def test_both_or_neither_counters_are_defined(): +def test_both_or_neither_models_are_defined(): with pytest.raises(ValidationError): Beta(n_successes=0) with pytest.raises(ValidationError): @@ -112,21 +112,21 @@ def test_can_init_betaCC(a_float): def test_can_init_base_beta_mo(): # init with default params - b = BetaMO(counters=[Beta(), Beta()]) - assert b.counters[0].n_successes == 1 and b.counters[0].n_failures == 1 - assert b.counters[1].n_successes == 1 and b.counters[1].n_failures == 1 + b = BetaMO(models=[Beta(), Beta()]) + assert b.models[0].n_successes == 1 and b.models[0].n_failures == 1 + assert b.models[1].n_successes == 1 and b.models[1].n_failures == 1 # init with empty dict - b = BetaMO(counters=[{}, {}]) - assert b.counters[0] == Beta() + b = BetaMO(models=[{}, {}]) + assert b.models[0] == Beta() # invalid init with BetaCC instead of Beta with pytest.raises(ValidationError): - BetaMO(counters=[BetaCC(cost=1), BetaCC(cost=1)]) + BetaMO(models=[BetaCC(cost=1), BetaCC(cost=1)]) def test_calculate_proba_beta_mo(): - b = BetaMO(counters=[Beta(), Beta()]) + b = BetaMO(models=[Beta(), Beta()]) b.sample_proba() @@ -139,12 +139,12 @@ def test_beta_update_mo(rewards1, rewards2): rewards1, rewards2 = rewards1[:min_len], rewards2[:min_len] rewards = [[a, b] for a, b in zip(rewards1, rewards2)] - b = BetaMO(counters=[Beta(n_successes=11, n_failures=22), Beta(n_successes=33, n_failures=44)]) + b = BetaMO(models=[Beta(n_successes=11, n_failures=22), Beta(n_successes=33, n_failures=44)]) b.update(rewards=rewards) assert b == BetaMO( - counters=[ + models=[ Beta(n_successes=11 + sum(rewards1), n_failures=22 + len(rewards1) - sum(rewards1)), Beta(n_successes=33 + sum(rewards2), n_failures=44 + len(rewards2) - sum(rewards2)), ] @@ -162,16 +162,16 @@ def test_beta_update_mo(rewards1, rewards2): def test_can_init_beta_mo(): # init with default params - b = BetaMO(counters=[Beta(), Beta()]) - assert b.counters == [Beta(), Beta()] + b = BetaMO(models=[Beta(), Beta()]) + assert b.models == [Beta(), Beta()] # init with empty dict - b = BetaMO(counters=[{}, {}]) - assert b.counters == [Beta(), Beta()] + b = BetaMO(models=[{}, {}]) + assert b.models == [Beta(), Beta()] # invalid init with BetaCC instead of Beta with pytest.raises(ValidationError): - BetaMO(counters=[BetaCC(cost=1), BetaCC(cost=1)]) + BetaMO(models=[BetaCC(cost=1), BetaCC(cost=1)]) ######################################################################################################################## @@ -184,21 +184,21 @@ def test_can_init_beta_mo(): def test_can_init_beta_mo_cc(a_float): if a_float < 0 or np.isnan(a_float): with pytest.raises(ValidationError): - 
+            BetaMOCC(models=[Beta(), Beta()], cost=a_float)
     else:
         # init with default params
-        b = BetaMOCC(counters=[Beta(), Beta()], cost=a_float)
-        assert b.counters == [Beta(), Beta()]
+        b = BetaMOCC(models=[Beta(), Beta()], cost=a_float)
+        assert b.models == [Beta(), Beta()]
         assert b.cost == a_float

         # init with empty dict
-        b = BetaMOCC(counters=[{}, {}], cost=a_float)
-        assert b.counters == [Beta(), Beta()]
+        b = BetaMOCC(models=[{}, {}], cost=a_float)
+        assert b.models == [Beta(), Beta()]
         assert b.cost == a_float

         # invalid init with BetaCC instead of Beta
         with pytest.raises(ValidationError):
-            BetaMOCC(counters=[BetaCC(cost=1), BetaCC(cost=1)], cost=a_float)
+            BetaMOCC(models=[BetaCC(cost=1), BetaCC(cost=1)], cost=a_float)


########################################################################################################################
@@ -285,12 +285,12 @@ def test_check_context_matrix(n_samples, n_features):
         blr.check_context_matrix(context=[1.0])  # context is a 1-dim list


-@given(st.integers(min_value=1, max_value=1000), st.integers(min_value=1, max_value=100))
+@given(st.integers(min_value=1, max_value=100), st.integers(min_value=1, max_value=100))
 def test_blr_sample_proba(n_samples, n_features):
     def sample_proba(context):
-        prob, weighted_sum = blr.sample_proba(context=context)
-
-        assert type(prob) is type(weighted_sum) is np.ndarray  # type of the returns must be np.ndarray
+        prob_weighted_sum = blr.sample_proba(context=context)
+        prob, weighted_sum = list(zip(*prob_weighted_sum))
+        assert type(prob) is type(weighted_sum) is tuple  # after unzipping the pairs, both returns are tuples
         assert len(prob) == len(weighted_sum) == n_samples  # return 1 sampled probability and ws per each sample
         assert all([0 <= p <= 1 for p in prob])  # probs must be in the interval [0, 1]
diff --git a/tests/test_quantitative_model.py b/tests/test_quantitative_model.py
new file mode 100644
index 0000000..27dcedd
--- /dev/null
+++ b/tests/test_quantitative_model.py
@@ -0,0 +1,227 @@
+from typing import List
+
+import numpy as np
+
+# Generated by Qodo Gen
+import pytest
+from hypothesis import given
+from hypothesis import strategies as st
+from hypothesis.extra.numpy import arrays
+
+from pybandits.base import BinaryReward
+from pybandits.model import Beta
+from pybandits.quantitative_model import CmabZoomingModel, Segment, SmabZoomingModel, ZoomingModel
+
+
+def tuple_of_tuples_strategy(n, m, elements=st.floats(min_value=0, max_value=1)):
+    return st.tuples(*[st.tuples(*[elements for _ in range(m)]) for _ in range(n)])
+
+
+# Create segment with valid intervals array of shape (n,2)
+@given(tuple_of_tuples_strategy(2, 2))
+def test_create_valid_segment(intervals):
+    segment = Segment(intervals=intervals)
+    assert isinstance(segment, Segment)
+    assert len(segment.intervals) == 2
+    assert all(len(interval) == 2 for interval in segment.intervals)
+
+
+# Access minimum and maximum quantities via mins and maxs properties
+@given(tuple_of_tuples_strategy(3, 2))
+def test_mins_maxs_properties(intervals):
+    segment = Segment(intervals=intervals)
+    assert np.all(segment.mins == np.array([interval[0] for interval in intervals]))
+    assert np.all(segment.maxs == np.array([interval[1] for interval in intervals]))
+
+
+# Create segment with empty intervals array
+def test_create_empty_segment():
+    empty_intervals = np.empty((0, 2))
+    segment = Segment(intervals=empty_intervals)
+    assert len(segment.intervals) == 0
+    assert len(segment.mins) == 0
+    assert len(segment.maxs) == 0
+
+
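+# Illustrative sketch, intentionally not collected by pytest (leading underscore, no
+# test_ prefix). It is the happy-path counterpart of the non-adjacent failure exercised
+# below, and it ASSUMES that adding two adjacent segments returns a single segment
+# covering both intervals; only Segment, is_adjacent and __add__ are known to exist.
+def _sketch_merge_adjacent_segments():
+    seg1 = Segment(intervals=((0.0, 0.5),))
+    seg2 = Segment(intervals=((0.5, 1.0),))
+    assert seg1.is_adjacent(seg2)  # the segments share the 0.5 boundary
+    merged = seg1 + seg2  # assumed result: one segment spanning [0.0, 1.0]
+    return merged
+
+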
+# Create segment with invalid interval shape
+@given(arrays(np.float64, shape=(2, 3), elements=st.floats(min_value=0, max_value=100)))
+def test_invalid_interval_shape(intervals):
+    with pytest.raises(ValueError, match="Intervals must have shape .n, 2."):
+        Segment(intervals=intervals)
+
+
+# Add non-adjacent segments
+def test_add_nonadjacent_segments():
+    seg1 = Segment(intervals=np.array([[0, 0.1], [0, 0.1]]))
+    seg2 = Segment(intervals=np.array([[0.2, 0.5], [0.1, 0.2]]))
+    with pytest.raises(ValueError, match="Segments must be adjacent."):
+        seg1 + seg2
+
+
+class DummyZoomingModel(ZoomingModel):
+    def _init_base_model(self):
+        self._base_model = Beta()
+
+    def _inner_update(self, segments: List[Segment], rewards: List[BinaryReward], context: None = None):
+        pass
+
+
+# Model initialization with valid parameters creates correct number of segments
+@given(dimension=st.integers(min_value=1, max_value=3), n_1d_segments=st.integers(min_value=2, max_value=8))
+def test_init_creates_correct_segments(dimension, n_1d_segments):
+    model = DummyZoomingModel.cold_start(dimension=dimension, n_1d_segments=n_1d_segments, n_max_segments=None)
+    expected_segments = n_1d_segments**dimension
+    assert len(model.sub_actions) == expected_segments
+
+
+# Update method correctly processes rewards and quantities for existing segments
+@given(
+    st.integers(min_value=1, max_value=5).flatmap(
+        lambda size: st.tuples(
+            st.lists(st.integers(min_value=0, max_value=1), min_size=size, max_size=size),
+            st.lists(st.floats(min_value=0, max_value=1), min_size=size, max_size=size),
+        )
+    )
+)
+def test_update_processes_rewards_correctly(data):
+    rewards, quantities = data
+    model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=4)
+    initial_segments = len(model.sub_actions)
+    model.update(quantities=quantities, rewards=rewards, context=None)
+    assert len(model.sub_actions) <= initial_segments
+
+
+# Best performing segment gets split when below max segments limit
+def test_best_segment_splits():
+    model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=2, n_max_segments=4)
+    quantities = [0.25, 0.75]
+    rewards = [1, 0]
+    model.update(quantities=quantities, rewards=rewards, context=None)
+    assert len(model.sub_actions) == 3
+
+
+# Adjacent segments with similar performance get merged correctly
+def test_similar_segments_merge():
+    model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=4, comparison_threshold=0.5)
+    initial_segmented_actions = model.sub_actions.copy()
+    quantities = [0.25, 0.75]
+    rewards = [1, 1]
+    model.update(quantities=quantities, rewards=rewards, context=None)
+    assert initial_segmented_actions.keys() != model.sub_actions.keys()
+    assert len(model.sub_actions) == 4
+
+
+# sample_proba returns a valid probability for each segment
+def test_sample_proba_returns_valid_probabilities():
+    model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=2)
+    probs = model.sample_proba()
+    for value, prob in probs.items():
+        assert 0 <= prob <= 1
+        assert all(0 <= v <= 1 for v in value)
+
+
+# Update with empty rewards/quantities list
+def test_update_with_empty_lists():
+    model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=2)
+    initial_segments = len(model.sub_actions)
+    model.update(quantities=[], rewards=[], context=None)
+    assert len(model.sub_actions) == initial_segments
+
+
+# Update when at maximum number of segments
+def test_update_at_max_segments():
+    model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=4, n_max_segments=4)
+    quantities = [0.5]
+    rewards = [1]
+    model.update(quantities=quantities, rewards=rewards, context=None)
+    assert len(model.sub_actions) <= model.n_max_segments
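+
+
+# Illustrative sketch, intentionally not collected by pytest: the splitting behaviour
+# exercised by test_best_segment_splits and capped in test_update_at_max_segments can
+# be pictured on plain intervals -- the winning 1-D segment [lo, hi] is halved at its
+# midpoint, so the segment count grows by one per split until the cap is reached.
+# Pure Python, no pybandits API is assumed here.
+def _sketch_split_interval(lo: float, hi: float):
+    mid = (lo + hi) / 2
+    return (lo, mid), (mid, hi)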
+
+
+# Merging segments when only 2 segments remain
+def test_merge_with_two_segments():
+    model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=2, comparison_threshold=1.0)
+    quantities = [0.25, 0.75]
+    rewards = [1, 1]
+    model.update(quantities=quantities, rewards=rewards, context=None)
+    assert len(model.sub_actions) >= 1
+
+
+# Comparing non-adjacent segments for merging
+def test_non_adjacent_segments_comparison():
+    model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=4)
+    segments = list(model.segmented_actions.keys())
+    non_adjacent = [segments[0], segments[2]]
+    assert not non_adjacent[0].is_adjacent(non_adjacent[1])
+
+
+# Values that fall on segment boundaries
+@given(st.integers(min_value=2, max_value=4))
+def test_boundary_values(n_segments):
+    model = DummyZoomingModel.cold_start(dimension=1, n_1d_segments=n_segments)
+    boundary = 1.0 / n_segments
+    quantities = [boundary]
+    rewards = [1]
+    model.update(quantities=quantities, rewards=rewards, context=None)
+    mapped_segments = model._map_values_to_segments(quantities)
+    assert len(mapped_segments) >= 1
+
+
+# Test SmabZoomingModel initialization with valid parameters
+@given(dimension=st.integers(min_value=1, max_value=3), n_1d_segments=st.integers(min_value=2, max_value=8))
+def test_initializes_smab_zooming_model_correctly(dimension, n_1d_segments):
+    model = SmabZoomingModel(dimension=dimension, n_1d_segments=n_1d_segments)
+    expected_segments = n_1d_segments**dimension
+    assert len(model.segmented_actions) == expected_segments
+
+
+# Test SmabZoomingModel update with valid rewards and quantities
+@given(
+    rewards=st.lists(st.integers(min_value=0, max_value=1), min_size=1, max_size=5),
+    quantities=st.lists(st.floats(min_value=0, max_value=1), min_size=1, max_size=5),
+)
+def test_updates_smab_zooming_model_correctly(rewards, quantities):
+    model = SmabZoomingModel(dimension=1, n_1d_segments=4)
+    initial_segments = len(model.segmented_actions)
+    model.update(quantities=quantities, rewards=rewards, context=None)
+    assert len(model.segmented_actions) <= initial_segments
+
+
+# Test SmabZoomingModel sample_proba returns valid probabilities
+def test_sample_proba_returns_valid_probabilities_smab():
+    model = SmabZoomingModel(dimension=1, n_1d_segments=2)
+    probas = model.sample_proba()
+    for value, prob in probas.items():
+        assert 0 <= prob <= 1
+        assert 0 <= value <= 1
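+
+
+# Illustrative sketch, intentionally not collected by pytest: sample_proba (tested
+# above, and again for the contextual variant below) yields a mapping from a segment's
+# representative quantity to a sampled probability; a Thompson-sampling caller would
+# then simply pick the best-scoring quantity. Pure Python, no pybandits API assumed.
+def _sketch_pick_best_quantity(sampled_probas):
+    return max(sampled_probas, key=sampled_probas.get)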
+# Test CmabZoomingModel update with rewards, quantities, and context of matching sizes
+@given(
+    rewards=st.lists(st.integers(min_value=0, max_value=1), min_size=5, max_size=5),
+    quantities=st.lists(st.floats(min_value=0, max_value=1), min_size=5, max_size=5),
+    context=arrays(np.float64, shape=(5, 1), elements=st.floats(min_value=0, max_value=1)),
+)
+def test_updates_cmab_zooming_model_correctly(rewards, quantities, context):
+    model = CmabZoomingModel(dimension=1, n_1d_segments=4, base_model_cold_start_kwargs={"n_features": 1})
+    initial_segments = len(model.segmented_actions)
+    model.update(quantities=quantities, rewards=rewards, context=context)
+    assert len(model.segmented_actions) <= initial_segments
+
+
+# Test CmabZoomingModel sample_proba returns valid probabilities
+def test_sample_proba_returns_valid_probabilities_cmab():
+    model = CmabZoomingModel(dimension=1, n_1d_segments=2, base_model_cold_start_kwargs={"n_features": 1})
+    probas = model.sample_proba()
+    for value, prob in probas.items():
+        assert 0 <= prob <= 1
+        assert all(0 <= v <= 1 for v in value)
diff --git a/tests/test_smab.py b/tests/test_smab.py
index 2c8a34e..6664982 100644
--- a/tests/test_smab.py
+++ b/tests/test_smab.py
@@ -20,918 +20,526 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-import json
 from copy import deepcopy
-from typing import List
+from typing import Any, Dict, List, Optional, Tuple, Type, Union
 
+import numpy as np
 import pytest
-from hypothesis import given
+from hypothesis import given, settings
 from hypothesis import strategies as st
+from pydantic.dataclasses import dataclass
 
-from pybandits.base import BinaryReward, Float01
+from pybandits.base import ActionId, Float01
+from pybandits.base_model import BaseModel
 from pybandits.model import Beta, BetaCC, BetaMO, BetaMOCC
-from pybandits.pydantic_version_compatibility import NonNegativeFloat, ValidationError
-from pybandits.smab import SmabBernoulli, SmabBernoulliBAI, SmabBernoulliCC, SmabBernoulliMO, SmabBernoulliMOCC
+from pybandits.pydantic_version_compatibility import PositiveInt, ValidationError
+from pybandits.quantitative_model import QuantitativeModel, SmabZoomingModel, SmabZoomingModelCC
+from pybandits.smab import (
+    BaseSmabBernoulli,
+    SmabBernoulli,
+    SmabBernoulliBAI,
+    SmabBernoulliCC,
+    SmabBernoulliMO,
+    SmabBernoulliMOCC,
+)
 from pybandits.strategy import (
+    BestActionIdentificationBandit,
     ClassicBandit,
     CostControlBandit,
     MultiObjectiveBandit,
     MultiObjectiveCostControlBandit,
 )
-from pybandits.utils import to_serializable_dict
-from tests.test_utils import is_serializable
-
-
-@pytest.fixture(scope="session")
-def n_samples() -> int:
-    return 1000
-
-
-########################################################################################################################
-
-
-# SmabBernoulli with strategy=ClassicBandit()
-
-
-def test_create_smab_bernoulli_cold_start():
-    assert SmabBernoulli.cold_start(action_ids={"a1", "a2"}) == SmabBernoulli(
-        actions={"a1": Beta(), "a2": Beta()},
-    )
-@given(st.integers(min_value=0, max_value=1), st.integers(min_value=0, max_value=1))
-def test_base_smab_update_ok(r1, r2):
-    mab = SmabBernoulli(actions={"a1": Beta(), "a2": Beta()})
-    mab.update(actions=["a1", "a2"], rewards=[r1, r2])
-    mab.update(actions=["a1", "a1"], rewards=[r1, r2])
-
-
-def test_can_instantiate_smab():
-    with pytest.raises(TypeError):
-        SmabBernoulli()
-    with pytest.raises(AttributeError):
-        SmabBernoulli(actions={})
-    with pytest.warns(UserWarning):
-        SmabBernoulli(actions={"action1": Beta()})
-    with pytest.raises(ValidationError):
-        SmabBernoulli(
-            actions={
-                "action1": None,
-                "action2": None,
-            },
+@st.composite
+def rewards_strategy(draw, n_objectives=None, n_samples=None):
+    if n_objectives:
+        return draw(
+            st.lists(
+                st.lists(st.integers(min_value=0, max_value=1), min_size=n_objectives, max_size=n_objectives),
+                min_size=n_samples or 1,
+                max_size=n_samples or 10,
+            )
         )
-    SmabBernoulli(
-        actions={
-            "action1": Beta(),
-            "action2": Beta(),
-        },
-        strategy=ClassicBandit(),
-    )
-    smab = SmabBernoulli(
-        actions={
-            
"action1": Beta(), - "action2": Beta(), - } - ) - - assert smab.actions["action1"] == Beta() - assert smab.actions["action2"] == Beta() - - -@given( - st.integers(min_value=1), - st.integers(min_value=1), -) -def test_can_instantiate_smab_with_params(a, b): - s = SmabBernoulli( - actions={ - "action1": Beta(n_successes=a, n_failures=b), - "action2": Beta(n_successes=a, n_failures=b), - }, - ) - assert (s.actions["action1"].n_successes == a) and (s.actions["action1"].n_failures == b) - assert s.actions["action1"] == s.actions["action2"] - - -@given(st.integers(max_value=0)) -def test_smab_predict_raise_when_samples_low(n_samples): - s = SmabBernoulli(actions={"a1": Beta(), "a2": Beta()}) - with pytest.raises(ValidationError): - s.predict(n_samples=n_samples) - - -def test_smab_predict_raise_when_all_actions_forbidden(): - s = SmabBernoulli(actions={"a1": Beta(), "a2": Beta()}) - with pytest.raises(ValueError): - s.predict(n_samples=10, forbidden_actions=["a1", "a2"]) - + return draw(st.lists(st.integers(min_value=0, max_value=1), min_size=n_samples or 1, max_size=n_samples or 10)) -def test_smab_predict(n_samples: int): - s = SmabBernoulli( - actions={ - "a0": Beta(), - "a1": Beta(n_successes=5, n_failures=5), - "forb_1": Beta(n_successes=10, n_failures=1), - "best": Beta(n_successes=10, n_failures=5), - "forb_2": Beta(n_successes=100, n_failures=4), - "a5": Beta(), - }, - ) - forbidden_actions = set(["forb_1", "forb_2"]) - best_actions, probs = s.predict(n_samples=n_samples, forbidden_actions=forbidden_actions) - assert ["forb1" not in p.keys() for p in probs], "forbidden actions weren't removed from the output" +@st.composite +def quantities_strategy(draw, n_samples=None): + return draw(st.lists(st.floats(min_value=0, max_value=1), min_size=n_samples or 1, max_size=n_samples or 10)) - valid_actions = set(s.actions.keys()) - forbidden_actions - for probas, best_action in zip(probs, best_actions): - assert set(probas.keys()) == valid_actions, "restituted actions don't match valid actions" - best_proba = probas[best_action] - assert best_proba == max(probas.values()), "best action hasn't the best probability" +@st.composite +def diff_strategy(draw): + return draw(st.integers(min_value=1, max_value=10)) -@given( - st.lists(st.integers(min_value=0, max_value=1), min_size=1), - st.lists(st.integers(min_value=0, max_value=1), min_size=1), -) -def test_smab_update(rewards: List[BinaryReward], rewards_1: List[BinaryReward]): - updated = SmabBernoulli( - actions={ - "a0": Beta(), - "a1": Beta(), - }, - ) - batch_updated = deepcopy(updated) +@st.composite +def cost_strategy(draw, n_actions): + return draw(st.lists(st.floats(min_value=0, max_value=2), min_size=n_actions, max_size=n_actions)) - # update the model sequentially - [updated.update(actions=["a0"], rewards=[reward]) for reward in rewards] - [updated.update(actions=["a1"], rewards=[reward]) for reward in rewards_1] - # update the model in batch - batch_updated.update(actions=["a0"] * len(rewards) + ["a1"] * len(rewards_1), rewards=rewards + rewards_1) +@pytest.fixture(scope="module") +def monkeymodule(): + with pytest.MonkeyPatch.context() as mp: + yield mp - assert updated == batch_updated, "update() has different result when each item is applied separately" - sum_failures = sum([1 - x for x in rewards]) - assert updated.actions["a0"] == Beta( - n_successes=1 + sum(rewards), n_failures=1 + sum_failures - ), "Unexpected results in counter" +def mock_update(models: List[BaseModel], diff, monkeymodule, label=0): + for model in models: + for 
field in model.model_fields:
+            if field in ("n_successes", "n_failures"):
+                monkeymodule.setattr(model, field, getattr(model, field) + diff.draw(diff_strategy(), label=f"{label}"))
+                label += 1
+            elif isinstance(sub_models := getattr(model, field), list) and sub_models and isinstance(sub_models[0], BaseModel):
+                mock_update(sub_models, diff, monkeymodule, label)
 
-    sum_failures_1 = sum([1 - x for x in rewards_1])
-    assert updated.actions["a1"] == Beta(
-        n_successes=1 + sum(rewards_1), n_failures=1 + sum_failures_1
-    ), "Unexpected results in counter"
 
 
+@dataclass
+class ModelTestConfig:
+    smab_class: Type
+    strategy_class: Type
+    model_types: List[Type[BaseModel]]
 
-@given(st.text())
-def test_smab_accepts_only_valid_actions(s):
-    if s == "":
-        with pytest.raises(ValidationError):
-            SmabBernoulli(
-                actions={
-                    s: Beta(),
-                    s + "_": Beta(),
+    def _create_actions(
+        self, action_ids: List[str], costs: Optional[st.SearchStrategy], n_objectives: Optional[PositiveInt]
+    ) -> Dict[str, Any]:
+        if len(self.model_types) < len(action_ids):
+            indices = np.random.randint(0, len(self.model_types), len(action_ids))
+            self.model_types = [self.model_types[i] for i in indices]
+        if all(model in [BetaCC, SmabZoomingModelCC, BetaMOCC] for model in self.model_types):
+            # Generate random costs; bind each drawn cost via a default argument so the
+            # quantity-cost lambdas don't all close over the last loop value
+            costs = costs.draw(cost_strategy(n_actions=len(action_ids)))
+            costs = [
+                cost if model_type in [BetaCC, BetaMOCC] else (lambda x, c=cost: x**c)
+                for cost, model_type in zip(costs, self.model_types)
+            ]
+        else:
+            costs = None
+
+        if n_objectives is None:
+            if costs is not None:
+                return {
+                    action_id: model_type(cost=cost)
+                    if issubclass(model_type, BetaCC)
+                    else model_type.cold_start(dimension=1, cost=cost)  # SmabZoomingModelCC
+                    for action_id, model_type, cost in zip(action_ids, self.model_types, costs)
+                }
+            else:
+                return {
+                    action_id: model_type()
+                    if issubclass(model_type, Beta)
+                    else model_type.cold_start(dimension=1)  # SmabZoomingModel
+                    for action_id, model_type in zip(action_ids, self.model_types)
+                }
+        else:
+            if costs is not None:
+                return {
+                    action_id: model_type(models=[Beta() for _ in range(n_objectives)], cost=cost)
+                    for action_id, model_type, cost in zip(action_ids, self.model_types, costs)
+                }
+            else:
+                return {
+                    action_id: model_type(models=[Beta() for _ in range(n_objectives)])
+                    for action_id, model_type in zip(action_ids, self.model_types)
                 }
-            )
-    else:
-        SmabBernoulli(actions={s: Beta(), s + "_": Beta()})
-
-
-@given(st.integers(min_value=1), st.integers(min_value=1), st.integers(min_value=1), st.integers(min_value=1))
-def test_smab_get_state(a, b, c, d):
-    actions = {"action1": Beta(n_successes=a, n_failures=b), "action2": Beta(n_successes=c, n_failures=d)}
-    smab = SmabBernoulli(actions=actions)
-
-    expected_state = to_serializable_dict(
-        {
-            "actions": actions,
-            "strategy": {},
-            "epsilon": None,
-            "default_action": None,
-        }
-    )
-
-    class_name, smab_state = smab.get_state()
-    assert class_name == "SmabBernoulli"
-    assert smab_state == expected_state
-
-
-@given(
-    state=st.fixed_dictionaries(
-        {
-            "actions": st.dictionaries(
-                keys=st.text(min_size=1, max_size=10),
-                values=st.fixed_dictionaries(
-                    {
-                        "n_successes": st.integers(min_value=1, max_value=100),
-                        "n_failures": st.integers(min_value=1, max_value=100),
-                    },
-                ),
-                min_size=2,
-            ),
-            "strategy": st.fixed_dictionaries({}),
-        }
-    )
-)
-def test_smab_from_state(state):
-    smab = SmabBernoulli.from_state(state)
-    assert isinstance(smab, SmabBernoulli)
-
-    expected_actions = state["actions"]
-    actual_actions = to_serializable_dict(smab.actions)  # Normalize the dict
-    assert 
expected_actions == actual_actions - - # Ensure get_state and from_state compatibility - new_smab = globals()[smab.get_state()[0]].from_state(state=smab.get_state()[1]) - assert new_smab == smab - - -######################################################################################################################## - - -# SmabBernoulli with strategy=BestActionIdentificationBandit() - - -def test_create_smab_bernoulli_bai(): - # default exploit_p - assert SmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}) == SmabBernoulliBAI( - actions={"a1": Beta(), "a2": Beta()}, - ) - # set exploit_p - assert SmabBernoulliBAI.cold_start(action_ids={"a1", "a2"}, exploit_p=0.2) == SmabBernoulliBAI( - actions={"a1": Beta(), "a2": Beta()}, - exploit_p=0.2, - ) - - -def test_can_init_smabbai(): - # init default params - s = SmabBernoulliBAI( - actions={ - "a1": Beta(), - "a2": Beta(), - }, - ) - - assert s.actions["a1"] == Beta() - assert s.actions["a2"] == Beta() - assert s.strategy.exploit_p == 0.5 - - # init input params - s = SmabBernoulliBAI( - actions={ - "a1": Beta(n_successes=1, n_failures=2), - "a2": Beta(n_successes=3, n_failures=4), - }, - exploit_p=0.3, - ) - assert s.actions["a1"] == Beta(n_successes=1, n_failures=2) - assert s.actions["a2"] == Beta(n_successes=3, n_failures=4) - assert s.strategy.exploit_p == 0.3 - - -def test_smabbai_predict(n_samples: int): - s = SmabBernoulliBAI(actions={"a1": Beta(), "a2": Beta()}) - _, _ = s.predict(n_samples=n_samples) - - -def test_smabbai_update(): - s = SmabBernoulliBAI(actions={"a1": Beta(), "a2": Beta()}) - s.update(actions=["a1", "a1"], rewards=[1, 0]) - -def test_smabbai_with_betacc(): - # Fails because smab bernoulli with BAI shouldn't support BetaCC - with pytest.raises(ValidationError): - SmabBernoulliBAI( - actions={ - "a1": BetaCC(cost=10), - "a2": BetaCC(cost=20), - }, + def create_smab_and_actions( + self, + action_ids: List[str], + epsilon: Optional[Float01], + costs: st.SearchStrategy, + n_objectives: st.SearchStrategy[PositiveInt], + exploit_p: Union[st.SearchStrategy[Optional[Float01]], Optional[float]], + subsidy_factor: Union[st.SearchStrategy[Optional[Float01]], Optional[float]], + ) -> Tuple[BaseSmabBernoulli, Dict[ActionId, BaseModel], Dict[str, Any]]: + n_objectives = ( + n_objectives.draw(st.integers(min_value=1, max_value=10)) + if self.smab_class in [SmabBernoulliMO, SmabBernoulliMOCC] + else None ) - - -@given( - st.integers(min_value=1), - st.integers(min_value=1), - st.integers(min_value=1), - st.integers(min_value=1), - st.floats(min_value=0, max_value=1), -) -def test_smab_bai_get_state(a, b, c, d, exploit_p: Float01): - actions = {"action1": Beta(n_successes=a, n_failures=b), "action2": Beta(n_successes=c, n_failures=d)} - smab = SmabBernoulliBAI(actions=actions, exploit_p=exploit_p) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {"exploit_p": exploit_p}, - "epsilon": None, - "default_action": None, + actions = self._create_actions(action_ids, costs, n_objectives) + default_action = action_ids[0] if epsilon else None + + kwargs = { + k: v + for k, v in { + "epsilon": epsilon, + "default_action": default_action, + }.items() + if v is not None } - ) - - class_name, smab_state = smab.get_state() - assert class_name == "SmabBernoulliBAI" - assert smab_state == expected_state - - assert is_serializable(smab_state), "Internal state is not serializable" - - + for param, classes in zip(["subsidy_factor", "exploit_p"], [[SmabBernoulliCC], [SmabBernoulliBAI]]): + if self.smab_class in classes: 
+ actual_param = eval(param) + if isinstance(actual_param, float) or actual_param is None: + kwargs[param] = actual_param + else: + kwargs[param] = actual_param.draw(st.floats(min_value=0, max_value=1)) + + smab = self.smab_class(actions=actions, **kwargs) + + # For cold start test + if self.smab_class in [SmabBernoulliMO, SmabBernoulliMOCC]: + kwargs["n_objectives"] = n_objectives + return smab, actions, kwargs + + +TEST_CONFIGS = { + "smab": ModelTestConfig(SmabBernoulli, ClassicBandit, [Beta, SmabZoomingModel]), + "smab_bai": ModelTestConfig(SmabBernoulliBAI, BestActionIdentificationBandit, [Beta, SmabZoomingModel]), + "smab_cc": ModelTestConfig( + SmabBernoulliCC, + CostControlBandit, + [BetaCC, SmabZoomingModelCC], + ), + "smab_mo": ModelTestConfig(SmabBernoulliMO, MultiObjectiveBandit, [BetaMO]), + "smab_mocc": ModelTestConfig(SmabBernoulliMOCC, MultiObjectiveCostControlBandit, [BetaMOCC]), +} + + +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) @given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "n_successes": st.integers(min_value=1, max_value=100), - "n_failures": st.integers(min_value=1, max_value=100), - }, - ), - min_size=2, - ), - "strategy": st.one_of( - st.just({}), - st.just({"exploit_p": None}), - st.builds(lambda x: {"exploit_p": x}, st.floats(min_value=0, max_value=1)), - ), - } - ) + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_objectives=st.data(), + subsidy_factor=st.data(), + exploit_p=st.data(), ) -def test_smab_bai_from_state(state): - smab = SmabBernoulliBAI.from_state(state) - assert isinstance(smab, SmabBernoulliBAI) - - expected_actions = state["actions"] - actual_actions = to_serializable_dict(smab.actions) # Normalize the dict - assert expected_actions == actual_actions - expected_exploit_p = smab.strategy.get_expected_value_from_state(state, "exploit_p") - actual_exploit_p = smab.strategy.exploit_p - assert expected_exploit_p == actual_exploit_p - - # Ensure get_state and from_state compatibility - new_smab = globals()[smab.get_state()[0]].from_state(state=smab.get_state()[1]) - assert new_smab == smab - - -######################################################################################################################## - - -# SmabBernoulli with strategy=CostControlBandit() - - -def test_create_smab_bernoulli_cc(): - assert SmabBernoulliCC.cold_start( - action_ids_cost={"a1": 10, "a2": 20}, - subsidy_factor=0.2, - ) == SmabBernoulliCC( - actions={"a1": BetaCC(cost=10), "a2": BetaCC(cost=20)}, - subsidy_factor=0.2, - ) - - assert SmabBernoulliCC.cold_start(action_ids_cost={"a1": 10, "a2": 20}) == SmabBernoulliCC( - actions={"a1": BetaCC(cost=10), "a2": BetaCC(cost=20)}, - ) - - -def test_can_init_smabcc(): - # init default arguments - s = SmabBernoulliCC( - actions={ - "a1": BetaCC(cost=10), - "a2": BetaCC(cost=20), - }, - ) - assert s.actions["a1"] == BetaCC(cost=10) - assert s.actions["a2"] == BetaCC(cost=20) - assert s.strategy.subsidy_factor == 0.5 - - # init with input args - s = SmabBernoulliCC( - actions={ - "a1": BetaCC(n_successes=1, n_failures=2, cost=10), - "a2": BetaCC(n_successes=3, n_failures=4, cost=20), +def test_cold_start( + config: ModelTestConfig, + action_ids: List[str], + epsilon: Optional[float], + costs, + n_objectives, + 
exploit_p, + subsidy_factor, +): + # Create SMAB instance + smab, actions, kwargs = config.create_smab_and_actions( + action_ids, epsilon, costs, n_objectives, exploit_p, subsidy_factor + ) + + # Cold start comparison logic (modified for different model types) + cold_start_kwargs = { + "action_ids": { + action for action, model in zip(action_ids, config.model_types) if issubclass(model, (Beta, BetaMO)) }, - subsidy_factor=0.7, - ) - assert s.actions["a1"] == BetaCC(n_successes=1, n_failures=2, cost=10) - assert s.actions["a2"] == BetaCC(n_successes=3, n_failures=4, cost=20) - assert s.strategy == CostControlBandit(subsidy_factor=0.7) - assert s.strategy.subsidy_factor == 0.7 - - -def test_smabcc_predict(n_samples: int): - s = SmabBernoulliCC( - actions={ - "a1": BetaCC(n_successes=1, n_failures=2, cost=10), - "a2": BetaCC(n_successes=3, n_failures=4, cost=20), + "quantitative_action_ids": { + action for action, model in zip(action_ids, config.model_types) if issubclass(model, QuantitativeModel) }, - subsidy_factor=0.7, - ) - _, _ = s.predict(n_samples=n_samples) - - -def test_smabcc_update(): - s = SmabBernoulliCC(actions={"a1": BetaCC(cost=10), "a2": BetaCC(cost=10)}) - s.update(actions=["a1", "a1"], rewards=[1, 0]) - - -@given( - st.integers(min_value=1), - st.integers(min_value=1), - st.integers(min_value=1), - st.integers(min_value=1), - st.floats(min_value=0), - st.floats(min_value=0), - st.floats(min_value=0, max_value=1), -) -def test_smab_cc_get_state(a, b, c, d, cost1: NonNegativeFloat, cost2: NonNegativeFloat, subsidy_factor: Float01): - actions = { - "action1": BetaCC(n_successes=a, n_failures=b, cost=cost1), - "action2": BetaCC(n_successes=c, n_failures=d, cost=cost2), } - smab = SmabBernoulliCC(actions=actions, subsidy_factor=subsidy_factor) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": { - "subsidy_factor": subsidy_factor, - }, - "epsilon": None, - "default_action": None, + if all(model in [BetaCC, SmabZoomingModelCC, BetaMOCC] for model in config.model_types): + cold_start_kwargs["action_ids_cost"] = { + action: model.cost for action, model in actions.items() if isinstance(model, (BetaCC, BetaMOCC)) } - ) - - class_name, smab_state = smab.get_state() - assert class_name == "SmabBernoulliCC" - assert smab_state == expected_state - - assert is_serializable(smab_state), "Internal state is not serializable" + cold_start_kwargs["quantitative_action_ids_cost"] = { + action: model.cost for action, model in actions.items() if isinstance(model, SmabZoomingModelCC) + } + cold_start_kwargs.update(kwargs) # Add exploit_p or subsidy_factor if needed + cold_start_kwargs = {k: v for k, v in cold_start_kwargs.items() if v is not None} + assert config.smab_class.cold_start(**cold_start_kwargs) == smab +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) @given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "n_successes": st.integers(min_value=1, max_value=100), - "n_failures": st.integers(min_value=1, max_value=100), - "cost": st.floats(min_value=0), - }, - ), - min_size=2, - ), - "strategy": st.one_of( - st.just({}), - st.just({"subsidy_factor": None}), - st.builds(lambda x: {"subsidy_factor": x}, st.floats(min_value=0, max_value=1)), - ), - } - ) + action_ids=st.lists(st.text(min_size=1), min_size=2, max_size=5, unique=True), + n_objectives=st.data(), + costs=st.data(), + subsidy_factor=st.data(), + 
exploit_p=st.data(), ) -def test_smab_cc_from_state(state): - smab = SmabBernoulliCC.from_state(state) - assert isinstance(smab, SmabBernoulliCC) - - expected_actions = state["actions"] - actual_actions = json.loads(json.dumps(smab.actions, default=dict)) # Normalize the dict - assert expected_actions == actual_actions - expected_subsidy_factor = smab.strategy.get_expected_value_from_state(state, "subsidy_factor") - actual_subsidy_factor = smab.strategy.subsidy_factor - assert expected_subsidy_factor == actual_subsidy_factor - - # Ensure get_state and from_state compatibility - new_smab = globals()[smab.get_state()[0]].from_state(state=smab.get_state()[1]) - assert new_smab == smab - - -######################################################################################################################## - - -# SmabBernoulli with strategy=MultiObjectiveBandit() - - -@given(st.lists(st.integers(min_value=1), min_size=6, max_size=6)) -def test_can_init_smab_mo(a_list): - a, b, c, d, e, f = a_list - - s = SmabBernoulliMO( - actions={ - "a1": BetaMO( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ] - ), - "a2": BetaMO( - counters=[ - Beta(n_successes=d, n_failures=a), - Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - ] - ), - }, - ) - assert s.actions["a1"] == BetaMO( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ] - ) - assert s.actions["a2"] == BetaMO( - counters=[ - Beta(n_successes=d, n_failures=a), - Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - ] - ) - assert s.strategy == MultiObjectiveBandit() - - -def test_all_actions_must_have_same_number_of_objectives_smab_mo(): - with pytest.raises(ValueError): - SmabBernoulliMO( - actions={ - "a1": BetaMO(counters=[Beta(), Beta()]), - "a2": BetaMO(counters=[Beta(), Beta()]), - "a3": BetaMO(counters=[Beta(), Beta(), Beta()]), - }, - ) - - -def test_smab_mo_predict(n_samples: int, n_objectives=3): - s = SmabBernoulliMO.cold_start(action_ids={"a1", "a2"}, n_objectives=n_objectives) - - forbidden = None - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1"] - predicted_actions, _ = s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - assert "a1" not in predicted_actions - - forbidden = ["a1", "a2"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1", "a2", "a3"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1", "a3"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - +def test_bad_initialization( + config: ModelTestConfig, + action_ids: List[str], + n_objectives, + costs, + exploit_p, + subsidy_factor, +): + real_n_objectives = n_objectives.draw(st.integers(min_value=1, max_value=10)) + kwargs = {"cost": 1.0} if config.smab_class in (SmabBernoulliCC, SmabBernoulliMOCC) else {} + if config.smab_class in [SmabBernoulliMO, SmabBernoulliMOCC]: + kwargs["models"] = [Beta() for _ in range(real_n_objectives)] + + # Test empty actions + with pytest.raises(AttributeError): + config.smab_class(actions={}) -def test_smab_mo_update(n_objectives=3): - action_ids = {"a1", "a2"} - mab = SmabBernoulliMO.cold_start(action_ids=action_ids, n_objectives=n_objectives) - assert all([mab.actions[a] == BetaMO.cold_start(n_objectives=n_objectives) 
for a in action_ids]) + # Test single action (should warn) + single_action = {action_ids[0]: config.model_types[0](**kwargs)} + with pytest.warns(UserWarning): + config.smab_class(actions=single_action) - mab.update(actions=["a1", "a2"], rewards=[[1, 0, 1], [1, 1, 0]]) - assert all([mab.actions[a] != BetaMO.cold_start(n_objectives=n_objectives) for a in set(action_ids)]) + # Test mismatched model types + actions_wrong_type = { + action_ids[0]: Beta(), + action_ids[1]: BetaCC(cost=1.0), + } + with pytest.raises(ValidationError): + config.smab_class(actions=actions_wrong_type) + # Test None actions + with pytest.raises(ValidationError): + config.smab_class(actions={aid: None for aid in action_ids}) -@given(st.lists(st.integers(min_value=1), min_size=6, max_size=6)) -def test_smab_mo_get_state(a_list): - a, b, c, d, e, f = a_list + # Test invalid strategy parameters + if config.smab_class == SmabBernoulliBAI: + with pytest.raises(ValidationError): + config.create_smab_and_actions( + action_ids, + None, + costs, + n_objectives, + exploit_p.draw(st.sampled_from([-0.1, 1.1])), + subsidy_factor, + ) + elif config.smab_class == SmabBernoulliCC: + with pytest.raises(ValidationError): + config.create_smab_and_actions( + action_ids, + None, + costs, + n_objectives, + exploit_p, + subsidy_factor.draw(st.sampled_from([-0.1, 1.1])), + ) - actions = { - "a1": BetaMO( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ] - ), - "a2": BetaMO( - counters=[ - Beta(n_successes=d, n_failures=a), - Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - ] - ), - } - smab = SmabBernoulliMO(actions=actions) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {}, - "epsilon": None, - "default_action": None, + # Test multi-objective specific cases + if hasattr(config.model_types[0], "models"): + # Test mismatched number of objectives + mo_actions_wrong = { + action_ids[0]: BetaMO(models=[Beta() for _ in range(real_n_objectives)]), + action_ids[1]: BetaMO(models=[Beta() for _ in range(real_n_objectives + 1)]), } - ) - - class_name, smab_state = smab.get_state() - assert class_name == "SmabBernoulliMO" - assert smab_state == expected_state - - assert is_serializable(smab_state), "Internal state is not serializable" + with pytest.raises(AttributeError): + config.smab_class(actions=mo_actions_wrong) +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) @given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "counters": st.lists( - st.fixed_dictionaries( - { - "n_successes": st.integers(min_value=1, max_value=100), - "n_failures": st.integers(min_value=1, max_value=100), - }, - ), - min_size=3, - max_size=3, - ) - } - ), - min_size=2, - ), - "strategy": st.fixed_dictionaries({}), - } - ) + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + n_samples=st.integers(min_value=1, max_value=100), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + rewards=st.data(), + quantities=st.data(), + costs=st.data(), + n_objectives=st.data(), + subsidy_factor=st.data(), + exploit_p=st.data(), ) -def test_smab_mo_from_state(state): - smab = SmabBernoulliMO.from_state(state) - assert isinstance(smab, SmabBernoulliMO) - - expected_actions = state["actions"] - actual_actions = 
json.loads(json.dumps(smab.actions, default=dict)) # Normalize the dict - assert expected_actions == actual_actions - - # Ensure get_state and from_state compatibility - new_smab = globals()[smab.get_state()[0]].from_state(state=smab.get_state()[1]) - assert new_smab == smab - - -######################################################################################################################## - - -# SmabBernoulli with strategy=MultiObjectiveCostControlBandit() - - -@given(st.lists(st.integers(min_value=1), min_size=8, max_size=8)) -def test_can_init_smab_mo_cc(a_list): - a, b, c, d, e, f, g, h = a_list - - s = SmabBernoulliMOCC( - actions={ - "a1": BetaMOCC( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ], - cost=g, - ), - "a2": BetaMOCC( - counters=[ - Beta(n_successes=d, n_failures=a), - Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - ], - cost=h, - ), - }, - ) - assert s.actions["a1"] == BetaMOCC( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ], - cost=g, - ) - assert s.actions["a2"] == BetaMOCC( - counters=[ - Beta(n_successes=d, n_failures=a), - Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - ], - cost=h, - ) - assert s.strategy == MultiObjectiveCostControlBandit() - - -def test_all_actions_must_have_same_number_of_objectives_smab_mo_cc(): - with pytest.raises(ValueError): - SmabBernoulliMOCC( - actions={ - "action 1": BetaMOCC(counters=[Beta(), Beta()], cost=1), - "action 2": BetaMOCC(counters=[Beta(), Beta()], cost=1), - "action 3": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=1), - }, - ) - - -def test_smab_mo_cc_predict(n_samples: int): - n_samples = 1000 - - s = SmabBernoulliMOCC.cold_start(action_ids_cost={"a1": 1, "a2": 2}, n_objectives=2) - - forbidden = None - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1"] - predicted_actions, _ = s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - assert "a1" not in predicted_actions - - forbidden = ["a1", "a2"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1", "a2", "a3"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - forbidden = ["a1", "a3"] - with pytest.raises(ValueError): - s.predict(n_samples=n_samples, forbidden_actions=forbidden) - - -def test_smab_mo_cc_update(n_objectives=3): - action_ids_cost = {"a1": 1, "a2": 2} - mab = SmabBernoulliMOCC.cold_start(action_ids_cost=action_ids_cost, n_objectives=n_objectives) - assert all( - [ - mab.actions[a] == BetaMOCC.cold_start(n_objectives=n_objectives, cost=action_ids_cost[a]) - for a in action_ids_cost.keys() +def test_update( + config: ModelTestConfig, + action_ids: List[str], + n_samples: int, + epsilon: Optional[float], + rewards, + quantities, + costs, + n_objectives, + exploit_p, + subsidy_factor, +): + # Create SMAB instance + smab, _, kwargs = config.create_smab_and_actions( + action_ids, epsilon, costs, n_objectives, exploit_p, subsidy_factor + ) + batched_smab = deepcopy(smab) + n_objectives = kwargs.get("n_objectives") + # Generate random rewards + reward_data = rewards.draw(rewards_strategy(n_objectives=n_objectives, n_samples=n_samples)) + # Test updates with generated data + actions_to_update = np.random.choice(np.array(action_ids, dtype=np.object_), size=n_samples, replace=True).tolist() + # 
Generate quantities only if there are any QuantitativeModel actions + if any(isinstance(model, QuantitativeModel) for model in smab.actions.values()): + quantity_data = quantities.draw(quantities_strategy(n_samples=n_samples)) + quantity_data = [ + q if isinstance(smab.actions[action], QuantitativeModel) else None + for q, action in zip(quantity_data, actions_to_update) ] - ) - - mab.update(actions=["a1", "a2"], rewards=[[1, 0, 1], [1, 1, 0]]) - assert all( [ - mab.actions[a] != BetaMOCC.cold_start(n_objectives=n_objectives, cost=action_ids_cost[a]) - for a in action_ids_cost.keys() + smab.update(actions=[action], rewards=[reward], quantities=[quantity]) + for action, reward, quantity in zip(actions_to_update, reward_data, quantity_data) ] - ) - + else: + quantity_data = None + [smab.update(actions=[action], rewards=[reward]) for action, reward in zip(actions_to_update, reward_data)] + + batched_smab.update(actions=actions_to_update, rewards=reward_data, quantities=quantity_data) + + for action in smab.actions: + if isinstance(smab.actions[action], Beta): + assert smab.actions[action] == batched_smab.actions[action] + relevant_rewards = np.array(reward_data)[np.array(actions_to_update) == action] + if hasattr(smab.actions[action], "n_successes"): + assert ( + smab.actions[action].n_successes + == batched_smab.actions[action].n_successes + == sum(relevant_rewards) + 1 + ) + assert ( + smab.actions[action].n_failures + == batched_smab.actions[action].n_failures + == sum(1 - relevant_rewards) + 1 + ) -@given(st.lists(st.integers(min_value=1), min_size=8, max_size=8)) -def test_smab_mo_cc_get_state(a_list): - a, b, c, d, e, f, g, h = a_list - actions = { - "a1": BetaMOCC( - counters=[ - Beta(n_successes=a, n_failures=b), - Beta(n_successes=c, n_failures=d), - Beta(n_successes=e, n_failures=f), - ], - cost=g, - ), - "a2": BetaMOCC( - counters=[ - Beta(n_successes=d, n_failures=a), - Beta(n_successes=e, n_failures=b), - Beta(n_successes=f, n_failures=c), - ], - cost=h, +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) +@given( + action_ids=st.lists( + st.text( + min_size=1, ), - } - smab = SmabBernoulliMOCC(actions=actions) - expected_state = to_serializable_dict( - { - "actions": actions, - "strategy": {}, - "epsilon": None, - "default_action": None, - } - ) - - class_name, smab_state = smab.get_state() - assert class_name == "SmabBernoulliMOCC" - assert smab_state == expected_state - - assert is_serializable(smab_state), "Internal state is not serializable" + min_size=2, + max_size=5, + unique=True, + ), + n_samples=st.integers(min_value=1, max_value=100), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_objectives=st.data(), + subsidy_factor=st.data(), + exploit_p=st.data(), + diff=st.data(), +) +def test_predict( + config: ModelTestConfig, + action_ids: List[str], + n_samples: int, + epsilon: Optional[float], + costs, + n_objectives, + exploit_p, + subsidy_factor, + diff, + monkeymodule, +): + # Create SMAB instance + smab = config.create_smab_and_actions(action_ids, epsilon, costs, n_objectives, exploit_p, subsidy_factor)[0] + + # Test predictions with random forbidden actions + forbidden = ( + set(np.random.choice(np.array(action_ids, dtype=np.object_), size=len(action_ids) // 2, replace=False)) + if len(action_ids) > 2 + else None + ) + if smab.default_action is not None and forbidden is not None and smab.default_action in forbidden: + forbidden.remove(smab.default_action) + + 
mock_update(list(smab.actions.values()), diff, monkeymodule) + best_actions, probs = smab.predict(n_samples=n_samples, forbidden_actions=forbidden) + assert len(best_actions) == n_samples + assert len(probs) == n_samples + + if forbidden: + assert all(len(prob) == len(action_ids) - len(forbidden) for prob in probs) + assert all(action not in forbidden for action in best_actions) + assert all(action not in forbidden for prob in probs for action in prob.keys()) + else: + assert all(len(prob) == len(action_ids) for prob in probs) + if isinstance(smab, SmabBernoulli) and not epsilon: + assert all(prob[best_action] == max(prob.values()) for best_action, prob in zip(best_actions, probs)) +@settings(deadline=None) +@pytest.mark.parametrize("config", TEST_CONFIGS.values(), ids=TEST_CONFIGS.keys()) @given( - state=st.fixed_dictionaries( - { - "actions": st.dictionaries( - keys=st.text(min_size=1, max_size=10), - values=st.fixed_dictionaries( - { - "counters": st.lists( - st.fixed_dictionaries( - { - "n_successes": st.integers(min_value=1, max_value=100), - "n_failures": st.integers(min_value=1, max_value=100), - }, - ), - min_size=3, - max_size=3, - ), - "cost": st.floats(min_value=0), - } - ), - min_size=2, - ), - "strategy": st.fixed_dictionaries({}), - } - ) + action_ids=st.lists( + st.text( + min_size=1, + ), + min_size=2, + max_size=5, + unique=True, + ), + epsilon=st.one_of(st.none(), st.floats(min_value=0, max_value=1)), + costs=st.data(), + n_objectives=st.data(), + subsidy_factor=st.data(), + exploit_p=st.data(), + diff=st.data(), ) -def test_smab_mo_cc_from_state(state): - smab = SmabBernoulliMOCC.from_state(state) - assert isinstance(smab, SmabBernoulliMOCC) - - expected_actions = state["actions"] - actual_actions = to_serializable_dict(smab.actions) # Normalize the dict - assert expected_actions == actual_actions - - # Ensure get_state and from_state compatibility - new_smab = globals()[smab.get_state()[0]].from_state(state=smab.get_state()[1]) - assert new_smab == smab - - -######################################################################################################################## - - -# Smab with epsilon-greedy super strategy +def test_serialization( + config: ModelTestConfig, + action_ids: List[str], + epsilon: Optional[float], + costs, + n_objectives, + exploit_p, + subsidy_factor, + diff, + monkeymodule, +): + # Create SMAB instance + smab = config.create_smab_and_actions(action_ids, epsilon, costs, n_objectives, exploit_p, subsidy_factor)[0] + + pre_update_state = smab.get_state() + mock_update(list(smab.actions.values()), diff, monkeymodule) + post_update_state = smab.get_state() + # Verify model updates + assert pre_update_state != post_update_state + + # Test serialization + restored_smab = config.smab_class.from_state(post_update_state[1]) + assert restored_smab == smab @given( st.integers(min_value=1), st.integers(min_value=1), ) -def test_can_instantiate_epsilon_greddy_smab_with_params(a, b): +def test_can_instantiate_smab_with_params(a, b): s = SmabBernoulli( actions={ "action1": Beta(n_successes=a, n_failures=b), "action2": Beta(n_successes=a, n_failures=b), }, - epsilon=0.1, - default_action="action1", ) assert (s.actions["action1"].n_successes == a) and (s.actions["action1"].n_failures == b) assert s.actions["action1"] == s.actions["action2"] -def test_epsilon_greedy_smab_predict(n_samples: int): - n_samples = 1000 - - s = SmabBernoulli( - actions={ - "a0": Beta(), - "a1": Beta(n_successes=5, n_failures=5), - "forb_1": Beta(n_successes=10, n_failures=1), - 
"best": Beta(n_successes=10, n_failures=5), - "forb_2": Beta(n_successes=100, n_failures=4), - "a5": Beta(), - }, - epsilon=0.1, - default_action="a1", - ) - forbidden_actions = set(["forb_1", "forb_2"]) - - _, _ = s.predict(n_samples=n_samples, forbidden_actions=forbidden_actions) - - -def test_epsilon_greddy_smabbai_predict(n_samples: int): - n_samples = 1000 - s = SmabBernoulliBAI(actions={"a1": Beta(), "a2": Beta()}, epsilon=0.1, default_action="a1") - _, _ = s.predict(n_samples=n_samples) - - -def test_epsilon_greddy_smabcc_predict(n_samples: int): - n_samples = 1000 - s = SmabBernoulliCC( - actions={ - "a1": BetaCC(n_successes=1, n_failures=2, cost=10), - "a2": BetaCC(n_successes=3, n_failures=4, cost=20), - }, - subsidy_factor=0.7, - epsilon=0.1, - default_action="a1", - ) - _, _ = s.predict(n_samples=n_samples) - - -def test_epsilon_greddy_smab_mo_predict(n_samples: int): - n_samples = 1000 - - s = SmabBernoulliMO.cold_start(action_ids={"a1", "a2"}, n_objectives=3, epsilon=0.1, default_action="a1") - - forbidden = None - s.predict(n_samples=n_samples, forbidden_actions=forbidden) +@given(st.integers(max_value=0)) +def test_smab_predict_raise_when_samples_low(n_samples): + s = SmabBernoulli(actions={"a1": Beta(), "a2": Beta()}) + with pytest.raises(ValidationError): + s.predict(n_samples=n_samples) -def test_epsilon_greddy_smab_mo_cc_predict(n_samples: int): - n_samples = 1000 +def test_smab_predict_raise_when_all_actions_forbidden(): + s = SmabBernoulli(actions={"a1": Beta(), "a2": Beta()}) + with pytest.raises(ValueError): + s.predict(n_samples=10, forbidden_actions=["a1", "a2"]) - s = SmabBernoulliMOCC.cold_start( - action_ids_cost={"a1": 1, "a2": 2}, n_objectives=2, epsilon=0.1, default_action="a1" - ) - forbidden = None - s.predict(n_samples=n_samples, forbidden_actions=forbidden) +@given(st.text()) +def test_smab_accepts_only_valid_actions(s): + if s == "": + with pytest.raises(ValidationError): + SmabBernoulli( + actions={ + s: Beta(), + s + "_": Beta(), + } + ) + else: + SmabBernoulli(actions={s: Beta(), s + "_": Beta()}) diff --git a/tests/test_strategy.py b/tests/test_strategy.py index 8773fe1..7ecc7c9 100644 --- a/tests/test_strategy.py +++ b/tests/test_strategy.py @@ -250,7 +250,7 @@ def test_select_action_logic_corner_cases(a_list_p, a_list_cost): # if cost factor is 0: mutated_c = c.with_subsidy_factor(subsidy_factor=0) - # get the keys of the max p.values() (there might be more max_p_values) + # get the keys of the max p.quantities() (there might be more max_p_values) max_p_values = [k for k, v in p.items() if v == max(p.values())] # if cost factor is 0 and only 1 max_value => return the action with highest p (classic bandit) @@ -351,11 +351,11 @@ def test_select_action_mo_cc(): m = MultiObjectiveCostControlBandit() actions = { - "a1": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=8), - "a2": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=2), - "a3": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=5), - "a4": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=1), - "a5": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=7), + "a1": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=8), + "a2": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=2), + "a3": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=5), + "a4": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=1), + "a5": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=7), } p = { "a1": [0.1, 0.3, 0.5], @@ -369,9 +369,9 @@ def test_select_action_mo_cc(): assert m.select_action(p=p, actions=actions) == "a4" actions = { - 
"a1": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=2), - "a2": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=2), - "a3": BetaMOCC(counters=[Beta(), Beta(), Beta()], cost=5), + "a1": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=2), + "a2": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=2), + "a3": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=5), } p = { "a1": [0.6, 0.1, 0.1],