Add zooming quantitative bandit model
### Changes:
* Added quantitative model support for continuous action spaces using the zooming algorithm (see the illustrative sketch after this list).
* Added base model classes to separate single-objective, multi-objective, and cost-control models.
* Refactored MAB classes to support both discrete and continuous action spaces.
* Updated the test suite with new test cases for quantitative models and refactored it for robustness.
* Added serialization support for quantitative models.
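To make the first bullet concrete, here is a minimal toy sketch of the zooming idea on a single quantity in [0, 1]. It is not the pybandits implementation; the reward function and the refinement schedule are invented purely for illustration.

```python
# Toy sketch of the zooming idea: repeatedly sample a handful of candidate
# quantities in [0, 1], then shrink the search window around the one that
# earned the best observed reward. Not the pybandits implementation.
import random


def toy_zooming(reward_fn, n_passes=20, n_points=5):
    lo, hi = 0.0, 1.0
    best_q = 0.5
    for _ in range(n_passes):
        width = (hi - lo) / n_points
        # One noisy reward per candidate quantity in the current window.
        samples = [(reward_fn(lo + (i + 0.5) * width), lo + (i + 0.5) * width) for i in range(n_points)]
        _, best_q = max(samples)  # quantity with the best sampled reward
        # Zoom: center a narrower window on the winner for the next pass.
        lo, hi = max(0.0, best_q - width), min(1.0, best_q + width)
    return best_q


if __name__ == "__main__":
    def bernoulli(q):
        # Hypothetical reward whose success probability peaks near q = 0.7.
        return float(random.random() < 1.0 - abs(q - 0.7))

    print(round(toy_zooming(bernoulli), 3))
```

The point is only that a continuous quantity is handled by adaptive refinement rather than by enumerating a fixed grid of discrete actions.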
Shahar-Bar committed Jan 28, 2025
1 parent 64913ef commit ff06dff
Showing 19 changed files with 2,742 additions and 1,730 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -65,3 +65,6 @@ MANIFEST

# poetry
poetry.lock

# qodo gen
.qodo
81 changes: 77 additions & 4 deletions pybandits/base.py
@@ -21,7 +21,9 @@
# SOFTWARE.


from typing import Any, Dict, List, NewType, Tuple, Union
from typing import Any, Dict, List, Mapping, NewType, Optional, Tuple, Union

from typing_extensions import Self

from pybandits.pydantic_version_compatibility import (
    PYDANTIC_VERSION_1,
@@ -34,24 +36,52 @@
)

ActionId = NewType("ActionId", constr(min_length=1))
QuantitativeActionId = Tuple[ActionId, Tuple[float, ...]]
UnifiedActionId = Union[ActionId, QuantitativeActionId]
Float01 = NewType("Float_0_1", confloat(ge=0, le=1))
Probability = NewType("Probability", Float01)
ProbabilityWeight = Tuple[Probability, float]
MOProbability = List[Probability]
MOProbabilityWeight = List[ProbabilityWeight]
# QuantitativeProbability generalizes probability to include both action quantities and their associated probability
QuantitativeProbability = Dict[Tuple[float, ...], Probability]
QuantitativeProbabilityWeight = Dict[Tuple[float, ...], ProbabilityWeight]
QuantitativeMOProbability = Dict[Tuple[float, ...], List[Probability]]
QuantitativeMOProbabilityWeight = Dict[Tuple[float, ...], List[ProbabilityWeight]]
UnifiedProbability = Union[Probability, QuantitativeProbability]
UnifiedProbabilityWeight = Union[ProbabilityWeight, QuantitativeProbabilityWeight]
UnifiedMOProbability = Union[MOProbability, QuantitativeMOProbability]
UnifiedMOProbabilityWeight = Union[MOProbabilityWeight, QuantitativeMOProbabilityWeight]
# SmabPredictions is a tuple of two lists: the first list contains the selected action ids,
# and the second list contains their associated probabilities
SmabPredictions = NewType("SmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]])
SmabPredictions = NewType(
    "SmabPredictions",
    Tuple[
        List[UnifiedActionId],
        Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, MOProbability]]],
    ],
)
# CmabPredictions is a tuple of three lists: the first list contains the selected action ids,
# the second list contains their associated probabilities,
# and the third list contains their associated weighted sums
CmabPredictions = NewType(
    "CmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]], List[Dict[ActionId, float]]]
    "CmabPredictions",
    Union[
        Tuple[List[UnifiedActionId], List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, float]]],
        Tuple[
            List[UnifiedActionId], List[Dict[UnifiedActionId, MOProbability]], List[Dict[UnifiedActionId, List[float]]]
        ],
    ],
)
Predictions = NewType("Predictions", Union[SmabPredictions, CmabPredictions])
BinaryReward = NewType("BinaryReward", conint(ge=0, le=1))
ActionRewardLikelihood = NewType(
    "ActionRewardLikelihood",
    Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]],
    Union[Dict[UnifiedActionId, float], Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]],
)
Serializable = Union[str, int, float, bool, None, List["Serializable"], Dict[str, "Serializable"]]
ACTION_IDS_PREFIX = "action_ids_"
QUANTITATIVE_ACTION_IDS_PREFIX = f"quantitative_{ACTION_IDS_PREFIX}"
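To ground the new aliases above, here are hand-written example values. The action name "discount" and all quantities are hypothetical, and the import assumes a pybandits install that includes this commit.

```python
# Hypothetical values for the new aliases in pybandits/base.py (illustration only).
from pybandits.base import QuantitativeActionId, QuantitativeProbability, UnifiedActionId

plain_action = "discount"                                     # a discrete ActionId
quant_action: QuantitativeActionId = ("discount", (0.25,))    # action id plus quantity tuple
either_form: UnifiedActionId = quant_action                   # UnifiedActionId accepts both forms

# QuantitativeProbability: each sampled quantity maps to its reward probability.
quant_proba: QuantitativeProbability = {(0.25,): 0.61, (0.75,): 0.48}

# A SmabPredictions-style value: selected actions plus per-action probabilities.
predictions = (
    ["discount", ("discount", (0.25,))],
    [{"discount": 0.70}, {("discount", (0.25,)): 0.61}],
)
```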


class _classproperty(property):
@@ -109,3 +139,46 @@ def model_fields(cls) -> Dict[str, Any]:
        The model fields.
        """
        return cls.__fields__

    def model_copy(self, *, update: Optional[Mapping[str, Any]] = None, deep: bool = False) -> Self:
        """
        Create a new instance of the model with the same quantities.
        Parameters
        ----------
        update : Mapping[str, Any], optional
            The quantities to update, by default None
        deep : bool, optional
            Whether to copy the quantities deeply, by default False
        Returns
        -------
        Self
            The new instance of the model.
        """
        return self.copy(update=update, deep=deep)

    @classmethod
    def model_validate(
        cls,
        obj: Any,
    ) -> Self:
        """
        Validate a PyBandits BaseModel model instance.
        Parameters
        ----------
        obj : Any
            The object to validate. Use state dictionary to generate model from state.
        Raises
        ------
        ValidationError: If the object could not be validated.
        Returns
        -------
        Self
            The validated model instance.
        """
        return cls.parse_obj(obj)
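The two methods above expose the pydantic v2 method names on top of the v1 calls (copy and parse_obj). A minimal usage sketch follows; the Item subclass is hypothetical and exists only to show the call pattern.

```python
# Hypothetical subclass used only to illustrate the compatibility shims above.
from pybandits.base import PyBanditsBaseModel


class Item(PyBanditsBaseModel):
    name: str
    weight: float = 1.0


original = Item(name="a")
tweaked = original.model_copy(update={"weight": 2.0})          # copy with one field changed
restored = Item.model_validate({"name": "b", "weight": 3.0})   # build and validate from a dict
```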
94 changes: 94 additions & 0 deletions pybandits/base_model.py
@@ -0,0 +1,94 @@
from abc import ABC, abstractmethod
from typing import Callable, List, Union

import numpy as np

from pybandits.base import BinaryReward, Probability, PyBanditsBaseModel, QuantitativeProbability
from pybandits.pydantic_version_compatibility import NonNegativeFloat


class BaseModel(PyBanditsBaseModel, ABC):
    """
    Class to model the prior distributions of standard actions and quantitative actions.
    """

    @abstractmethod
    def sample_proba(self) -> Union[Probability, QuantitativeProbability]:
        """
        Sample the probability of getting a positive reward.
        """

    @abstractmethod
    def update(self, rewards: Union[List[BinaryReward], List[List[BinaryReward]]], **kwargs):
        """
        Update the model parameters.
        Parameters
        ----------
        rewards : Union[List[BinaryReward], List[List[BinaryReward]]],
            if nested list, len() should follow shape of (n_samples, n_objectives)
            The binary reward for each sample.
            If strategy is not MultiObjectiveBandit, rewards should be a list, e.g.
                rewards = [1, 0, 1, 1, 1, ...]
            If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2):
                rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
        """


class BaseModelSO(BaseModel, ABC):
    """
    Class to model the prior distributions of standard actions and quantitative actions for single objective.
    """

    @abstractmethod
    def update(self, rewards: List[BinaryReward], **kwargs):
        """
        Update the model parameters.
        Parameters
        ----------
        rewards : List[BinaryReward],
            The binary reward for each sample.
        """

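As a rough illustration of the contract BaseModelSO defines (sample_proba plus update), a toy Beta-Bernoulli subclass might look like the sketch below. It is not one of the model classes shipped in this commit, and it assumes the base class permits in-place field updates.

```python
# Toy sketch of a BaseModelSO subclass; field names and the update rule are
# illustrative, not taken from pybandits' shipped models.
import random
from typing import List

from pybandits.base import BinaryReward, Probability
from pybandits.base_model import BaseModelSO


class ToyBetaModel(BaseModelSO):
    n_successes: int = 1  # Beta prior alpha
    n_failures: int = 1   # Beta prior beta

    def sample_proba(self) -> Probability:
        # Thompson-sampling style draw from the Beta posterior.
        return random.betavariate(self.n_successes, self.n_failures)

    def update(self, rewards: List[BinaryReward], **kwargs):
        # Count positive and negative binary rewards into the posterior.
        self.n_successes += sum(rewards)
        self.n_failures += len(rewards) - sum(rewards)
```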

class BaseModelMO(BaseModel, ABC):
    """
    Class to model the prior distributions of standard actions and quantitative actions for multi-objective.
    Parameters
    ----------
    models : List[BaseModelSO]
        The list of models for each objective.
    """

    models: List[BaseModelSO]

    @abstractmethod
    def update(self, rewards: List[List[BinaryReward]], **kwargs):
        """
        Update the model parameters.
        Parameters
        ----------
        rewards : List[List[BinaryReward]],
            if nested list, len() should follow shape of (n_samples, n_objectives)
            The binary rewards for each sample.
            If strategy is not MultiObjectiveBandit, rewards should be a list, e.g.
                rewards = [1, 0, 1, 1, 1, ...]
            If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2):
                rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
        """


class BaseModelCC(PyBanditsBaseModel, ABC):
    """
    Class to model action cost.
    Parameters
    ----------
    cost : Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
        Cost associated with the action.
    """

    cost: Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
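Per the annotation above, either form below should satisfy the cost field; the numbers and the scaling rule are made up for illustration.

```python
import numpy as np

# A fixed, quantity-independent cost...
flat_cost = 1.5


# ...or a callable that derives the cost from the chosen quantity (float or array).
def scaled_cost(quantity):
    return 0.2 * float(np.asarray(quantity, dtype=float).sum())
```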