Add zooming quantitative bandit model
### Changes:
* Added quantitative model support for continuous action spaces using the zooming algorithm (see the illustrative sketch after this list).
* Added base model classes to separate single-objective, multi-objective, and cost-control models.
* Refactored MAB classes to support both discrete and continuous action spaces.
* Updated the test suite with new test cases for quantitative models and refactored it for robustness.
* Added serialization support for quantitative models.
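To make the first bullet concrete, here is a minimal toy sketch of the zooming idea on a single quantity in [0, 1]. It is not the pybandits implementation; the reward function and the refinement schedule are invented purely for illustration.

```python
# Toy sketch of the zooming idea: repeatedly sample a handful of candidate
# quantities in [0, 1], then shrink the search window around the one that
# earned the best observed reward. Not the pybandits implementation.
import random


def toy_zooming(reward_fn, n_passes=20, n_points=5):
    lo, hi = 0.0, 1.0
    best_q = 0.5
    for _ in range(n_passes):
        width = (hi - lo) / n_points
        # One noisy reward per candidate quantity in the current window.
        samples = [(reward_fn(lo + (i + 0.5) * width), lo + (i + 0.5) * width) for i in range(n_points)]
        _, best_q = max(samples)  # quantity with the best sampled reward
        # Zoom: center a narrower window on the winner for the next pass.
        lo, hi = max(0.0, best_q - width), min(1.0, best_q + width)
    return best_q


if __name__ == "__main__":
    def bernoulli(q):
        # Hypothetical reward whose success probability peaks near q = 0.7.
        return float(random.random() < 1.0 - abs(q - 0.7))

    print(round(toy_zooming(bernoulli), 3))
```

The point is only that a continuous quantity is handled by adaptive refinement rather than by enumerating a fixed grid of discrete actions.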
Shahar-Bar committed Jan 28, 2025
1 parent 64913ef commit ff06dff
Showing 19 changed files with 2,742 additions and 1,730 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -65,3 +65,6 @@ MANIFEST

# poetry
poetry.lock

# qodo gen
.qodo
81 changes: 77 additions & 4 deletions pybandits/base.py
@@ -21,7 +21,9 @@
# SOFTWARE.


from typing import Any, Dict, List, NewType, Tuple, Union
from typing import Any, Dict, List, Mapping, NewType, Optional, Tuple, Union

from typing_extensions import Self

from pybandits.pydantic_version_compatibility import (
    PYDANTIC_VERSION_1,
@@ -34,24 +36,52 @@
)

ActionId = NewType("ActionId", constr(min_length=1))
QuantitativeActionId = Tuple[ActionId, Tuple[float, ...]]
UnifiedActionId = Union[ActionId, QuantitativeActionId]
Float01 = NewType("Float_0_1", confloat(ge=0, le=1))
Probability = NewType("Probability", Float01)
ProbabilityWeight = Tuple[Probability, float]
MOProbability = List[Probability]
MOProbabilityWeight = List[ProbabilityWeight]
# QuantitativeProbability generalizes probability to include both action quantities and their associated probability
QuantitativeProbability = Dict[Tuple[float, ...], Probability]
QuantitativeProbabilityWeight = Dict[Tuple[float, ...], ProbabilityWeight]
QuantitativeMOProbability = Dict[Tuple[float, ...], List[Probability]]
QuantitativeMOProbabilityWeight = Dict[Tuple[float, ...], List[ProbabilityWeight]]
UnifiedProbability = Union[Probability, QuantitativeProbability]
UnifiedProbabilityWeight = Union[ProbabilityWeight, QuantitativeProbabilityWeight]
UnifiedMOProbability = Union[MOProbability, QuantitativeMOProbability]
UnifiedMOProbabilityWeight = Union[MOProbabilityWeight, QuantitativeMOProbabilityWeight]
# SmabPredictions is a tuple of two lists: the first list contains the selected action ids,
# and the second list contains their associated probabilities
SmabPredictions = NewType("SmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]])
SmabPredictions = NewType(
    "SmabPredictions",
    Tuple[
        List[UnifiedActionId],
        Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, MOProbability]]],
    ],
)
# CmabPredictions is a tuple of three lists: the first list contains the selected action ids,
# the second list contains their associated probabilities,
# and the third list contains their associated weighted sums
CmabPredictions = NewType(
    "CmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]], List[Dict[ActionId, float]]]
    "CmabPredictions",
    Union[
        Tuple[List[UnifiedActionId], List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, float]]],
        Tuple[
            List[UnifiedActionId], List[Dict[UnifiedActionId, MOProbability]], List[Dict[UnifiedActionId, List[float]]]
        ],
    ],
)
Predictions = NewType("Predictions", Union[SmabPredictions, CmabPredictions])
BinaryReward = NewType("BinaryReward", conint(ge=0, le=1))
ActionRewardLikelihood = NewType(
    "ActionRewardLikelihood",
    Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]],
    Union[Dict[UnifiedActionId, float], Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]],
)
Serializable = Union[str, int, float, bool, None, List["Serializable"], Dict[str, "Serializable"]]
ACTION_IDS_PREFIX = "action_ids_"
QUANTITATIVE_ACTION_IDS_PREFIX = f"quantitative_{ACTION_IDS_PREFIX}"
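To ground the new aliases above, here are hand-written example values. The action name "discount" and all quantities are hypothetical, and the import assumes a pybandits install that includes this commit.

```python
# Hypothetical values for the new aliases in pybandits/base.py (illustration only).
from pybandits.base import QuantitativeActionId, QuantitativeProbability, UnifiedActionId

plain_action = "discount"                                     # a discrete ActionId
quant_action: QuantitativeActionId = ("discount", (0.25,))    # action id plus quantity tuple
either_form: UnifiedActionId = quant_action                   # UnifiedActionId accepts both forms

# QuantitativeProbability: each sampled quantity maps to its reward probability.
quant_proba: QuantitativeProbability = {(0.25,): 0.61, (0.75,): 0.48}

# A SmabPredictions-style value: selected actions plus per-action probabilities.
predictions = (
    ["discount", ("discount", (0.25,))],
    [{"discount": 0.70}, {("discount", (0.25,)): 0.61}],
)
```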


class _classproperty(property):
@@ -109,3 +139,46 @@ def model_fields(cls) -> Dict[str, Any]:
        The model fields.
        """
        return cls.__fields__

    def model_copy(self, *, update: Optional[Mapping[str, Any]] = None, deep: bool = False) -> Self:
        """
        Create a new instance of the model with the same quantities.
        Parameters
        ----------
        update : Mapping[str, Any], optional
            The quantities to update, by default None
        deep : bool, optional
            Whether to copy the quantities deeply, by default False
        Returns
        -------
        Self
            The new instance of the model.
        """
        return self.copy(update=update, deep=deep)

    @classmethod
    def model_validate(
        cls,
        obj: Any,
    ) -> Self:
        """
        Validate a PyBandits BaseModel model instance.
        Parameters
        ----------
        obj : Any
            The object to validate. Use state dictionary to generate model from state.
        Raises
        ------
        ValidationError: If the object could not be validated.
        Returns
        -------
        Self
            The validated model instance.
        """
        return cls.parse_obj(obj)
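The two methods above expose the pydantic v2 method names on top of the v1 calls (copy and parse_obj). A minimal usage sketch follows; the Item subclass is hypothetical and exists only to show the call pattern.

```python
# Hypothetical subclass used only to illustrate the compatibility shims above.
from pybandits.base import PyBanditsBaseModel


class Item(PyBanditsBaseModel):
    name: str
    weight: float = 1.0


original = Item(name="a")
tweaked = original.model_copy(update={"weight": 2.0})          # copy with one field changed
restored = Item.model_validate({"name": "b", "weight": 3.0})   # build and validate from a dict
```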
94 changes: 94 additions & 0 deletions pybandits/base_model.py
@@ -0,0 +1,94 @@
from abc import ABC, abstractmethod
from typing import Callable, List, Union

import numpy as np

from pybandits.base import BinaryReward, Probability, PyBanditsBaseModel, QuantitativeProbability
from pybandits.pydantic_version_compatibility import NonNegativeFloat


class BaseModel(PyBanditsBaseModel, ABC):
    """
    Class to model the prior distributions of standard actions and quantitative actions.
    """

    @abstractmethod
    def sample_proba(self) -> Union[Probability, QuantitativeProbability]:
        """
        Sample the probability of getting a positive reward.
        """

    @abstractmethod
    def update(self, rewards: Union[List[BinaryReward], List[List[BinaryReward]]], **kwargs):
        """
        Update the model parameters.
        Parameters
        ----------
        rewards : Union[List[BinaryReward], List[List[BinaryReward]]],
            if nested list, len() should follow shape of (n_samples, n_objectives)
            The binary reward for each sample.
            If strategy is not MultiObjectiveBandit, rewards should be a list, e.g.
                rewards = [1, 0, 1, 1, 1, ...]
            If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2):
                rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
        """


class BaseModelSO(BaseModel, ABC):
    """
    Class to model the prior distributions of standard actions and quantitative actions for single objective.
    """

    @abstractmethod
    def update(self, rewards: List[BinaryReward], **kwargs):
        """
        Update the model parameters.
        Parameters
        ----------
        rewards : List[BinaryReward],
            The binary reward for each sample.
        """

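As a rough illustration of the contract BaseModelSO defines (sample_proba plus update), a toy Beta-Bernoulli subclass might look like the sketch below. It is not one of the model classes shipped in this commit, and it assumes the base class permits in-place field updates.

```python
# Toy sketch of a BaseModelSO subclass; field names and the update rule are
# illustrative, not taken from pybandits' shipped models.
import random
from typing import List

from pybandits.base import BinaryReward, Probability
from pybandits.base_model import BaseModelSO


class ToyBetaModel(BaseModelSO):
    n_successes: int = 1  # Beta prior alpha
    n_failures: int = 1   # Beta prior beta

    def sample_proba(self) -> Probability:
        # Thompson-sampling style draw from the Beta posterior.
        return random.betavariate(self.n_successes, self.n_failures)

    def update(self, rewards: List[BinaryReward], **kwargs):
        # Count positive and negative binary rewards into the posterior.
        self.n_successes += sum(rewards)
        self.n_failures += len(rewards) - sum(rewards)
```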

class BaseModelMO(BaseModel, ABC):
    """
    Class to model the prior distributions of standard actions and quantitative actions for multi-objective.
    Parameters
    ----------
    models : List[BaseModelSO]
        The list of models for each objective.
    """

    models: List[BaseModelSO]

    @abstractmethod
    def update(self, rewards: List[List[BinaryReward]], **kwargs):
        """
        Update the model parameters.
        Parameters
        ----------
        rewards : List[List[BinaryReward]],
            if nested list, len() should follow shape of (n_samples, n_objectives)
            The binary rewards for each sample.
            If strategy is not MultiObjectiveBandit, rewards should be a list, e.g.
                rewards = [1, 0, 1, 1, 1, ...]
            If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2):
                rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
        """


class BaseModelCC(PyBanditsBaseModel, ABC):
    """
    Class to model action cost.
    Parameters
    ----------
    cost : Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
        Cost associated with the action.
    """

    cost: Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
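Per the annotation above, either form below should satisfy the cost field; the numbers and the scaling rule are made up for illustration.

```python
import numpy as np

# A fixed, quantity-independent cost...
flat_cost = 1.5


# ...or a callable that derives the cost from the chosen quantity (float or array).
def scaled_cost(quantity):
    return 0.2 * float(np.asarray(quantity, dtype=float).sum())
```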