Add zooming quantitative bandit model #72

Open · wants to merge 1 commit into base: develop
9 changes: 7 additions & 2 deletions .github/workflows/continuous_delivery.yml
@@ -25,8 +25,13 @@ jobs:

- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
if [[ "${{ matrix.python-version }}" == "3.8" ]]; then
curl -sSL https://install.python-poetry.org | python3 - --version 1.8.0
export PATH="$HOME/.poetry/bin:$PATH"
else
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
fi
- name: Backup pyproject.toml
run: cp pyproject.toml pyproject.toml.bak
- name: Install project dependencies with Poetry
9 changes: 7 additions & 2 deletions .github/workflows/continuous_integration.yml
@@ -33,8 +33,13 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
if [[ "${{ matrix.python-version }}" == "3.8" ]]; then
curl -sSL https://install.python-poetry.org | python3 - --version 1.8.0
export PATH="$HOME/.poetry/bin:$PATH"
else
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
fi
- name: Install project dependencies with Poetry
run: |
poetry add pydantic@${{ matrix.pydantic-version }}
3 changes: 3 additions & 0 deletions .gitignore
@@ -65,3 +65,6 @@ MANIFEST

# poetry
poetry.lock

# qodo gen
.qodo
93 changes: 89 additions & 4 deletions pybandits/base.py
@@ -21,7 +21,9 @@
# SOFTWARE.


from typing import Any, Dict, List, NewType, Tuple, Union
from typing import Any, Dict, List, Mapping, NewType, Optional, Tuple, Union

from typing_extensions import Self

from pybandits.pydantic_version_compatibility import (
PYDANTIC_VERSION_1,
@@ -34,24 +36,52 @@
)

ActionId = NewType("ActionId", constr(min_length=1))
QuantitativeActionId = Tuple[ActionId, Tuple[float, ...]]
UnifiedActionId = Union[ActionId, QuantitativeActionId]
Float01 = NewType("Float_0_1", confloat(ge=0, le=1))
Probability = NewType("Probability", Float01)
ProbabilityWeight = Tuple[Probability, float]
MOProbability = List[Probability]
MOProbabilityWeight = List[ProbabilityWeight]
# QuantitativeProbability generalizes probability to include both action quantities and their associated probability
QuantitativeProbability = Tuple[Tuple[Tuple[Float01, ...], Probability], ...]
QuantitativeProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], ProbabilityWeight], ...]
QuantitativeMOProbability = Tuple[Tuple[Tuple[Float01, ...], List[Probability]], ...]
QuantitativeMOProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], List[ProbabilityWeight]], ...]
UnifiedProbability = Union[Probability, QuantitativeProbability]
UnifiedProbabilityWeight = Union[ProbabilityWeight, QuantitativeProbabilityWeight]
UnifiedMOProbability = Union[MOProbability, QuantitativeMOProbability]
UnifiedMOProbabilityWeight = Union[MOProbabilityWeight, QuantitativeMOProbabilityWeight]
# SmabPredictions is a tuple of two lists: the first list contains the selected action ids,
# and the second list contains their associated probabilities
SmabPredictions = NewType("SmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]])
SmabPredictions = NewType(
"SmabPredictions",
Tuple[
List[UnifiedActionId],
Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, MOProbability]]],
],
)
# CmabPredictions is a tuple of three lists: the first list contains the selected action ids,
# the second list contains their associated probabilities,
# and the third list contains their associated weighted sums
CmabPredictions = NewType(
"CmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]], List[Dict[ActionId, float]]]
"CmabPredictions",
Union[
Tuple[List[UnifiedActionId], List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, float]]],
Tuple[
List[UnifiedActionId], List[Dict[UnifiedActionId, MOProbability]], List[Dict[UnifiedActionId, List[float]]]
],
],
)
Predictions = NewType("Predictions", Union[SmabPredictions, CmabPredictions])
BinaryReward = NewType("BinaryReward", conint(ge=0, le=1))
ActionRewardLikelihood = NewType(
"ActionRewardLikelihood",
Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]],
Union[Dict[UnifiedActionId, float], Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]],
)
Serializable = Union[str, int, float, bool, None, List["Serializable"], Dict[str, "Serializable"]]
ACTION_IDS_PREFIX = "action_ids_"
QUANTITATIVE_ACTION_IDS_PREFIX = f"quantitative_{ACTION_IDS_PREFIX}"


class _classproperty(property):
Expand All @@ -74,6 +104,18 @@ def __init__(self, **data):
def model_post_init(self, __context: Any) -> None:
pass

def _validate_params_lengths(
self,
**kwargs,
):
"""
Verify that the given keyword arguments have the same length.
"""
reference = len(next(iter(kwargs.values())))
for k, v in kwargs.items():
if v is not None and len(v) != reference:
raise AttributeError(f"Shape mismatch: {k} should have the same length as the other parameters.")

def _apply_version_adjusted_method(self, v2_method_name: str, v1_method_name: str, **kwargs) -> Any:
"""
Apply the method with the given name, adjusting for the pydantic version.
@@ -109,3 +151,46 @@ def model_fields(cls) -> Dict[str, Any]:
The model fields.
"""
return cls.__fields__

def model_copy(self, *, update: Optional[Mapping[str, Any]] = None, deep: bool = False) -> Self:
"""
Create a copy of the model, optionally updating some of its field values.

Parameters
----------
update : Mapping[str, Any], optional
The field values to update, by default None

deep : bool, optional
Whether to make a deep copy, by default False

Returns
-------
Self
The new instance of the model.
"""
return self.copy(update=update, deep=deep)

@classmethod
def model_validate(
cls,
obj: Any,
) -> Self:
"""
Validate an object into a PyBandits BaseModel instance.

Parameters
----------
obj : Any
The object to validate. Pass a state dictionary to generate the model from a stored state.

Raises
------
ValidationError: If the object could not be validated.

Returns
-------
Self
The validated model instance.
"""
return cls.parse_obj(obj)
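
As a reading aid for the new aliases above, here is a small illustrative sketch that is not part of the diff. The action ids, quantities, and probabilities are invented; only the structure, a plain string id versus an (id, quantities) pair and the nested tuples of QuantitativeProbability, follows the definitions in base.py.

from typing import Dict, List, Tuple, Union

# Plain action: just a string id (ActionId).
plain_action = "action_a"
# Quantitative action: the id paired with a tuple of quantities in [0, 1]
# (QuantitativeActionId = Tuple[ActionId, Tuple[float, ...]]).
quantitative_action = ("action_b", (0.25,))
# UnifiedActionId covers both forms.
actions: List[Union[str, Tuple[str, Tuple[float, ...]]]] = [plain_action, quantitative_action]

# QuantitativeProbability: for each candidate quantity vector, the probability of a positive reward.
quantitative_probability: Tuple[Tuple[Tuple[float, ...], float], ...] = (
    ((0.25,), 0.55),  # quantity 0.25 -> estimated probability 0.55
    ((0.75,), 0.70),  # quantity 0.75 -> estimated probability 0.70
)

# SmabPredictions pairs the selected UnifiedActionIds with per-action probabilities.
best_actions = [quantitative_action]
probabilities: List[Dict[object, float]] = [{plain_action: 0.41, quantitative_action: 0.70}]
smab_predictions = (best_actions, probabilities)
print(smab_predictions)
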
128 changes: 128 additions & 0 deletions pybandits/base_model.py
@@ -0,0 +1,128 @@
from abc import ABC, abstractmethod
from typing import Callable, List, Union

import numpy as np

from pybandits.base import (
BinaryReward,
MOProbability,
Probability,
ProbabilityWeight,
PyBanditsBaseModel,
QuantitativeMOProbability,
QuantitativeProbability,
QuantitativeProbabilityWeight,
)
from pybandits.pydantic_version_compatibility import NonNegativeFloat


class BaseModel(PyBanditsBaseModel, ABC):
"""
Class to model the prior distributions of standard actions and quantitative actions.
"""

@abstractmethod
def sample_proba(
self, **kwargs
) -> Union[
List[Probability],
List[MOProbability],
List[ProbabilityWeight],
List[QuantitativeProbability],
List[QuantitativeMOProbability],
List[QuantitativeProbabilityWeight],
]:
"""
Sample the probability of getting a positive reward.
"""

@abstractmethod
def update(self, rewards: Union[List[BinaryReward], List[List[BinaryReward]]], **kwargs):
"""
Update the model parameters.

Parameters
----------
rewards : Union[List[BinaryReward], List[List[BinaryReward]]]
The binary reward for each sample; if the list is nested, its shape should be (n_samples, n_objectives).
If the strategy is not MultiObjectiveBandit, rewards should be a flat list, e.g.
rewards = [1, 0, 1, 1, 1, ...]
If the strategy is MultiObjectiveBandit, rewards should be a list of lists, e.g. (with n_objectives=2):
rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
"""


class BaseModelSO(BaseModel, ABC):
"""
Class to model the prior distributions of standard actions and quantitative actions for single objective.
"""

@abstractmethod
def sample_proba(
self, **kwargs
) -> Union[
List[Probability], List[ProbabilityWeight], List[QuantitativeProbability], List[QuantitativeProbabilityWeight]
]:
"""
Sample the probability of getting a positive reward.
"""

@abstractmethod
def update(self, rewards: List[BinaryReward], **kwargs):
"""
Update the model parameters.

Parameters
----------
rewards : List[BinaryReward]
The binary reward for each sample.
"""


class BaseModelMO(BaseModel, ABC):
"""
Class to model the prior distributions of standard actions and quantitative actions for multi-objective.

Parameters
----------
models : List[BaseModelSO]
The list of models for each objective.
"""

models: List[BaseModelSO]

@abstractmethod
def sample_proba(self, **kwargs) -> Union[List[MOProbability], List[QuantitativeMOProbability]]:
"""
Sample the probability of getting a positive reward.
"""

@abstractmethod
def update(self, rewards: List[List[BinaryReward]], **kwargs):
"""
Update the model parameters.

Parameters
----------
rewards : List[List[BinaryReward]]
The binary rewards for each sample, as a nested list of shape (n_samples, n_objectives), e.g. (with n_objectives=2):
rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
"""


class BaseModelCC(PyBanditsBaseModel, ABC):
"""
Class to model action cost.

Parameters
----------
cost: Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
Cost associated with the action, either as a fixed non-negative value or as a function of the action quantity.
"""

cost: Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
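
To show how the BaseModelSO contract is meant to be used (sample_proba returns per-sample probabilities of a positive reward, update consumes binary rewards), here is a standalone toy Beta-Bernoulli sketch. It does not subclass the classes in this PR, and the n_samples parameter is an assumption made for illustration; it only mirrors the interface documented above.

import random
from typing import List


class ToyBetaModelSO:
    """Toy single-objective model mirroring the BaseModelSO contract (illustration only)."""

    def __init__(self) -> None:
        self.n_successes = 1  # Beta prior alpha
        self.n_failures = 1   # Beta prior beta

    def sample_proba(self, n_samples: int = 1) -> List[float]:
        # Sample probabilities of a positive reward from the Beta posterior.
        return [random.betavariate(self.n_successes, self.n_failures) for _ in range(n_samples)]

    def update(self, rewards: List[int]) -> None:
        # Binary rewards: each 1 increments successes, each 0 increments failures.
        self.n_successes += sum(rewards)
        self.n_failures += len(rewards) - sum(rewards)


model = ToyBetaModelSO()
model.update([1, 0, 1, 1])
print(model.sample_proba(n_samples=3))
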