From c572e5b33d16d9274865ac5be351287655baabc9 Mon Sep 17 00:00:00 2001 From: Frost Ming Date: Wed, 7 Feb 2024 15:05:41 +0800 Subject: [PATCH] feat: tpu_type support (#4493) * feat: tpu and tpu_type support Signed-off-by: Frost Ming * fix: tpu_type only Signed-off-by: Frost Ming * fix schema Signed-off-by: Frost Ming --------- Signed-off-by: Frost Ming --- src/_bentoml_sdk/service/config.py | 15 +++++++++++++++ src/_bentoml_sdk/service/factory.py | 3 +-- .../_internal/configuration/v2/__init__.py | 11 +++++++++-- typings/schema.pyi | 2 +- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/_bentoml_sdk/service/config.py b/src/_bentoml_sdk/service/config.py index 83983a64e59..b7e31cde51e 100644 --- a/src/_bentoml_sdk/service/config.py +++ b/src/_bentoml_sdk/service/config.py @@ -44,6 +44,21 @@ class ResourceSchema(TypedDict, total=False): "nvidia-tesla-k80", "nvidia-tesla-p4", ] + tpu_type: Literal[ + "v4-2x2x1", + "v4-2x2x2", + "v4-2x2x4", + "v4-2x4x4", + "v5p-2x2x1", + "v5p-2x2x2", + "v5p-2x2x4", + "v5p-2x4x4", + "v5e-1x1", + "v5e-2x2", + "v5e-2x4", + "v5e-4x4", + "v5e-4x8", + ] WorkerSchema = Union[Posint, Literal["cpu_count"]] diff --git a/src/_bentoml_sdk/service/factory.py b/src/_bentoml_sdk/service/factory.py index b0239195869..941003754f4 100644 --- a/src/_bentoml_sdk/service/factory.py +++ b/src/_bentoml_sdk/service/factory.py @@ -25,7 +25,6 @@ from ..api import APIMethod from .config import ServiceConfig as Config -from .config import validate logger = logging.getLogger("bentoml.io") @@ -345,7 +344,7 @@ class InferenceService: def predict(self, input: str) -> str: return input """ - config = validate(kwargs) + config = kwargs def decorator(inner: type[T]) -> Service[T]: if isinstance(inner, Service): diff --git a/src/bentoml/_internal/configuration/v2/__init__.py b/src/bentoml/_internal/configuration/v2/__init__.py index d2d129438b5..5fba15ab0e4 100644 --- a/src/bentoml/_internal/configuration/v2/__init__.py +++ b/src/bentoml/_internal/configuration/v2/__init__.py @@ -72,8 +72,15 @@ # NOTE: there is a distinction between being unset and None here; if set to 'None' # in configuration for a specific runner, it will override the global configuration. s.Optional("resources"): s.Or( - {s.Optional(str): object}, lambda s: s == "system", None - ), # type: ignore (incomplete schema typing) + { + s.Optional("cpu"): str, + s.Optional("memory"): str, + s.Optional("gpu"): s.And(Real, ensure_larger_than_zero), + s.Optional("gpu_type"): str, + s.Optional("tpu_type"): str, + }, + None, + ), s.Optional("workers"): s.Or( lambda s: s == "cpu_count", s.And(int, ensure_larger_than_zero), diff --git a/typings/schema.pyi b/typings/schema.pyi index 9cc4d18cbb1..88dadee7d2b 100644 --- a/typings/schema.pyi +++ b/typings/schema.pyi @@ -152,4 +152,4 @@ class Const(Schema): ... OpsType = Schema | And | Or | Use | Optional | Regex | Literal | Const AcceptedDictType = dict[str | OpsType, t.Any] _CallableLike = FunctionType | BuiltinFunctionType | t.Callable[..., t.Any] -_SchemaLike = _CallableLike | OpsType +_SchemaLike = _CallableLike | OpsType | AcceptedDictType