Skip to content

Commit

Permalink
feat: add adaptive batch size histogram (#4438)
Browse files — browse the repository at this point in the history
Signed-off-by: Frost Ming <[email protected]>
  • Loading branch information
frostming committed Feb 1, 2024
1 parent 2694230 commit 2cc2c54
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions src/_bentoml_impl/server/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from bentoml._internal.marshal.dispatcher import CorkDispatcher
from bentoml._internal.server.base_app import BaseAppFactory
from bentoml._internal.server.http_app import log_exception
from bentoml._internal.utils.metrics import exponential_buckets
from bentoml.exceptions import BentoMLException
from bentoml.exceptions import ServiceUnavailable

Expand Down Expand Up @@ -99,6 +100,30 @@ def fallback() -> t.NoReturn:
),
)

# Obtain the metrics client from the BentoML container.
metrics_client = BentoMLContainer.metrics_client.get()
# Largest max_batch_size declared by any batchable API of the service.
# It is passed as the third argument to exponential_buckets below.
max_max_batch_size = max(
(
method.max_batch_size
for method in service.apis.values()
if method.batchable
),
# Used when the service has no batchable API (the generator is empty).
default=100,
)

# Histogram metric "adaptive_batch_size" under the "bentoml_service" namespace.
# Observations must supply all five labels listed in labelnames.
self.adaptive_batch_size_hist = metrics_client.Histogram(
namespace="bentoml_service",
name="adaptive_batch_size",
documentation="Service adaptive batch size",
labelnames=[
"runner_name",
"worker_index",
"method_name",
"service_version",
"service_name",
],
# NOTE(review): presumably buckets start at 1 and grow by a factor of 2 up
# to max_max_batch_size — confirm against exponential_buckets' signature.
buckets=exponential_buckets(1, 2, max_max_batch_size),
)

async def index_page(self, _: Request) -> Response:
from starlette.responses import FileResponse

Expand Down Expand Up @@ -333,9 +358,21 @@ async def batch_infer(
async def inner_infer(
batches: t.Sequence[t.Any], **kwargs: t.Any
) -> t.Sequence[t.Any]:
from bentoml._internal.context import component_context
from bentoml._internal.runner.container import AutoContainer
from bentoml._internal.utils import is_async_callable

self.adaptive_batch_size_hist.labels( # type: ignore
runner_name=self.service.name,
worker_index=component_context.component_index,
method_name=name,
service_version=component_context.bento_version,
service_name=component_context.bento_name,
).observe(len(batches))

if len(batches) == 0:
return []

batch, indices = AutoContainer.batches_to_batch(
batches, method.batch_dim[0]
)
Expand Down

0 comments on commit 2cc2c54

Please sign in to comment.