Skip to content

Commit 7e11c3a

Browse files
committed
Add prometheus metric for request count
1 parent 8e4a7a0 commit 7e11c3a

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

benchmark_serving.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
import requests
2929
import time
3030
from typing import AsyncGenerator, List, Optional, Tuple, Dict
31-
from prometheus_client import start_http_server, Histogram, Gauge
31+
from prometheus_client import start_http_server, Histogram, Gauge, Counter
3232
import logging
3333

3434
import google.auth
@@ -53,10 +53,12 @@
5353
tpot_metric = Histogram('LatencyProfileGenerator:time_per_output_token_ms', 'Time per output token per request (excluding first token) (ms)', buckets=[2**i for i in range(1, 16)])
5454
ttft_metric = Histogram('LatencyProfileGenerator:time_to_first_token_ms', 'Time to first token per request (ms)', buckets=[2**i for i in range(1, 16)])
5555
active_requests_metric = Gauge('LatencyProfileGenerator:active_requests', 'How many requests actively being processed')
56+
total_request_count = Counter('LatencyProfileGenerator:request_count', 'How many total requests have been sent')
5657

5758
# Add trace config for monitoring in flight requests
5859
async def on_request_start(session, trace_config_ctx, params):
    """Record the start of a request in the Prometheus metrics.

    Increments the active-requests gauge and then the total request
    counter, each by one. The ``session``, ``trace_config_ctx`` and
    ``params`` arguments are accepted but not read here.

    NOTE(review): the signature looks like an aiohttp trace callback;
    confirm against where this is registered.
    """
    # Same order as before: in-flight gauge first, cumulative counter second.
    for started_metric in (active_requests_metric, total_request_count):
        started_metric.inc()
6062

6163
async def on_request_end(session, trace_config_ctx, params):
6264
active_requests_metric.dec()
@@ -760,6 +762,16 @@ def print_metrics(metrics: List[str], duration_sec: float, namespace: str, job:
760762
logger.debug("HTTP Error: %s" % (response))
761763
continue
762764
server_metrics[metric] = metric_results
765+
766+
767+
url='https://monitoring.googleapis.com/v1/projects/%s/location/global/prometheus/api/v1/query' % (project_id)
768+
headers_api = {'Authorization': 'Bearer ' + credentials.token}
769+
params = {'query': f'rate(LatencyProfileGenerator:request_count_total[{duration}s])'}
770+
logger.debug(f"Finding {query_name} {metric} with the following query: {query}")
771+
request_post = requests.get(url=url, headers=headers_api, params=params)
772+
response = request_post.json()
773+
print(f"Got response for benchmarking prom metrics: {response}")
774+
763775
return server_metrics
764776

765777
def get_stats_for_set(name, description, points):

0 commit comments

Comments
 (0)