Add prometheus metric for request count

jjk-g · jjk-g · commit 7e11c3a288e3 · 2025-06-17T16:51:07.000Z
diff --git a/benchmark_serving.py b/benchmark_serving.py
@@ -28,7 +28,7 @@
 import requests
 import time
 from typing import AsyncGenerator, List, Optional, Tuple, Dict
-from prometheus_client import start_http_server, Histogram, Gauge
+from prometheus_client import start_http_server, Histogram, Gauge, Counter
 import logging
 
 import google.auth
@@ -53,10 +53,12 @@
 tpot_metric = Histogram('LatencyProfileGenerator:time_per_output_token_ms', 'Time per output token per request (excluding first token) (ms)', buckets=[2**i for i in range(1, 16)])
 ttft_metric = Histogram('LatencyProfileGenerator:time_to_first_token_ms', 'Time to first token per request (ms)', buckets=[2**i for i in range(1, 16)])
 active_requests_metric = Gauge('LatencyProfileGenerator:active_requests', 'How many requests actively being processed')
+total_request_count = Counter('LatencyProfileGenerator:request_count', 'How many total requests have been sent')
 
 # Add trace config for monitoring in flight requests
 async def on_request_start(session, trace_config_ctx, params):
     active_requests_metric.inc()
+    total_request_count.inc()
 
 async def on_request_end(session, trace_config_ctx, params):
     active_requests_metric.dec()
@@ -760,6 +762,16 @@ def print_metrics(metrics: List[str], duration_sec: float, namespace: str, job:
         logger.debug("HTTP Error: %s" % (response))
         continue
     server_metrics[metric] = metric_results
+
+
+  url='https://monitoring.googleapis.com/v1/projects/%s/location/global/prometheus/api/v1/query' % (project_id)
+  headers_api = {'Authorization': 'Bearer ' + credentials.token}
+  params = {'query': f'rate(LatencyProfileGenerator:request_count_total[{duration}s])'}
+  logger.debug(f"Finding {query_name} {metric} with the following query: {query}")
+  request_post = requests.get(url=url, headers=headers_api, params=params)
+  response = request_post.json()
+  print(f"Got response for benchmarking prom metrics: {response}")
+  
   return server_metrics
 
 def get_stats_for_set(name, description, points):