|
28 | 28 | import requests |
29 | 29 | import time |
30 | 30 | from typing import AsyncGenerator, List, Optional, Tuple, Dict |
31 | | -from prometheus_client import start_http_server, Histogram, Gauge |
| 31 | +from prometheus_client import start_http_server, Histogram, Gauge, Counter |
32 | 32 | import logging |
33 | 33 |
|
34 | 34 | import google.auth |
|
# Prometheus metrics exported by the load generator.
# Both latency histograms use power-of-two bucket bounds: 2, 4, ..., 32768 (ms).
tpot_metric = Histogram(
    'LatencyProfileGenerator:time_per_output_token_ms',
    'Time per output token per request (excluding first token) (ms)',
    buckets=[1 << exponent for exponent in range(1, 16)],
)
ttft_metric = Histogram(
    'LatencyProfileGenerator:time_to_first_token_ms',
    'Time to first token per request (ms)',
    buckets=[1 << exponent for exponent in range(1, 16)],
)
# Gauge: goes up when a request starts and down when it ends.
active_requests_metric = Gauge(
    'LatencyProfileGenerator:active_requests',
    'How many requests actively being processed',
)
# Counter: only ever incremented, once per request that is started.
total_request_count = Counter(
    'LatencyProfileGenerator:request_count',
    'How many total requests have been sent',
)
56 | 57 |
|
57 | 58 | # Add trace config for monitoring in-flight requests |
async def on_request_start(session, trace_config_ctx, params):
    """Trace hook invoked when a request starts.

    Increments the total-request counter and the in-flight request gauge.
    The ``session``, ``trace_config_ctx`` and ``params`` arguments are
    accepted to satisfy the hook signature but are not used here.
    """
    total_request_count.inc()
    active_requests_metric.inc()
60 | 62 |
|
61 | 63 | async def on_request_end(session, trace_config_ctx, params): |
62 | 64 | active_requests_metric.dec() |
@@ -760,6 +762,16 @@ def print_metrics(metrics: List[str], duration_sec: float, namespace: str, job: |
760 | 762 | logger.debug("HTTP Error: %s" % (response)) |
761 | 763 | continue |
762 | 764 | server_metrics[metric] = metric_results |
| 765 | + |
| 766 | + |
| 767 | + url='https://monitoring.googleapis.com/v1/projects/%s/location/global/prometheus/api/v1/query' % (project_id) |
| 768 | + headers_api = {'Authorization': 'Bearer ' + credentials.token} |
| 769 | + params = {'query': f'rate(LatencyProfileGenerator:request_count_total[{duration}s])'} |
| 770 | + logger.debug(f"Finding {query_name} {metric} with the following query: {query}") |
| 771 | + request_post = requests.get(url=url, headers=headers_api, params=params) |
| 772 | + response = request_post.json() |
| 773 | + print(f"Got response for benchmarking prom metrics: {response}") |
| 774 | + |
763 | 775 | return server_metrics |
764 | 776 |
|
765 | 777 | def get_stats_for_set(name, description, points): |
|
0 commit comments