Commit d699be9

Should not expect latency in result if errors is not null (#12)
* first commit
* refactor
1 parent 2a309a5 commit d699be9

File tree

1 file changed
benchmark_serving.py

Lines changed: 15 additions & 18 deletions
@@ -476,25 +476,22 @@ async def benchmark(
         if res is None:
             continue
         latency, ttft, itl, errors = res
-        prompt_len, output_len, request_latency = latency
-        overall_results["latencies"].append(latency)
-        if ttft:
-            overall_results["ttfts"].append(ttft)
-            overall_results["tpots"].append((request_latency - ttft) / (output_len - 1) if output_len > 1 else 0)
-        if itl:
-            overall_results["itls"].extend(itl)
         if errors:
-            for k, v in errors.items():
-                overall_results["errors"][k] += v
-        per_model_results[chosen_model]["latencies"].append(latency)
-        if ttft:
-            per_model_results[chosen_model]["ttfts"].append(ttft)
-            per_model_results[chosen_model]["tpots"].append((request_latency - ttft) / (output_len - 1) if output_len > 1 else 0)
-        if itl:
-            per_model_results[chosen_model]["itls"].extend(itl)
-        if errors:
-            for k, v in errors.items():
-                per_model_results[chosen_model]["errors"][k] += v
+            for k, v in errors.items():
+                overall_results["errors"][k] += v
+                per_model_results[chosen_model]["errors"][k] += v
+        else:
+            prompt_len, output_len, request_latency = latency
+            overall_results["latencies"].append(latency)
+            per_model_results[chosen_model]["latencies"].append(latency)
+            if ttft:
+                overall_results["ttfts"].append(ttft)
+                overall_results["tpots"].append((request_latency - ttft) / (output_len - 1) if output_len > 1 else 0)
+                per_model_results[chosen_model]["ttfts"].append(ttft)
+                per_model_results[chosen_model]["tpots"].append((request_latency - ttft) / (output_len - 1) if output_len > 1 else 0)
+            if itl:
+                overall_results["itls"].extend(itl)
+                per_model_results[chosen_model]["itls"].extend(itl)
 
     benchmark_duration = time.time() - benchmark_start_time
 
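For context, here is a minimal sketch of the per-request result tuple this loop consumes, assuming the producer side (not shown in this diff) fills only the error counts on failure. The function name make_result and the sample values are hypothetical. It illustrates why the new code branches on errors before unpacking latency: per the commit title, latency is not expected to be present when errors is set, so the old unconditional prompt_len, output_len, request_latency = latency could fail on error results.

def make_result(success: bool):
    # Hypothetical producer of the (latency, ttft, itl, errors) tuple
    # consumed by the benchmark loop above; not from this repo.
    if success:
        latency = (128, 256, 1.85)    # (prompt_len, output_len, request_latency)
        ttft = 0.35                   # time to first token, seconds
        itl = [0.012, 0.011, 0.013]   # inter-token latencies, seconds
        errors = None
    else:
        # On failure only the error counts are meaningful; latency may be
        # None, so unpacking it unconditionally would raise a TypeError.
        latency, ttft, itl = None, None, None
        errors = {"ClientConnectorError": 1}
    return latency, ttft, itl, errors

On the success path, the TPOT expression (request_latency - ttft) / (output_len - 1) spreads the decode time over output_len - 1 tokens because the first token is already covered by TTFT; with the sample values above, (1.85 - 0.35) / 255 ≈ 0.0059 s per output token.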