@@ -476,25 +476,22 @@ async def benchmark(
         if res is None:
             continue
         latency, ttft, itl, errors = res
-        prompt_len, output_len, request_latency = latency
-        overall_results["latencies"].append(latency)
-        if ttft:
-            overall_results["ttfts"].append(ttft)
-            overall_results["tpots"].append((request_latency - ttft) / (output_len - 1) if output_len > 1 else 0)
-        if itl:
-            overall_results["itls"].extend(itl)
         if errors:
-            for k, v in errors.items():
-                overall_results["errors"][k] += v
-        per_model_results[chosen_model]["latencies"].append(latency)
-        if ttft:
-            per_model_results[chosen_model]["ttfts"].append(ttft)
-            per_model_results[chosen_model]["tpots"].append((request_latency - ttft) / (output_len - 1) if output_len > 1 else 0)
-        if itl:
-            per_model_results[chosen_model]["itls"].extend(itl)
-        if errors:
-            for k, v in errors.items():
-                per_model_results[chosen_model]["errors"][k] += v
+            for k, v in errors.items():
+                overall_results["errors"][k] += v
+                per_model_results[chosen_model]["errors"][k] += v
+        else:
+            prompt_len, output_len, request_latency = latency
+            overall_results["latencies"].append(latency)
+            per_model_results[chosen_model]["latencies"].append(latency)
+            if ttft:
+                overall_results["ttfts"].append(ttft)
+                overall_results["tpots"].append((request_latency - ttft) / (output_len - 1) if output_len > 1 else 0)
+                per_model_results[chosen_model]["ttfts"].append(ttft)
+                per_model_results[chosen_model]["tpots"].append((request_latency - ttft) / (output_len - 1) if output_len > 1 else 0)
+            if itl:
+                overall_results["itls"].extend(itl)
+                per_model_results[chosen_model]["itls"].extend(itl)

     benchmark_duration = time.time() - benchmark_start_time

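For reference, a minimal standalone sketch of the accumulation logic introduced by this hunk (the helper names `make_bucket` and `record` are hypothetical, not part of the patch): a request that returned errors only increments the error counters in both the overall and the per-model buckets, while a successful request feeds latency, TTFT, TPOT, and ITL into both, with TPOT taken as `(request_latency - ttft) / (output_len - 1)` when more than one output token was generated.

```python
from collections import defaultdict

def make_bucket():
    # One aggregation bucket, mirroring the shape of overall_results / per_model_results.
    return {"latencies": [], "ttfts": [], "tpots": [], "itls": [], "errors": defaultdict(int)}

def record(res, model, overall, per_model):
    """Fold one request result into the overall and per-model buckets."""
    if res is None:
        return
    latency, ttft, itl, errors = res
    if errors:
        # Failed request: count errors in both buckets, skip latency metrics.
        for k, v in errors.items():
            overall["errors"][k] += v
            per_model[model]["errors"][k] += v
        return
    prompt_len, output_len, request_latency = latency
    for bucket in (overall, per_model[model]):
        bucket["latencies"].append(latency)
        if ttft:
            bucket["ttfts"].append(ttft)
            # TPOT: decode time spread over the tokens after the first one.
            bucket["tpots"].append((request_latency - ttft) / (output_len - 1) if output_len > 1 else 0)
        if itl:
            bucket["itls"].extend(itl)

# Example: one successful and one failed request against a hypothetical model name.
overall = make_bucket()
per_model = defaultdict(make_bucket)
record(((128, 64, 2.5), 0.3, [0.03] * 63, None), "model-a", overall, per_model)
record((None, None, None, {"timeout": 1}), "model-a", overall, per_model)
print(overall["tpots"], dict(overall["errors"]))
```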