Fixes OPEN-5957: completion tokens not being computed by OpenAIMonitor when stream=True

gustavocidornelas · whoseoyster · commit 79910f96b2ca · 2024-04-09T07:13:55.000-07:00
diff --git a/openlayer/llm_monitors.py b/openlayer/llm_monitors.py
@@ -211,6 +211,8 @@ def stream_chunks():
                             raw_outputs.append(chunk.model_dump())
                             if i == 0:
                                 first_token_time = time.time()
+                            if i > 0:
+                                num_of_completion_tokens = i + 1
 
                             delta = chunk.choices[0].delta
 
@@ -236,8 +238,6 @@ def stream_chunks():
                                     ] += delta.tool_calls[0].function.arguments
 
                             yield chunk
-                        if i > 0:
-                            num_of_completion_tokens = i + 1
                         end_time = time.time()
                         latency = (end_time - start_time) * 1000
                     # pylint: disable=broad-except