3838from ._models import ErrorMessage
3939from ._models import LanguagePackInfo
4040from ._models import MessageTimeMetadata
41- from ._models import MetricsMessage
4241from ._models import SegmentMessage
4342from ._models import SegmentMessageSegment
4443from ._models import SegmentMessageSegmentFragment
44+ from ._models import SessionMetricsMessage
4545from ._models import SessionSpeaker
4646from ._models import SpeakerFocusConfig
4747from ._models import SpeakerFocusMode
5151from ._models import SpeechFragment
5252from ._models import SpeechSegmentEmitMode
5353from ._models import TranscriptionUpdatePreset
54- from ._models import TTFBMetricsMessage
5554from ._models import TurnPredictionMessage
5655from ._models import TurnPredictionMetadata
5756from ._models import TurnStartEndMessage
@@ -185,8 +184,7 @@ def __init__(
185184 self ._total_time : float = 0
186185 self ._total_bytes : int = 0
187186
188- # TTFB metrics
189- self ._last_ttfb_time : Optional [float ] = None
187+ # Latency metrics
190188 self ._last_ttfb : float = 0
191189
192190 # Time to disregard speech fragments before
@@ -245,7 +243,7 @@ def __init__(
245243 self ._dz_config = self ._config .speaker_config
246244
247245 # Metrics emitter task
248- self ._metrics_emitter_interval : float = 10 .0
246+ self ._metrics_emitter_interval : float = 5 .0
249247 self ._metrics_emitter_task : Optional [asyncio .Task ] = None
250248
251249 # Audio sampling info
@@ -741,27 +739,44 @@ def _start_metrics_task(self) -> None:
741739
742740 # Task to send metrics
743741 async def emit_metrics () -> None :
742+ # Tracker
743+ last_emission_time = self ._total_time
744+
745+ # Emit metrics
744746 while True :
745- # Interval between emitting metrics
746- await asyncio .sleep (self ._metrics_emitter_interval )
747+ # Calculate when the next emission should occur
748+ next_emission_time = (
749+ last_emission_time // self ._metrics_emitter_interval + 1
750+ ) * self ._metrics_emitter_interval
747751
748752 # Check if there are any listeners for AgentServerMessageType.METRICS
749- if not self .listeners (AgentServerMessageType .METRICS ):
750- break
753+ if not self .listeners (AgentServerMessageType .SESSION_METRICS ):
754+ await asyncio .sleep (self ._metrics_emitter_interval )
755+ last_emission_time = self ._total_time
756+ continue
757+
758+ # Wait until we've actually reached that time
759+ while self ._total_time < next_emission_time :
760+ time_to_wait = next_emission_time - self ._total_time
761+ await asyncio .sleep (min (0.25 , time_to_wait ))
751762
752763 # Calculations
753- time_s = round (self ._total_time , 3 )
764+ total_time = self ._total_time
765+ total_bytes = self ._total_bytes
754766
755767 # Emit metrics
756768 self ._emit_message (
757- MetricsMessage (
758- total_time = time_s ,
759- total_time_str = time .strftime ("%H:%M:%S" , time .gmtime (time_s )),
760- total_bytes = self . _total_bytes ,
761- last_ttfb = int (self ._last_ttfb ),
769+ SessionMetricsMessage (
770+ total_time = round ( total_time , 1 ) ,
771+ total_time_str = time .strftime ("%H:%M:%S" , time .gmtime (total_time )),
772+ total_bytes = total_bytes ,
773+ processing_time = round (self ._last_ttfb , 3 ),
762774 )
763775 )
764776
777+ # Update tracker
778+ last_emission_time = total_time
779+
765780 # Trigger the task
766781 self ._metrics_emitter_task = asyncio .create_task (emit_metrics ())
767782
@@ -782,36 +797,15 @@ def _calculate_ttfb(self, end_time: float) -> None:
782797 end_time: The end time of the payload from the STT engine.
783798 """
784799
785- # Skip if not enabled
786- if not (self .listeners (AgentServerMessageType .TTFB_METRICS ) or self .listeners (AgentServerMessageType .METRICS )):
787- return
788-
789- # Skip if no fragments are words
790- if len (self ._speech_fragments ) == 0 or all (f .type_ != "word" for f in self ._speech_fragments ):
791- return
792-
793- # Get start of the first fragment
794- fragments_start_time = self ._speech_fragments [0 ].start_time
795-
796- # Skip if no partial word or if we have already calculated the TTFB for this word
797- if self ._last_ttfb_time and fragments_start_time <= self ._last_ttfb_time :
798- return
799-
800800 # Calculate the time difference (convert to ms)
801- ttfb = ( self ._total_time - end_time ) * 1000.0
801+ ttfb = self ._total_time - end_time
802802
803803 # Skip if zero or less
804804 if ttfb <= 0 :
805805 return
806806
807807 # Save TTFB and end time
808808 self ._last_ttfb = ttfb
809- self ._last_ttfb_time = end_time
810-
811- # Emit the TTFB
812- self ._emit_message (
813- TTFBMetricsMessage (ttfb = int (self ._last_ttfb )),
814- )
815809
816810 def calculate_speaker_metrics (self , final_segments : list [SpeakerSegment ]) -> None :
817811 """Calculate the speaker metrics.
@@ -1342,7 +1336,7 @@ def add_multipler_reason(multiplier: float, reason: str) -> None:
13421336 clamped_delay = min (delay , self ._config .end_of_utterance_max_delay )
13431337
13441338 # Adjust time and make sure no less than 25ms
1345- finalize_delay = max (clamped_delay - ( self ._last_ttfb / 1000 ) , 0.025 )
1339+ finalize_delay = max (clamped_delay - self ._last_ttfb , 0.025 )
13461340
13471341 # Emit prediction
13481342 if self .listeners (AgentServerMessageType .END_OF_TURN_PREDICTION ):
0 commit comments