Skip to content

Commit 94492d4

Browse files
committed
better metrics handling
1 parent 168ff09 commit 94492d4

File tree

4 files changed

+40
-64
lines changed

4 files changed

+40
-64
lines changed

examples/voice/cli/cli.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,8 +389,7 @@ def log_message(message) -> None:
389389

390390
# Metrics
391391
if args.verbose >= 4:
392-
client.on(AgentServerMessageType.METRICS, log_message)
393-
client.on(AgentServerMessageType.TTFB_METRICS, log_message)
392+
client.on(AgentServerMessageType.SESSION_METRICS, log_message)
394393

395394
# Verbose STT events
396395
if args.verbose >= 5:

sdk/voice/speechmatics/voice/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@
2525
from ._models import AgentServerMessageType
2626
from ._models import EndOfUtteranceMode
2727
from ._models import SegmentMessage
28+
from ._models import SessionMetricsMessage
2829
from ._models import SmartTurnConfig
2930
from ._models import SpeakerFocusConfig
3031
from ._models import SpeakerFocusMode
3132
from ._models import SpeakerMetricsMessage
3233
from ._models import SpeechSegmentConfig
3334
from ._models import SpeechSegmentEmitMode
34-
from ._models import TTFBMetricsMessage
3535
from ._models import TurnPredictionMessage
3636
from ._models import TurnStartEndMessage
3737
from ._models import VADStatusMessage
@@ -62,8 +62,8 @@
6262
# Server messages
6363
"AgentServerMessageType",
6464
"SegmentMessage",
65+
"SessionMetricsMessage",
6566
"SpeakerMetricsMessage",
66-
"TTFBMetricsMessage",
6767
"TurnPredictionMessage",
6868
"TurnStartEndMessage",
6969
"VADStatusMessage",

sdk/voice/speechmatics/voice/_client.py

Lines changed: 32 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@
3838
from ._models import ErrorMessage
3939
from ._models import LanguagePackInfo
4040
from ._models import MessageTimeMetadata
41-
from ._models import MetricsMessage
4241
from ._models import SegmentMessage
4342
from ._models import SegmentMessageSegment
4443
from ._models import SegmentMessageSegmentFragment
44+
from ._models import SessionMetricsMessage
4545
from ._models import SessionSpeaker
4646
from ._models import SpeakerFocusConfig
4747
from ._models import SpeakerFocusMode
@@ -51,7 +51,6 @@
5151
from ._models import SpeechFragment
5252
from ._models import SpeechSegmentEmitMode
5353
from ._models import TranscriptionUpdatePreset
54-
from ._models import TTFBMetricsMessage
5554
from ._models import TurnPredictionMessage
5655
from ._models import TurnPredictionMetadata
5756
from ._models import TurnStartEndMessage
@@ -185,8 +184,7 @@ def __init__(
185184
self._total_time: float = 0
186185
self._total_bytes: int = 0
187186

188-
# TTFB metrics
189-
self._last_ttfb_time: Optional[float] = None
187+
# Latency metrics
190188
self._last_ttfb: float = 0
191189

192190
# Time to disregard speech fragments before
@@ -245,7 +243,7 @@ def __init__(
245243
self._dz_config = self._config.speaker_config
246244

247245
# Metrics emitter task
248-
self._metrics_emitter_interval: float = 10.0
246+
self._metrics_emitter_interval: float = 5.0
249247
self._metrics_emitter_task: Optional[asyncio.Task] = None
250248

251249
# Audio sampling info
@@ -741,27 +739,44 @@ def _start_metrics_task(self) -> None:
741739

742740
# Task to send metrics
743741
async def emit_metrics() -> None:
742+
# Tracker
743+
last_emission_time = self._total_time
744+
745+
# Emit metrics
744746
while True:
745-
# Interval between emitting metrics
746-
await asyncio.sleep(self._metrics_emitter_interval)
747+
# Calculate when the next emission should occur
748+
next_emission_time = (
749+
last_emission_time // self._metrics_emitter_interval + 1
750+
) * self._metrics_emitter_interval
747751

748752
# Check if there are any listeners for AgentServerMessageType.METRICS
749-
if not self.listeners(AgentServerMessageType.METRICS):
750-
break
753+
if not self.listeners(AgentServerMessageType.SESSION_METRICS):
754+
await asyncio.sleep(self._metrics_emitter_interval)
755+
last_emission_time = self._total_time
756+
continue
757+
758+
# Wait until we've actually reached that time
759+
while self._total_time < next_emission_time:
760+
time_to_wait = next_emission_time - self._total_time
761+
await asyncio.sleep(min(0.25, time_to_wait))
751762

752763
# Calculations
753-
time_s = round(self._total_time, 3)
764+
total_time = self._total_time
765+
total_bytes = self._total_bytes
754766

755767
# Emit metrics
756768
self._emit_message(
757-
MetricsMessage(
758-
total_time=time_s,
759-
total_time_str=time.strftime("%H:%M:%S", time.gmtime(time_s)),
760-
total_bytes=self._total_bytes,
761-
last_ttfb=int(self._last_ttfb),
769+
SessionMetricsMessage(
770+
total_time=round(total_time, 1),
771+
total_time_str=time.strftime("%H:%M:%S", time.gmtime(total_time)),
772+
total_bytes=total_bytes,
773+
processing_time=round(self._last_ttfb, 3),
762774
)
763775
)
764776

777+
# Update tracker
778+
last_emission_time = total_time
779+
765780
# Trigger the task
766781
self._metrics_emitter_task = asyncio.create_task(emit_metrics())
767782

@@ -782,36 +797,15 @@ def _calculate_ttfb(self, end_time: float) -> None:
782797
end_time: The end time of the payload from the STT engine.
783798
"""
784799

785-
# Skip if not enabled
786-
if not (self.listeners(AgentServerMessageType.TTFB_METRICS) or self.listeners(AgentServerMessageType.METRICS)):
787-
return
788-
789-
# Skip if no fragments are words
790-
if len(self._speech_fragments) == 0 or all(f.type_ != "word" for f in self._speech_fragments):
791-
return
792-
793-
# Get start of the first fragment
794-
fragments_start_time = self._speech_fragments[0].start_time
795-
796-
# Skip if no partial word or if we have already calculated the TTFB for this word
797-
if self._last_ttfb_time and fragments_start_time <= self._last_ttfb_time:
798-
return
799-
800800
# Calculate the time difference (convert to ms)
801-
ttfb = (self._total_time - end_time) * 1000.0
801+
ttfb = self._total_time - end_time
802802

803803
# Skip if zero or less
804804
if ttfb <= 0:
805805
return
806806

807807
# Save TTFB and end time
808808
self._last_ttfb = ttfb
809-
self._last_ttfb_time = end_time
810-
811-
# Emit the TTFB
812-
self._emit_message(
813-
TTFBMetricsMessage(ttfb=int(self._last_ttfb)),
814-
)
815809

816810
def calculate_speaker_metrics(self, final_segments: list[SpeakerSegment]) -> None:
817811
"""Calculate the speaker metrics.
@@ -1342,7 +1336,7 @@ def add_multipler_reason(multiplier: float, reason: str) -> None:
13421336
clamped_delay = min(delay, self._config.end_of_utterance_max_delay)
13431337

13441338
# Adjust time and make sure no less than 25ms
1345-
finalize_delay = max(clamped_delay - (self._last_ttfb / 1000), 0.025)
1339+
finalize_delay = max(clamped_delay - self._last_ttfb, 0.025)
13461340

13471341
# Emit prediction
13481342
if self.listeners(AgentServerMessageType.END_OF_TURN_PREDICTION):

sdk/voice/speechmatics/voice/_models.py

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -225,8 +225,7 @@ class AgentServerMessageType(str, Enum):
225225
SPEAKERS_RESULT = "SpeakersResult"
226226

227227
# Metrics
228-
METRICS = "Metrics"
229-
TTFB_METRICS = "TTFBMetrics"
228+
SESSION_METRICS = "SessionMetrics"
230229
SPEAKER_METRICS = "SpeakerMetrics"
231230

232231

@@ -947,38 +946,22 @@ class ErrorMessage(BaseMessage):
947946
reason: str
948947

949948

950-
class MetricsMessage(BaseMessage):
949+
class SessionMetricsMessage(BaseMessage):
951950
"""Emitted when metrics are calculated.
952951
953952
Parameters:
954953
message: The message type.
955954
total_time: The total time in seconds.
956955
total_time_str: The total time in HH:MM:SS format.
957956
total_bytes: The total bytes sent to the STT engine.
958-
last_ttfb: The last time to first text in seconds.
957+
processing_time: The latest processing time in seconds.
959958
"""
960959

961-
message: AgentServerMessageType = AgentServerMessageType.METRICS
960+
message: AgentServerMessageType = AgentServerMessageType.SESSION_METRICS
962961
total_time: float
963962
total_time_str: str
964963
total_bytes: int
965-
last_ttfb: int
966-
967-
968-
class TTFBMetricsMessage(BaseMessage):
969-
"""Emitted when the time to first text is calculated.
970-
971-
TTFB is calculated as the time for the audio to be processed by the STT
972-
engine and is calculated periodically during the session. This can be
973-
used to measure the performance of the STT engine.
974-
975-
Parameters:
976-
message: The message type.
977-
ttfb: The time to first text in seconds.
978-
"""
979-
980-
message: AgentServerMessageType = AgentServerMessageType.TTFB_METRICS
981-
ttfb: int
964+
processing_time: float
982965

983966

984967
class VADStatusMessage(BaseMessage):

0 commit comments

Comments
 (0)