From 2e90177fa7f997c47c1250c2d431a30f487b72a3 Mon Sep 17 00:00:00 2001
From: Evan Mattson <evan.mattson@microsoft.com>
Date: Fri, 24 Oct 2025 13:01:00 +0900
Subject: [PATCH 1/4] Support realtime GA models

---
 python/pyproject.toml                         |   2 +-
 ...time_agent_with_function_calling_webrtc.py |   1 +
 .../realtime/simple_realtime_chat_webrtc.py   |  33 ++--
 .../open_ai_realtime_execution_settings.py    |  44 ++++++
 .../ai/open_ai/services/_open_ai_realtime.py  | 148 ++++++++++++------
 .../ai/open_ai/services/azure_realtime.py     |   2 +-
 .../ai/open_ai/settings/open_ai_settings.py   |   2 +-
 .../open_ai/services/test_openai_realtime.py  | 109 +++++++------
 python/uv.lock                                |   8 +-
 9 files changed, 241 insertions(+), 108 deletions(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index d1b22c85541a..2b38f3d9344b 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -37,7 +37,7 @@ dependencies = [
     "numpy >= 1.25.0; python_version < '3.12'",
     "numpy >= 1.26.0; python_version >= '3.12'",
     # openai connector
-    "openai >= 1.98.0",
+    "openai >= 2.0.0",
     # openapi and swagger
     "openapi_core >= 0.18,<0.20",
     "websockets >= 13, < 16",
diff --git a/python/samples/concepts/realtime/realtime_agent_with_function_calling_webrtc.py b/python/samples/concepts/realtime/realtime_agent_with_function_calling_webrtc.py
index f17f69229c48..69333c79c463 100644
--- a/python/samples/concepts/realtime/realtime_agent_with_function_calling_webrtc.py
+++ b/python/samples/concepts/realtime/realtime_agent_with_function_calling_webrtc.py
@@ -103,6 +103,7 @@ async def main() -> None:
     flowery prose.
     """,
         voice="alloy",
+        output_modalities=["text", "audio"],
         turn_detection=TurnDetection(type="server_vad", create_response=True, silence_duration_ms=800, threshold=0.8),
         function_choice_behavior=FunctionChoiceBehavior.Auto(),
     )
diff --git a/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py b/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py
index 5f32f4d949fa..901dd9d26c12 100644
--- a/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py
+++ b/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py
@@ -9,6 +9,7 @@
     OpenAIRealtimeExecutionSettings,
     OpenAIRealtimeWebRTC,
 )
+from semantic_kernel.contents import RealtimeTextEvent
 
 logging.basicConfig(level=logging.WARNING)
 utils_log = logging.getLogger("samples.concepts.realtime.utils")
@@ -55,6 +56,8 @@ async def main() -> None:
         # see https://platform.openai.com/docs/api-reference/realtime-sessions/create#realtime-sessions-create-voice
         # for more details.
         voice="alloy",
+        # Enable both text and audio output to get transcripts
+        output_modalities=["text", "audio"],
     )
     realtime_client = OpenAIRealtimeWebRTC(audio_track=AudioRecorderWebRTC(), settings=settings)
     # Create the settings for the session
@@ -62,16 +65,26 @@ async def main() -> None:
     # the context manager calls the create_session method on the client and starts listening to the audio stream
     async with audio_player, realtime_client:
         async for event in realtime_client.receive(audio_output_callback=audio_player.client_callback):
-            match event.event_type:
-                case "text":
-                    # the model returns both audio and transcript of the audio, which we will print
-                    print(event.text.text, end="")
-                case "service":
-                    # OpenAI Specific events
-                    if event.service_type == ListenEvents.SESSION_UPDATED:
-                        print("Session updated")
-                    if event.service_type == ListenEvents.RESPONSE_CREATED:
-                        print("\nMosscap (transcript): ", end="")
+            match event:
+                case RealtimeTextEvent():
+                    # Only process delta events for streaming, skip done events to avoid duplication
+                    if (
+                        hasattr(event, "service_type")
+                        and "delta" in event.service_type
+                        and hasattr(event.text, "text")
+                        and event.text.text
+                    ):
+                        print(event.text.text, end="", flush=True)
+                    # Add newline when transcript is complete (done event)
+                    elif hasattr(event, "service_type") and "done" in event.service_type:
+                        print()  # Add newline for readability
+                case _:
+                    # Handle other events including service events
+                    if hasattr(event, "event_type") and event.event_type == "service":
+                        if hasattr(event, "service_type") and event.service_type == ListenEvents.SESSION_UPDATED:
+                            print("Session updated")
+                        if hasattr(event, "service_type") and event.service_type == ListenEvents.RESPONSE_CREATED:
+                            print("\nMosscap (transcript): ", end="")
 
 
 if __name__ == "__main__":
diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py
index 2660187de902..24b03ab3b9ae 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py
@@ -55,6 +55,7 @@ class OpenAIRealtimeExecutionSettings(PromptExecutionSettings):
     """Request settings for OpenAI realtime services."""
 
     modalities: Sequence[Literal["audio", "text"]] | None = None
+    output_modalities: Sequence[Literal["audio", "text"]] | None = None
     ai_model_id: Annotated[str | None, Field(None, serialization_alias="model")] = None
     instructions: str | None = None
     voice: str | None = None
@@ -80,6 +81,49 @@ class OpenAIRealtimeExecutionSettings(PromptExecutionSettings):
     max_response_output_tokens: Annotated[int | Literal["inf"] | None, Field(gt=0)] = None
     input_audio_noise_reduction: dict[Literal["type"], Literal["near_field", "far_field"]] | None = None
 
+    def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
+        """Build the request payload with audio options nested under ``audio``.
+
+        The flat audio-related fields of these settings (voice, turn detection,
+        audio formats, transcription and noise reduction) are moved out of the
+        dictionary returned by the parent class and regrouped under
+        ``audio.input`` / ``audio.output``, the nested layout the OpenAI realtime
+        API expects.
+
+        Args:
+            **kwargs: Forwarded unchanged to the parent implementation.
+
+        Returns:
+            The settings dictionary; an ``audio`` key is added only when at least
+            one of the relocated fields was present.
+        """
+        # (flat settings key, audio section, key inside that section), listed in
+        # the order the nested entries are inserted.
+        relocations = (
+            ("voice", "output", "voice"),
+            ("turn_detection", "input", "turn_detection"),
+            ("input_audio_format", "input", "format"),
+            ("output_audio_format", "output", "format"),
+            ("input_audio_transcription", "input", "transcription"),
+            ("input_audio_noise_reduction", "input", "noise_reduction"),
+        )
+
+        # Start from the dictionary produced by the parent class
+        payload = super().prepare_settings_dict(**kwargs)
+        # Sections are created lazily, so an unused "input"/"output" is never emitted
+        nested_audio: dict[str, dict[str, Any]] = {}
+
+        for flat_key, section, nested_key in relocations:
+            if flat_key not in payload:
+                continue
+            nested_audio.setdefault(section, {})[nested_key] = payload.pop(flat_key)
+
+        # Leave the payload untouched when nothing audio-related was configured
+        if nested_audio:
+            payload["audio"] = nested_audio
+
+        return payload
+
 
 class AzureRealtimeExecutionSettings(OpenAIRealtimeExecutionSettings):
     """Request settings for Azure OpenAI realtime services."""
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/_open_ai_realtime.py b/python/semantic_kernel/connectors/ai/open_ai/services/_open_ai_realtime.py
index fc07450bf594..98653e103153 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/_open_ai_realtime.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/_open_ai_realtime.py
@@ -23,8 +23,8 @@
 from av.audio.frame import AudioFrame
 from numpy import ndarray
 from openai._models import construct_type_unchecked
-from openai.resources.beta.realtime.realtime import AsyncRealtimeConnection
-from openai.types.beta.realtime import (
+from openai.resources.realtime.realtime import AsyncRealtimeConnection
+from openai.types.realtime import (
     ConversationItemCreateEvent,
     ConversationItemDeleteEvent,
     ConversationItemTruncateEvent,
@@ -32,13 +32,16 @@
     InputAudioBufferClearEvent,
     InputAudioBufferCommitEvent,
     RealtimeClientEvent,
+    RealtimeConversationItemFunctionCall,
+    RealtimeConversationItemFunctionCallOutput,
+    RealtimeConversationItemUserMessage,
+    RealtimeResponseCreateParams,
     RealtimeServerEvent,
     ResponseCancelEvent,
     ResponseCreateEvent,
     ResponseFunctionCallArgumentsDoneEvent,
     SessionUpdateEvent,
 )
-from openai.types.beta.realtime.response_create_event import Response
 from pydantic import Field, PrivateAttr
 
 from semantic_kernel.connectors.ai.function_call_choice_configuration import FunctionCallChoiceConfiguration
@@ -163,9 +166,15 @@ def _create_openai_realtime_client_event(event_type: SendEvents | str, **kwargs:
         case SendEvents.SESSION_UPDATE:
             if "session" not in kwargs:
                 raise ContentException("Session is required for SessionUpdateEvent")
+            session_dict = kwargs.pop("session")
+            # Create proper RealtimeSessionCreateRequest with required type field for SDK validation
+            # The OpenAI SDK will handle the proper serialization for the API
+            from openai.types.realtime import RealtimeSessionCreateRequest
+
+            session_request = RealtimeSessionCreateRequest(type="realtime", **session_dict)
             return SessionUpdateEvent(
                 type=event_type.value,
-                session=kwargs.pop("session"),
+                session=session_request,
                 **kwargs,
             )
         case SendEvents.INPUT_AUDIO_BUFFER_APPEND:
@@ -206,7 +215,9 @@ def _create_openai_realtime_client_event(event_type: SendEvents | str, **kwargs:
             )
         case SendEvents.RESPONSE_CREATE:
             if "response" in kwargs:
-                response: Response | None = Response.model_validate(kwargs.pop("response"))
+                response: RealtimeResponseCreateParams | None = RealtimeResponseCreateParams.model_validate(
+                    kwargs.pop("response")
+                )
             else:
                 response = None
             return ResponseCreateEvent(
@@ -246,10 +257,10 @@ class ListenEvents(str, Enum):
     RESPONSE_CONTENT_PART_DONE = "response.content_part.done"
     RESPONSE_TEXT_DELTA = "response.text.delta"
     RESPONSE_TEXT_DONE = "response.text.done"
-    RESPONSE_AUDIO_TRANSCRIPT_DELTA = "response.audio_transcript.delta"
-    RESPONSE_AUDIO_TRANSCRIPT_DONE = "response.audio_transcript.done"
-    RESPONSE_AUDIO_DELTA = "response.audio.delta"
-    RESPONSE_AUDIO_DONE = "response.audio.done"
+    RESPONSE_AUDIO_TRANSCRIPT_DELTA = "response.output_audio_transcript.delta"
+    RESPONSE_AUDIO_TRANSCRIPT_DONE = "response.output_audio_transcript.done"
+    RESPONSE_AUDIO_DELTA = "response.output_audio.delta"
+    RESPONSE_AUDIO_DONE = "response.output_audio.done"
     RESPONSE_FUNCTION_CALL_ARGUMENTS_DELTA = "response.function_call_arguments.delta"
     RESPONSE_FUNCTION_CALL_ARGUMENTS_DONE = "response.function_call_arguments.done"
     RATE_LIMITS_UPDATED = "rate_limits.updated"
@@ -291,7 +302,7 @@ async def _parse_event(self, event: RealtimeServerEvent) -> AsyncGenerator[Realt
         might be of different types.
         """
         match event.type:
-            case ListenEvents.RESPONSE_AUDIO_TRANSCRIPT_DELTA.value:
+            case ListenEvents.RESPONSE_AUDIO_TRANSCRIPT_DELTA.value | "response.audio_transcript.delta":
                 yield RealtimeTextEvent(
                     service_type=event.type,
                     service_event=event,
@@ -301,6 +312,15 @@ async def _parse_event(self, event: RealtimeServerEvent) -> AsyncGenerator[Realt
                         choice_index=0,
                     ),
                 )
+            case ListenEvents.RESPONSE_AUDIO_TRANSCRIPT_DONE.value | "response.audio_transcript.done":
+                yield RealtimeTextEvent(
+                    service_type=event.type,
+                    service_event=event,
+                    text=TextContent(
+                        inner_content=event,
+                        text=event.transcript,  # type: ignore
+                    ),
+                )
             case ListenEvents.RESPONSE_OUTPUT_ITEM_ADDED.value:
                 if event.item.type == "function_call" and event.item.call_id and event.item.name:  # type: ignore
                     self._call_id_to_function_map[event.item.call_id] = event.item.name  # type: ignore
@@ -323,7 +343,9 @@ async def _parse_event(self, event: RealtimeServerEvent) -> AsyncGenerator[Realt
                     if parsed_event:
                         yield parsed_event
             case ListenEvents.ERROR.value:
-                logger.error("Error received: %s", event.error.model_dump_json())  # type: ignore
+                # In GA API, event.error is a dict instead of an object
+                error_info = event.error if isinstance(event.error, dict) else event.error.model_dump()  # type: ignore
+                logger.error("Error received: %s", error_info)  # type: ignore
                 yield RealtimeEvent(service_type=event.type, service_event=event)
             case ListenEvents.SESSION_CREATED.value | ListenEvents.SESSION_UPDATED.value:
                 logger.info("Session created or updated, session: %s", event.session.model_dump_json())  # type: ignore
@@ -483,43 +505,43 @@ async def send(self, event: RealtimeEvents, **kwargs: Any) -> None:
                 await self._send(
                     _create_openai_realtime_client_event(
                         event_type=SendEvents.CONVERSATION_ITEM_CREATE,
-                        item={
-                            "type": "message",
-                            "content": [
+                        item=RealtimeConversationItemUserMessage(
+                            type="message",
+                            content=[
                                 {
                                     "type": "input_text",
                                     "text": event.text.text,
                                 }
                             ],
-                            "role": "user",
-                        },
+                            role="user",
+                        ),
                     )
                 )
             case RealtimeFunctionCallEvent():
                 await self._send(
                     _create_openai_realtime_client_event(
                         event_type=SendEvents.CONVERSATION_ITEM_CREATE,
-                        item={
-                            "type": "function_call",
-                            "name": event.function_call.name or event.function_call.function_name,
-                            "arguments": ""
+                        item=RealtimeConversationItemFunctionCall(
+                            type="function_call",
+                            name=event.function_call.name or event.function_call.function_name,
+                            arguments=""
                             if not event.function_call.arguments
                             else event.function_call.arguments
                             if isinstance(event.function_call.arguments, str)
                             else json.dumps(event.function_call.arguments),
-                            "call_id": event.function_call.metadata.get("call_id"),
-                        },
+                            call_id=event.function_call.metadata.get("call_id"),
+                        ),
                     )
                 )
             case RealtimeFunctionResultEvent():
                 await self._send(
                     _create_openai_realtime_client_event(
                         event_type=SendEvents.CONVERSATION_ITEM_CREATE,
-                        item={
-                            "type": "function_call_output",
-                            "output": event.function_result.result,
-                            "call_id": event.function_result.metadata.get("call_id"),
-                        },
+                        item=RealtimeConversationItemFunctionCallOutput(
+                            type="function_call_output",
+                            output=event.function_result.result,
+                            call_id=event.function_result.metadata.get("call_id"),
+                        ),
                     )
                 )
             case _:
@@ -575,32 +597,32 @@ async def send(self, event: RealtimeEvents, **kwargs: Any) -> None:
                                     await self._send(
                                         _create_openai_realtime_client_event(
                                             event_type=event.service_type,
-                                            item={
-                                                "type": "message",
-                                                "content": [
+                                            item=RealtimeConversationItemUserMessage(
+                                                type="message",
+                                                content=[
                                                     {
                                                         "type": "input_text",
                                                         "text": item.text,
                                                     }
                                                 ],
-                                                "role": "user",
-                                            },
+                                                role="user",
+                                            ),
                                         )
                                     )
                                 case FunctionCallContent():
                                     await self._send(
                                         _create_openai_realtime_client_event(
                                             event_type=event.service_type,
-                                            item={
-                                                "type": "function_call",
-                                                "name": item.name or item.function_name,
-                                                "arguments": ""
+                                            item=RealtimeConversationItemFunctionCall(
+                                                type="function_call",
+                                                name=item.name or item.function_name,
+                                                arguments=""
                                                 if not item.arguments
                                                 else item.arguments
                                                 if isinstance(item.arguments, str)
                                                 else json.dumps(item.arguments),
-                                                "call_id": item.metadata.get("call_id"),
-                                            },
+                                                call_id=item.metadata.get("call_id"),
+                                            ),
                                         )
                                     )
 
@@ -608,11 +630,11 @@ async def send(self, event: RealtimeEvents, **kwargs: Any) -> None:
                                     await self._send(
                                         _create_openai_realtime_client_event(
                                             event_type=event.service_type,
-                                            item={
-                                                "type": "function_call_output",
-                                                "output": item.result,
-                                                "call_id": item.metadata.get("call_id"),
-                                            },
+                                            item=RealtimeConversationItemFunctionCallOutput(
+                                                type="function_call_output",
+                                                output=item.result,
+                                                call_id=item.metadata.get("call_id"),
+                                            ),
                                         )
                                     )
                     case SendEvents.CONVERSATION_ITEM_TRUNCATE:
@@ -691,7 +713,36 @@ async def _send(self, event: RealtimeClientEvent) -> None:
         while self.data_channel.readyState != "open":
             await asyncio.sleep(0.1)
         try:
-            self.data_channel.send(event.model_dump_json(exclude_none=True))
+            # Handle session update specially to exclude type field for WebRTC
+            if hasattr(event, "type") and event.type == "session.update":
+                event_dict = event.model_dump(exclude_none=True)
+                # Remove fields that aren't allowed in session updates for WebRTC compatibility
+                # Audio configuration should be set during session creation, not updates
+                session_dict = event_dict.get("session")
+                if session_dict and isinstance(session_dict, dict):
+                    # Only keep fields that are allowed in session updates
+                    # Note: output_modalities is not allowed in WebRTC session updates
+                    allowed_fields = {
+                        "instructions",
+                        "model",
+                        "max_output_tokens",
+                        "tools",
+                        "tool_choice",
+                        "temperature",
+                        "prompt",
+                        "tracing",
+                        "truncation",
+                    }
+                    event_dict["session"] = {k: v for k, v in session_dict.items() if k in allowed_fields}
+
+                # Debug: Log what we're sending to see the structure
+                import json
+
+                json_data = json.dumps(event_dict)
+                logger.debug(f"Sending WebRTC session.update: {json_data}")
+                self.data_channel.send(json_data)
+            else:
+                self.data_channel.send(event.model_dump_json(exclude_none=True))
         except Exception as e:
             logger.error(f"Failed to send event {event} with error: {e!s}")
 
@@ -834,11 +885,11 @@ def _get_ephemeral_token_headers_and_url(self) -> tuple[dict[str, str], str]:
         return {
             "Authorization": f"Bearer {self.client.api_key}",
             "Content-Type": "application/json",
-        }, f"{self.client.beta.realtime._client.base_url}/realtime/sessions"
+        }, f"{self.client.realtime._client.base_url}/realtime/sessions"
 
     def _get_webrtc_url(self) -> str:
         """Get the WebRTC URL."""
-        return f"{self.client.beta.realtime._client.base_url}/realtime?model={self.ai_model_id}"
+        return f"{self.client.realtime._client.base_url}/realtime?model={self.ai_model_id}"
 
 
 # region Websocket
@@ -882,6 +933,9 @@ async def _send(self, event: RealtimeClientEvent) -> None:
         if not self.connection:
             raise ValueError("Connection is not established.")
         try:
+            # Debug logging to see what we're actually sending
+            if hasattr(event, "type") and event.type == "session.update":
+                logger.debug(f"Sending session.update event: {event.model_dump()}")
             await self.connection.send(event)
         except Exception as e:
             logger.error(f"Error sending response: {e!s}")
@@ -894,7 +948,7 @@ async def create_session(
         **kwargs: Any,
     ) -> None:
         """Create a session in the service."""
-        self.connection = await self.client.beta.realtime.connect(
+        self.connection = await self.client.realtime.connect(
             model=self.ai_model_id, extra_headers={USER_AGENT: SEMANTIC_KERNEL_USER_AGENT}
         ).enter()
         self.connected.set()
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_realtime.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_realtime.py
index 32a11f9ec807..fa45cf8eef97 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_realtime.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_realtime.py
@@ -253,7 +253,7 @@ def get_prompt_execution_settings_class(self) -> type[PromptExecutionSettings]:
     def _get_ephemeral_token_headers_and_url(self) -> tuple[dict[str, str], str]:
         """Get the headers and URL for the ephemeral token."""
         url = (
-            f"{self.client.beta.realtime._client.base_url}/realtimeapi/sessions?api-version="
+            f"{self.client.realtime._client.base_url}/realtimeapi/sessions?api-version="
             f"{self.client._api_version}"  # type: ignore[attr-defined]
         )
         if self.client._azure_ad_token is not None:  # type: ignore[attr-defined]
diff --git a/python/semantic_kernel/connectors/ai/open_ai/settings/open_ai_settings.py b/python/semantic_kernel/connectors/ai/open_ai/settings/open_ai_settings.py
index 6ead680aa4f4..ba92b93b0983 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/settings/open_ai_settings.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/settings/open_ai_settings.py
@@ -35,7 +35,7 @@ class OpenAISettings(KernelBaseSettings):
     - text_to_audio_model_id: str | None - The OpenAI text to audio model ID to use, for example, jukebox-1.
         (Env var OPENAI_TEXT_TO_AUDIO_MODEL_ID)
     - realtime_model_id: str | None - The OpenAI realtime model ID to use,
-    for example, gpt-4o-realtime-preview-2024-12-17.
+        for example, gpt-realtime, gpt-realtime-mini, or gpt-audio-mini.
         (Env var OPENAI_REALTIME_MODEL_ID)
     - env_file_path: str | None - if provided, the .env settings are read from this file path location
     """
diff --git a/python/tests/unit/connectors/ai/open_ai/services/test_openai_realtime.py b/python/tests/unit/connectors/ai/open_ai/services/test_openai_realtime.py
index ce05661d2832..3a7ad49c0732 100644
--- a/python/tests/unit/connectors/ai/open_ai/services/test_openai_realtime.py
+++ b/python/tests/unit/connectors/ai/open_ai/services/test_openai_realtime.py
@@ -9,28 +9,29 @@
 from aiortc import AudioStreamTrack, RTCDataChannel, RTCPeerConnection
 from numpy import ndarray
 from openai import AsyncOpenAI
-from openai.resources.beta.realtime.realtime import (
+from openai.resources.realtime.realtime import (
     AsyncRealtimeConnection,
     AsyncRealtimeConnectionManager,
 )
-from openai.types.beta.realtime import (
-    ConversationItem,
-    ConversationItemContent,
+from openai.types.realtime import (
     ConversationItemCreatedEvent,
     ConversationItemCreateEvent,
     ConversationItemDeletedEvent,
     ConversationItemDeleteEvent,
     ConversationItemTruncatedEvent,
     ConversationItemTruncateEvent,
-    ErrorEvent,
     InputAudioBufferAppendEvent,
     InputAudioBufferClearedEvent,
     InputAudioBufferClearEvent,
     InputAudioBufferCommitEvent,
     InputAudioBufferCommittedEvent,
     InputAudioBufferSpeechStartedEvent,
+    RealtimeConversationItemFunctionCall,
+    RealtimeConversationItemFunctionCallOutput,
+    RealtimeConversationItemUserMessage,
     RealtimeResponse,
     RealtimeServerEvent,
+    RealtimeSessionCreateRequest,
     ResponseAudioDeltaEvent,
     ResponseAudioDoneEvent,
     ResponseAudioTranscriptDeltaEvent,
@@ -40,11 +41,11 @@
     ResponseFunctionCallArgumentsDeltaEvent,
     ResponseFunctionCallArgumentsDoneEvent,
     ResponseOutputItemAddedEvent,
-    Session,
     SessionCreatedEvent,
     SessionUpdatedEvent,
     SessionUpdateEvent,
 )
+from openai.types.realtime.realtime_error import RealtimeError as ErrorEvent
 from pydantic import ValidationError
 from pytest import fixture, mark, param, raises
 
@@ -85,28 +86,36 @@
 from semantic_kernel.kernel import Kernel
 
 events = [
-    SessionCreatedEvent(type=ListenEvents.SESSION_CREATED, session=Session(id="session_id"), event_id="1"),
-    SessionUpdatedEvent(type=ListenEvents.SESSION_UPDATED, session=Session(id="session_id"), event_id="2"),
+    SessionCreatedEvent(
+        type=ListenEvents.SESSION_CREATED.value, session=RealtimeSessionCreateRequest(type="realtime"), event_id="1"
+    ),
+    SessionUpdatedEvent(
+        type=ListenEvents.SESSION_UPDATED.value, session=RealtimeSessionCreateRequest(type="realtime"), event_id="2"
+    ),
     ConversationItemCreatedEvent(
-        type=ListenEvents.CONVERSATION_ITEM_CREATED,
-        item=ConversationItem(id="item_id"),
+        type=ListenEvents.CONVERSATION_ITEM_CREATED.value,
+        item=RealtimeConversationItemUserMessage(id="item_id", type="message", role="user", content=[]),
         event_id="3",
         previous_item_id="2",
     ),
-    ConversationItemDeletedEvent(type=ListenEvents.CONVERSATION_ITEM_DELETED, item_id="item_id", event_id="4"),
+    ConversationItemDeletedEvent(type=ListenEvents.CONVERSATION_ITEM_DELETED.value, item_id="item_id", event_id="4"),
     ConversationItemTruncatedEvent(
-        type=ListenEvents.CONVERSATION_ITEM_TRUNCATED, event_id="5", audio_end_ms=0, content_index=0, item_id="item_id"
+        type=ListenEvents.CONVERSATION_ITEM_TRUNCATED.value,
+        event_id="5",
+        audio_end_ms=0,
+        content_index=0,
+        item_id="item_id",
     ),
-    InputAudioBufferClearedEvent(type=ListenEvents.INPUT_AUDIO_BUFFER_CLEARED, event_id="7"),
+    InputAudioBufferClearedEvent(type=ListenEvents.INPUT_AUDIO_BUFFER_CLEARED.value, event_id="7"),
     InputAudioBufferCommittedEvent(
-        type=ListenEvents.INPUT_AUDIO_BUFFER_COMMITTED,
+        type=ListenEvents.INPUT_AUDIO_BUFFER_COMMITTED.value,
         event_id="8",
         item_id="item_id",
         previous_item_id="previous_item_id",
     ),
-    ResponseCreatedEvent(type=ListenEvents.RESPONSE_CREATED, event_id="10", response=RealtimeResponse()),
+    ResponseCreatedEvent(type=ListenEvents.RESPONSE_CREATED.value, event_id="10", response=RealtimeResponse()),
     ResponseFunctionCallArgumentsDoneEvent(
-        type=ListenEvents.RESPONSE_FUNCTION_CALL_ARGUMENTS_DONE,
+        type=ListenEvents.RESPONSE_FUNCTION_CALL_ARGUMENTS_DONE.value,
         event_id="11",
         arguments="{}",
         call_id="call_id",
@@ -115,7 +124,7 @@
         response_id="response_id",
     ),
     ResponseAudioTranscriptDeltaEvent(
-        type=ListenEvents.RESPONSE_AUDIO_TRANSCRIPT_DELTA,
+        type=ListenEvents.RESPONSE_AUDIO_TRANSCRIPT_DELTA.value,
         event_id="12",
         content_index=0,
         delta="text",
@@ -124,7 +133,7 @@
         response_id="response_id",
     ),
     ResponseAudioDoneEvent(
-        type=ListenEvents.RESPONSE_AUDIO_DONE,
+        type=ListenEvents.RESPONSE_AUDIO_DONE.value,
         event_id="13",
         item_id="item_id",
         output_index=0,
@@ -132,7 +141,7 @@
         content_index=0,
     ),
     ResponseAudioDeltaEvent(
-        type=ListenEvents.RESPONSE_AUDIO_DELTA,
+        type=ListenEvents.RESPONSE_AUDIO_DELTA.value,
         event_id="14",
         item_id="item_id",
         output_index=0,
@@ -253,7 +262,7 @@ def test_openai_realtime_webrtc(openai_unit_test_env, audio_track):
             {
                 "event_id": "event_id",
                 "previous_item_id": "previous_item_id",
-                "item": {"id": "item_id"},
+                "item": RealtimeConversationItemUserMessage(id="item_id", type="message", role="user", content=[]),
             },
             ConversationItemCreateEvent,
             None,
@@ -325,14 +334,14 @@ def test_create_openai_realtime_event(
                 event_id="event_id",
                 output_index=0,
                 response_id="response_id",
-                type="response.audio_transcript.delta",
+                type="response.output_audio_transcript.delta",
             ),
             [RealtimeTextEvent],
             id="response_audio_transcript_delta",
         ),
         param(
             ResponseOutputItemAddedEvent(
-                item=ConversationItem(id="item_id"),
+                item=RealtimeConversationItemUserMessage(id="item_id", type="message", role="user", content=[]),
                 event_id="event_id",
                 output_index=0,
                 response_id="response_id",
@@ -343,7 +352,9 @@ def test_create_openai_realtime_event(
         ),
         param(
             ResponseOutputItemAddedEvent(
-                item=ConversationItem(id="item_id", type="function_call", call_id="call_id", name="function_to_call"),
+                item=RealtimeConversationItemFunctionCall(
+                    id="item_id", type="function_call", call_id="call_id", name="function_to_call", arguments=""
+                ),
                 event_id="event_id",
                 output_index=0,
                 response_id="response_id",
@@ -382,6 +393,7 @@ def test_create_openai_realtime_event(
             ErrorEvent(
                 error={"code": "error_code", "message": "error_message", "type": "invalid_request_error"},
                 event_id="event_id",
+                message="error_message",
                 type="error",
             ),
             [RealtimeEvent],
@@ -389,7 +401,7 @@ def test_create_openai_realtime_event(
         ),
         param(
             SessionCreatedEvent(
-                session=Session(id="session_id"),
+                session=RealtimeSessionCreateRequest(type="realtime"),
                 event_id="event_id",
                 type="session.created",
             ),
@@ -398,7 +410,7 @@ def test_create_openai_realtime_event(
         ),
         param(
             SessionUpdatedEvent(
-                session=Session(id="session_id"),
+                session=RealtimeSessionCreateRequest(type="realtime"),
                 event_id="event_id",
                 type="session.updated",
             ),
@@ -432,14 +444,24 @@ async def test_update_session(OpenAIWebsocket, kernel):
                 role="assistant",
                 items=[
                     FunctionCallContent(
-                        function_name="function_name", plugin_name="plugin", arguments={"arg1": "value"}, id="1"
+                        function_name="function_name",
+                        plugin_name="plugin",
+                        arguments={"arg1": "value"},
+                        id="1",
+                        metadata={"call_id": "call_1"},
                     )
                 ],
             ),
             ChatMessageContent(
                 role="tool",
                 items=[
-                    FunctionResultContent(function_name="function_name", plugin_name="plugin", result="result", id="1")
+                    FunctionResultContent(
+                        function_name="function_name",
+                        plugin_name="plugin",
+                        result="result",
+                        id="1",
+                        metadata={"call_id": "call_1"},
+                    )
                 ],
             ),
             ChatMessageContent(
@@ -451,7 +473,7 @@ async def test_update_session(OpenAIWebsocket, kernel):
             ),
         ]
     )
-    settings = OpenAIRealtimeExecutionSettings(instructions="instructions", ai_model_id="gpt-4o-realtime-preview")
+    settings = OpenAIRealtimeExecutionSettings(instructions="instructions", ai_model_id="gpt-realtime")
     with patch.object(OpenAIWebsocket, "_send") as mock_send:
         await OpenAIWebsocket.update_session(
             chat_history=chat_history, settings=settings, create_response=True, kernel=kernel
@@ -477,7 +499,7 @@ async def test_parse_function_call_arguments_done(OpenAIWebsocket, kernel):
     )
     response_events = [RealtimeFunctionCallEvent, RealtimeFunctionResultEvent]
     OpenAIWebsocket._current_settings = OpenAIRealtimeExecutionSettings(
-        instructions="instructions", ai_model_id="gpt-4o-realtime-preview"
+        instructions="instructions", ai_model_id="gpt-realtime"
     )
     OpenAIWebsocket._current_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()
     OpenAIWebsocket._call_id_to_function_map["call_id"] = "plugin_name-function_name"
@@ -494,7 +516,7 @@ async def test_parse_function_call_arguments_done(OpenAIWebsocket, kernel):
         mock_send.assert_any_await(
             ConversationItemCreateEvent(
                 type="conversation.item.create",
-                item=ConversationItem(
+                item=RealtimeConversationItemFunctionCallOutput(
                     type="function_call_output",
                     output=func_result,
                     call_id="call_id",
@@ -516,7 +538,7 @@ async def test_parse_function_call_arguments_done_fail(OpenAIWebsocket, kernel):
     )
     response_events = [RealtimeEvent]
     OpenAIWebsocket._current_settings = OpenAIRealtimeExecutionSettings(
-        instructions="instructions", ai_model_id="gpt-4o-realtime-preview"
+        instructions="instructions", ai_model_id="gpt-realtime"
     )
     OpenAIWebsocket._current_settings.function_choice_behavior = FunctionChoiceBehavior.Auto()
     # This function name is invalid
@@ -549,7 +571,7 @@ async def test_send_audio(OpenAIWebsocket):
 @mark.parametrize("client", ["OpenAIWebRTC", "OpenAIWebsocket"])
 async def test_send_session_update(client, OpenAIWebRTC, OpenAIWebsocket):
     openai_client = OpenAIWebRTC if client == "OpenAIWebRTC" else OpenAIWebsocket
-    settings = PromptExecutionSettings(ai_model_id="gpt-4o-realtime-preview")
+    settings = PromptExecutionSettings(ai_model_id="gpt-realtime")
     session_event = RealtimeEvent(
         service_type=SendEvents.SESSION_UPDATE,
         service_event={"settings": settings},
@@ -560,7 +582,7 @@ async def test_send_session_update(client, OpenAIWebRTC, OpenAIWebsocket):
         assert len(mock_send.await_args_list) == 1
         mock_send.assert_any_await(
             SessionUpdateEvent(
-                session={"model": "gpt-4o-realtime-preview"},
+                session={"model": "gpt-realtime", "type": "realtime"},
                 type="session.update",
             )
         )
@@ -601,8 +623,8 @@ async def test_send_conversation_item_create(client, OpenAIWebRTC, OpenAIWebsock
         assert len(mock_send.await_args_list) == 3
         mock_send.assert_any_await(
             ConversationItemCreateEvent(
-                item=ConversationItem(
-                    content=[ConversationItemContent(text="Hello", type="input_text")],
+                item=RealtimeConversationItemUserMessage(
+                    content=[{"text": "Hello", "type": "input_text"}],
                     role="user",
                     type="message",
                 ),
@@ -611,7 +633,7 @@ async def test_send_conversation_item_create(client, OpenAIWebRTC, OpenAIWebsock
         )
         mock_send.assert_any_await(
             ConversationItemCreateEvent(
-                item=ConversationItem(
+                item=RealtimeConversationItemFunctionCall(
                     arguments='{"arg1": "value"}',
                     call_id="call_id",
                     name="plugin-function_name",
@@ -622,7 +644,7 @@ async def test_send_conversation_item_create(client, OpenAIWebRTC, OpenAIWebsock
         )
         mock_send.assert_any_await(
             ConversationItemCreateEvent(
-                item=ConversationItem(
+                item=RealtimeConversationItemFunctionCallOutput(
                     call_id="call_id",
                     output="result",
                     type="function_call_output",
@@ -639,7 +661,7 @@ async def test_receive_websocket(OpenAIWebsocket):
     manager = AsyncMock(spec=AsyncRealtimeConnectionManager)
     manager.enter.return_value = connection_mock
 
-    with patch("openai.resources.beta.realtime.realtime.AsyncRealtime.connect") as mock_connect:
+    with patch("openai.resources.realtime.realtime.AsyncRealtime.connect") as mock_connect:
         mock_connect.return_value = manager
         async with OpenAIWebsocket():
             async for msg in OpenAIWebsocket.receive():
@@ -674,7 +696,7 @@ async def openai_realtime_base():
     return OpenAIRealtimeWebRTC(
         audio_track=audio_track_mock,
         client=async_openai_mock,
-        ai_model_id="gpt-4o-realtime-preview",
+        ai_model_id="gpt-realtime",
         kernel=kernel_mock,
     )
 
@@ -810,7 +832,7 @@ def mocked_open_ai_realtime_webrtc(mocked_audio_track, mocked_audio_output_callb
         return OpenAIRealtimeWebRTC(
             audio_track=mocked_audio_track,
             audio_output_callback=mocked_audio_output_callback,
-            ai_model_id="gpt-4o-realtime-preview",
+            ai_model_id="gpt-realtime",
             client=async_openai_mock,
             api_key="fake-api-key",
         )
@@ -870,10 +892,9 @@ async def test_create_session_initializes_peer_connection(mock_post, mocked_open
     mocked_open_ai_realtime_webrtc._get_ephemeral_token = AsyncMock(return_value="fake-token")
     mocked_open_ai_realtime_webrtc.client = AsyncMock(spec=AsyncOpenAI)
     mocked_open_ai_realtime_webrtc.client.api_key = "fake-api-key"
-    mocked_open_ai_realtime_webrtc.client.beta = AsyncMock()
-    mocked_open_ai_realtime_webrtc.client.beta.realtime = AsyncMock()
-    mocked_open_ai_realtime_webrtc.client.beta.realtime._client = AsyncMock()
-    mocked_open_ai_realtime_webrtc.client.beta.realtime._client.base_url = "https://api.openai.com"
+    mocked_open_ai_realtime_webrtc.client.realtime = AsyncMock()
+    mocked_open_ai_realtime_webrtc.client.realtime._client = AsyncMock()
+    mocked_open_ai_realtime_webrtc.client.realtime._client.base_url = "https://api.openai.com"
 
     await mocked_open_ai_realtime_webrtc.create_session()
     assert mocked_open_ai_realtime_webrtc.peer_connection is not None
diff --git a/python/uv.lock b/python/uv.lock
index e06bb5906896..78ffbae03ce4 100644
--- a/python/uv.lock
+++ b/python/uv.lock
@@ -3547,7 +3547,7 @@ wheels = [
 
 [[package]]
 name = "openai"
-version = "1.99.9"
+version = "2.6.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
@@ -3559,9 +3559,9 @@ dependencies = [
     { name = "tqdm", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
     { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/8a/d2/ef89c6f3f36b13b06e271d3cc984ddd2f62508a0972c1cbcc8485a6644ff/openai-1.99.9.tar.gz", hash = "sha256:f2082d155b1ad22e83247c3de3958eb4255b20ccf4a1de2e6681b6957b554e92", size = 506992, upload-time = "2025-08-12T02:31:10.054Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/c7/e42bcd89dfd47fec8a30b9e20f93e512efdbfbb3391b05bbb79a2fb295fa/openai-2.6.0.tar.gz", hash = "sha256:f119faf7fc07d7e558c1e7c32c873e241439b01bd7480418234291ee8c8f4b9d", size = 592904, upload-time = "2025-10-20T17:17:24.588Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e8/fb/df274ca10698ee77b07bff952f302ea627cc12dac6b85289485dd77db6de/openai-1.99.9-py3-none-any.whl", hash = "sha256:9dbcdb425553bae1ac5d947147bebbd630d91bbfc7788394d4c4f3a35682ab3a", size = 786816, upload-time = "2025-08-12T02:31:08.34Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/0a/58e9dcd34abe273eaeac3807a8483073767b5609d01bb78ea2f048e515a0/openai-2.6.0-py3-none-any.whl", hash = "sha256:f33fa12070fe347b5787a7861c8dd397786a4a17e1c3186e239338dac7e2e743", size = 1005403, upload-time = "2025-10-20T17:17:22.091Z" },
 ]
 
 [[package]]
@@ -6048,7 +6048,7 @@ requires-dist = [
     { name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.0" },
     { name = "ollama", marker = "extra == 'ollama'", specifier = "~=0.4" },
     { name = "onnxruntime-genai", marker = "extra == 'onnx'", specifier = "~=0.7" },
-    { name = "openai", specifier = ">=1.98.0" },
+    { name = "openai", specifier = ">=2.0.0" },
     { name = "openapi-core", specifier = ">=0.18,<0.20" },
     { name = "opentelemetry-api", specifier = "~=1.24" },
     { name = "opentelemetry-sdk", specifier = "~=1.24" },

From 1442dbb9669bf68ae37005c2d7686a22eb4c23d6 Mon Sep 17 00:00:00 2001
From: Evan Mattson <evan.mattson@microsoft.com>
Date: Fri, 24 Oct 2025 13:08:21 +0900
Subject: [PATCH 2/4] Typing fix

---
 .../open_ai_realtime_execution_settings.py                      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py
index 24b03ab3b9ae..4d0647de10c2 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py
@@ -92,7 +92,7 @@ def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
         settings_dict = super().prepare_settings_dict(**kwargs)
 
         # Build the audio configuration object
-        audio_config = {}
+        audio_config: dict[str, Any] = {}
 
         # Handle voice (goes in audio.output.voice)
         if "voice" in settings_dict:

From 95bd8a8aaa1695903fc942901b67599a568131f6 Mon Sep 17 00:00:00 2001
From: Evan Mattson <evan.mattson@microsoft.com>
Date: Fri, 24 Oct 2025 13:12:03 +0900
Subject: [PATCH 3/4] cleanup sample

---
 .../realtime/simple_realtime_chat_webrtc.py     | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py b/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py
index 901dd9d26c12..4f007ea4342e 100644
--- a/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py
+++ b/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py
@@ -68,22 +68,17 @@ async def main() -> None:
             match event:
                 case RealtimeTextEvent():
                     # Only process delta events for streaming, skip done events to avoid duplication
-                    if (
-                        hasattr(event, "service_type")
-                        and "delta" in event.service_type
-                        and hasattr(event.text, "text")
-                        and event.text.text
-                    ):
+                    if event.service_type and "delta" in event.service_type and event.text.text:
                         print(event.text.text, end="", flush=True)
                     # Add newline when transcript is complete (done event)
-                    elif hasattr(event, "service_type") and "done" in event.service_type:
+                    elif event.service_type and "done" in event.service_type:
                         print()  # Add newline for readability
                 case _:
-                    # Handle other events including service events
-                    if hasattr(event, "event_type") and event.event_type == "service":
-                        if hasattr(event, "service_type") and event.service_type == ListenEvents.SESSION_UPDATED:
+                    # Handle service events
+                    if event.event_type == "service" and event.service_type:
+                        if event.service_type == ListenEvents.SESSION_UPDATED:
                             print("Session updated")
-                        if hasattr(event, "service_type") and event.service_type == ListenEvents.RESPONSE_CREATED:
+                        elif event.service_type == ListenEvents.RESPONSE_CREATED:
                             print("\nMosscap (transcript): ", end="")
 
 

From 7104e3b715f286080428e91991b1dd9bb4df3623 Mon Sep 17 00:00:00 2001
From: Evan Mattson <evan.mattson@microsoft.com>
Date: Fri, 27 Feb 2026 12:41:00 +0900
Subject: [PATCH 4/4] Realtime GA models working

---
 python/samples/concepts/realtime/README.md    |   2 +-
 ...time_agent_with_function_calling_webrtc.py |   8 +-
 ...e_agent_with_function_calling_websocket.py |   3 +
 .../realtime/simple_realtime_chat_webrtc.py   |  15 +-
 .../simple_realtime_chat_websocket.py         |  10 +-
 python/samples/concepts/realtime/utils.py     |   1 +
 .../connectors/ai/open_ai/const.py            |   2 +-
 .../open_ai_realtime_execution_settings.py    |   4 +-
 .../ai/open_ai/services/_open_ai_realtime.py  |  69 ++++----
 .../ai/open_ai/services/azure_realtime.py     | 154 +++++++++++++++---
 .../open_ai/services/test_openai_realtime.py  |   4 +
 11 files changed, 201 insertions(+), 71 deletions(-)

diff --git a/python/samples/concepts/realtime/README.md b/python/samples/concepts/realtime/README.md
index bd717938ed81..37c2f657f921 100644
--- a/python/samples/concepts/realtime/README.md
+++ b/python/samples/concepts/realtime/README.md
@@ -5,7 +5,7 @@ These samples are more complex then most because of the nature of these API's. T
 To run these samples, you will need to have the following setup:
 
 - Environment variables for OpenAI (websocket or WebRTC), with your key and OPENAI_REALTIME_MODEL_ID set.
-- Environment variables for Azure (websocket only), set with your endpoint, optionally a key and AZURE_OPENAI_REALTIME_DEPLOYMENT_NAME set. The API version needs to be at least `2024-10-01-preview`.
+- Environment variables for Azure (websocket only), set with your endpoint, optionally a key and AZURE_OPENAI_REALTIME_DEPLOYMENT_NAME set. The API version needs to be at least `2025-08-28`.
 - To run the sample with a simple version of a class that handles the incoming and outgoing sound you need to install the following packages in your environment:
   - semantic-kernel[realtime]
   - pyaudio
diff --git a/python/samples/concepts/realtime/realtime_agent_with_function_calling_webrtc.py b/python/samples/concepts/realtime/realtime_agent_with_function_calling_webrtc.py
index 69333c79c463..e11bc028c05e 100644
--- a/python/samples/concepts/realtime/realtime_agent_with_function_calling_webrtc.py
+++ b/python/samples/concepts/realtime/realtime_agent_with_function_calling_webrtc.py
@@ -5,8 +5,6 @@
 from datetime import datetime
 from random import randint
 
-from azure.identity import AzureCliCredential
-
 from samples.concepts.realtime.utils import AudioPlayerWebRTC, AudioRecorderWebRTC, check_audio_devices
 from semantic_kernel.connectors.ai import FunctionChoiceBehavior
 from semantic_kernel.connectors.ai.open_ai import (
@@ -81,8 +79,12 @@ async def main() -> None:
     # and can also be passed in the receive method
     # You can also pass in kernel, plugins, chat_history or settings here.
     # For WebRTC the audio_track is required
+
+    # Note: api_version (either through settings or directly in the client) must be set to "2025-08-28"
+    # for Azure OpenAI realtime deployments.
     realtime_agent = AzureRealtimeWebRTC(
-        audio_track=AudioRecorderWebRTC(), region="swedencentral", plugins=[Helpers()], credential=AzureCliCredential()
+        audio_track=AudioRecorderWebRTC(),
+        plugins=[Helpers()],
     )
 
     # Create the settings for the session
diff --git a/python/samples/concepts/realtime/realtime_agent_with_function_calling_websocket.py b/python/samples/concepts/realtime/realtime_agent_with_function_calling_websocket.py
index 9031a4193dfe..ef853dbb6e09 100644
--- a/python/samples/concepts/realtime/realtime_agent_with_function_calling_websocket.py
+++ b/python/samples/concepts/realtime/realtime_agent_with_function_calling_websocket.py
@@ -82,6 +82,9 @@ async def main() -> None:
     # to signal the end of the user's turn and start the response.
     # manual VAD is not part of this sample
     # for more info: https://platform.openai.com/docs/api-reference/realtime-sessions/create#realtime-sessions-create-turn_detection
+
+    # Note: api_version (either through settings or directly in the client) must be set to "2025-08-28"
+    # for Azure OpenAI realtime deployments.
     settings = AzureRealtimeExecutionSettings(
         instructions="""
     You are a chat bot. Your name is Mosscap and
diff --git a/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py b/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py
index 4f007ea4342e..6e2f0f746ead 100644
--- a/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py
+++ b/python/samples/concepts/realtime/simple_realtime_chat_webrtc.py
@@ -5,10 +5,10 @@
 
 from samples.concepts.realtime.utils import AudioPlayerWebRTC, AudioRecorderWebRTC, check_audio_devices
 from semantic_kernel.connectors.ai.open_ai import (
+    AzureRealtimeExecutionSettings,
     ListenEvents,
-    OpenAIRealtimeExecutionSettings,
-    OpenAIRealtimeWebRTC,
 )
+from semantic_kernel.connectors.ai.open_ai.services.azure_realtime import AzureRealtimeWebRTC
 from semantic_kernel.contents import RealtimeTextEvent
 
 logging.basicConfig(level=logging.WARNING)
@@ -43,7 +43,7 @@ async def main() -> None:
     # create the realtime client and optionally add the audio output function, this is optional
     # you can define the protocol to use, either "websocket" or "webrtc"
     # they will behave the same way, even though the underlying protocol is quite different
-    settings = OpenAIRealtimeExecutionSettings(
+    settings = AzureRealtimeExecutionSettings(
         instructions="""
     You are a chat bot. Your name is Mosscap and
     you have one goal: figure out what people need.
@@ -59,7 +59,12 @@ async def main() -> None:
         # Enable both text and audio output to get transcripts
         output_modalities=["text", "audio"],
     )
-    realtime_client = OpenAIRealtimeWebRTC(audio_track=AudioRecorderWebRTC(), settings=settings)
+    # Note: api_version (either through settings or directly in the client) must be set to "2025-08-28"
+    # for Azure OpenAI realtime deployments.
+    realtime_client = AzureRealtimeWebRTC(
+        audio_track=AudioRecorderWebRTC(),
+        settings=settings,
+    )
     # Create the settings for the session
     audio_player = AudioPlayerWebRTC()
     # the context manager calls the create_session method on the client and starts listening to the audio stream
@@ -84,7 +89,7 @@ async def main() -> None:
 
 if __name__ == "__main__":
     print(
-        "Instructions: start speaking. "
+        "Instructions: start speaking when you see 'Session updated.' "
         "The model will detect when you stop and automatically start responding. "
         "Press ctrl + c to stop the program."
     )
diff --git a/python/samples/concepts/realtime/simple_realtime_chat_websocket.py b/python/samples/concepts/realtime/simple_realtime_chat_websocket.py
index 412703f24e4f..0aa59acc1258 100644
--- a/python/samples/concepts/realtime/simple_realtime_chat_websocket.py
+++ b/python/samples/concepts/realtime/simple_realtime_chat_websocket.py
@@ -3,8 +3,6 @@
 import asyncio
 import logging
 
-from azure.identity import AzureCliCredential
-
 from samples.concepts.realtime.utils import AudioPlayerWebsocket, AudioRecorderWebsocket, check_audio_devices
 from semantic_kernel.connectors.ai.open_ai import (
     AzureRealtimeExecutionSettings,
@@ -59,7 +57,11 @@ async def main() -> None:
         # for more details.
         voice="shimmer",
     )
-    realtime_client = AzureRealtimeWebsocket(settings=settings, credential=AzureCliCredential())
+    # Note: api_version (either through settings or directly in the client) must be set to "2025-08-28"
+    # for Azure OpenAI realtime deployments.
+    realtime_client = AzureRealtimeWebsocket(
+        settings=settings,
+    )
     audio_player = AudioPlayerWebsocket()
     audio_recorder = AudioRecorderWebsocket(realtime_client=realtime_client)
     # Create the settings for the session
@@ -84,7 +86,7 @@ async def main() -> None:
 
 if __name__ == "__main__":
     print(
-        "Instructions: Start speaking. "
+        "Instructions: Start speaking when you see 'Session updated.' "
         "The model will detect when you stop and automatically start responding. "
         "Press ctrl + c to stop the program."
     )
diff --git a/python/samples/concepts/realtime/utils.py b/python/samples/concepts/realtime/utils.py
index 9c8d4491c734..6943eaddcfb0 100644
--- a/python/samples/concepts/realtime/utils.py
+++ b/python/samples/concepts/realtime/utils.py
@@ -321,6 +321,7 @@ def _sounddevice_callback(self, outdata, frames, time, status):
             logger.debug(f"Audio output status: {status}")
         if self._queue:
             if self._queue.empty():
+                outdata[:] = 0
                 return
             data = self._queue.get_nowait()
             outdata[:] = data.reshape(outdata.shape)
diff --git a/python/semantic_kernel/connectors/ai/open_ai/const.py b/python/semantic_kernel/connectors/ai/open_ai/const.py
index 0f39c75fd593..3c78fee15d2b 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/const.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/const.py
@@ -2,4 +2,4 @@
 
 from typing import Final
 
-DEFAULT_AZURE_API_VERSION: Final[str] = "2024-10-21"
+DEFAULT_AZURE_API_VERSION: Final[str] = "2025-08-28"
diff --git a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py
index 4d0647de10c2..3c45a9aac477 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_realtime_execution_settings.py
@@ -54,7 +54,6 @@ class TurnDetection(KernelBaseModel):
 class OpenAIRealtimeExecutionSettings(PromptExecutionSettings):
     """Request settings for OpenAI realtime services."""
 
-    modalities: Sequence[Literal["audio", "text"]] | None = None
     output_modalities: Sequence[Literal["audio", "text"]] | None = None
     ai_model_id: Annotated[str | None, Field(None, serialization_alias="model")] = None
     instructions: str | None = None
@@ -77,8 +76,7 @@ class OpenAIRealtimeExecutionSettings(PromptExecutionSettings):
             "on the function choice configuration.",
         ),
     ] = None
-    temperature: Annotated[float | None, Field(ge=0.6, le=1.2)] = None
-    max_response_output_tokens: Annotated[int | Literal["inf"] | None, Field(gt=0)] = None
+    max_output_tokens: Annotated[int | Literal["inf"] | None, Field(gt=0)] = None
     input_audio_noise_reduction: dict[Literal["type"], Literal["near_field", "far_field"]] | None = None
 
     def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/_open_ai_realtime.py b/python/semantic_kernel/connectors/ai/open_ai/services/_open_ai_realtime.py
index 98653e103153..304b4e4efff1 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/_open_ai_realtime.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/_open_ai_realtime.py
@@ -255,8 +255,8 @@ class ListenEvents(str, Enum):
     RESPONSE_OUTPUT_ITEM_DONE = "response.output_item.done"
     RESPONSE_CONTENT_PART_ADDED = "response.content_part.added"
     RESPONSE_CONTENT_PART_DONE = "response.content_part.done"
-    RESPONSE_TEXT_DELTA = "response.text.delta"
-    RESPONSE_TEXT_DONE = "response.text.done"
+    RESPONSE_TEXT_DELTA = "response.output_text.delta"
+    RESPONSE_TEXT_DONE = "response.output_text.done"
     RESPONSE_AUDIO_TRANSCRIPT_DELTA = "response.output_audio_transcript.delta"
     RESPONSE_AUDIO_TRANSCRIPT_DONE = "response.output_audio_transcript.done"
     RESPONSE_AUDIO_DELTA = "response.output_audio.delta"
@@ -302,7 +302,12 @@ async def _parse_event(self, event: RealtimeServerEvent) -> AsyncGenerator[Realt
         might be of different types.
         """
         match event.type:
-            case ListenEvents.RESPONSE_AUDIO_TRANSCRIPT_DELTA.value | "response.audio_transcript.delta":
+            case (
+                ListenEvents.RESPONSE_AUDIO_TRANSCRIPT_DELTA.value
+                | "response.audio_transcript.delta"
+                | ListenEvents.RESPONSE_TEXT_DELTA.value
+                | "response.text.delta"
+            ):
                 yield RealtimeTextEvent(
                     service_type=event.type,
                     service_event=event,
@@ -312,15 +317,16 @@ async def _parse_event(self, event: RealtimeServerEvent) -> AsyncGenerator[Realt
                         choice_index=0,
                     ),
                 )
-            case ListenEvents.RESPONSE_AUDIO_TRANSCRIPT_DONE.value | "response.audio_transcript.done":
-                yield RealtimeTextEvent(
-                    service_type=event.type,
-                    service_event=event,
-                    text=TextContent(
-                        inner_content=event,
-                        text=event.transcript,  # type: ignore
-                    ),
-                )
+            case (
+                ListenEvents.RESPONSE_AUDIO_TRANSCRIPT_DONE.value
+                | "response.audio_transcript.done"
+                | ListenEvents.RESPONSE_TEXT_DONE.value
+                | "response.text.done"
+            ):
+                # Don't yield RealtimeTextEvent here — the deltas already streamed all
+                # the text.  Emitting the full text again would cause duplicate output
+                # for any consumer that prints every RealtimeTextEvent.
+                yield RealtimeEvent(service_type=event.type, service_event=event)
             case ListenEvents.RESPONSE_OUTPUT_ITEM_ADDED.value:
                 if event.item.type == "function_call" and event.item.call_id and event.item.name:  # type: ignore
                     self._call_id_to_function_map[event.item.call_id] = event.item.name  # type: ignore
@@ -723,24 +729,19 @@ async def _send(self, event: RealtimeClientEvent) -> None:
                     # Only keep fields that are allowed in session updates
                     # Note: output_modalities is not allowed in WebRTC session updates
                     allowed_fields = {
+                        "type",
                         "instructions",
                         "model",
                         "max_output_tokens",
                         "tools",
                         "tool_choice",
-                        "temperature",
                         "prompt",
                         "tracing",
                         "truncation",
                     }
                     event_dict["session"] = {k: v for k, v in session_dict.items() if k in allowed_fields}
 
-                # Debug: Log what we're sending to see the structure
-                import json
-
-                json_data = json.dumps(event_dict)
-                logger.debug(f"Sending WebRTC session.update: {json_data}")
-                self.data_channel.send(json_data)
+                self.data_channel.send(json.dumps(event_dict))
             else:
                 self.data_channel.send(event.model_dump_json(exclude_none=True))
         except Exception as e:
@@ -860,8 +861,18 @@ async def _on_data(self, data: str) -> None:
             await self._receive_buffer.put(parsed_event)
 
     async def _get_ephemeral_token(self) -> str:
-        """Get an ephemeral token from OpenAI."""
-        data = {"model": self.ai_model_id}
+        """Get an ephemeral token from OpenAI.
+
+        GA endpoint: POST /v1/realtime/client_secrets
+        Request body: {"session": {"type": "realtime", "model": "<model>"}}
+        Response: {"value": "<token>", "expires_at": ..., "session": {...}}
+        """
+        data = {
+            "session": {
+                "type": "realtime",
+                "model": self.ai_model_id,
+            }
+        }
         headers, url = self._get_ephemeral_token_headers_and_url()
         headers = prepend_semantic_kernel_to_user_agent(headers)
         try:
@@ -874,22 +885,25 @@ async def _get_ephemeral_token(self) -> str:
                     raise Exception(f"Failed to get ephemeral token: {error_text}")
 
                 result = await response.json()
-                return result["client_secret"]["value"]
+                return result["value"]
 
         except Exception as e:
             logger.error(f"Failed to get ephemeral token: {e!s}")
             raise
 
     def _get_ephemeral_token_headers_and_url(self) -> tuple[dict[str, str], str]:
-        """Get the headers for the ephemeral token."""
+        """Get the headers and URL for the ephemeral token."""
         return {
             "Authorization": f"Bearer {self.client.api_key}",
             "Content-Type": "application/json",
-        }, f"{self.client.realtime._client.base_url}/realtime/sessions"
+        }, f"{self.client.realtime._client.base_url}/realtime/client_secrets"
 
     def _get_webrtc_url(self) -> str:
-        """Get the WebRTC URL."""
-        return f"{self.client.realtime._client.base_url}/realtime?model={self.ai_model_id}"
+        """Get the WebRTC URL.
+
+        GA endpoint: POST /v1/realtime/calls?model=<model>
+        """
+        return f"{self.client.realtime._client.base_url}/realtime/calls?model={self.ai_model_id}"
 
 
 # region Websocket
@@ -933,9 +947,6 @@ async def _send(self, event: RealtimeClientEvent) -> None:
         if not self.connection:
             raise ValueError("Connection is not established.")
         try:
-            # Debug logging to see what we're actually sending
-            if hasattr(event, "type") and event.type == "session.update":
-                logger.debug(f"Sending session.update event: {event.model_dump()}")
             await self.connection.send(event)
         except Exception as e:
             logger.error(f"Error sending response: {e!s}")
diff --git a/python/semantic_kernel/connectors/ai/open_ai/services/azure_realtime.py b/python/semantic_kernel/connectors/ai/open_ai/services/azure_realtime.py
index fa45cf8eef97..9ae6f7558ec6 100644
--- a/python/semantic_kernel/connectors/ai/open_ai/services/azure_realtime.py
+++ b/python/semantic_kernel/connectors/ai/open_ai/services/azure_realtime.py
@@ -2,12 +2,15 @@
 
 import logging
 import sys
+import warnings
 from collections.abc import Callable, Coroutine, Mapping
 from typing import TYPE_CHECKING, Any
 
+from aiohttp import ClientSession
 from azure.core.credentials import TokenCredential
 from openai import AsyncAzureOpenAI
 from openai.lib.azure import AsyncAzureADTokenProvider
+from openai.resources.realtime.realtime import AsyncRealtimeConnection
 from pydantic import ValidationError
 
 from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_realtime_execution_settings import (
@@ -21,13 +24,18 @@
 from semantic_kernel.connectors.ai.open_ai.services.open_ai_model_types import OpenAIModelTypes
 from semantic_kernel.connectors.ai.open_ai.settings.azure_open_ai_settings import AzureOpenAISettings
 from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+from semantic_kernel.const import USER_AGENT
 from semantic_kernel.exceptions.service_exceptions import ServiceInitializationError
 from semantic_kernel.utils.feature_stage_decorator import experimental
+from semantic_kernel.utils.telemetry.user_agent import SEMANTIC_KERNEL_USER_AGENT, prepend_semantic_kernel_to_user_agent
 
 if TYPE_CHECKING:
     from aiortc.mediastreams import MediaStreamTrack
     from numpy import ndarray
 
+    from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
+    from semantic_kernel.contents.chat_history import ChatHistory
+
 if sys.version_info >= (3, 12):
     from typing import override  # pragma: no cover
 else:
@@ -139,6 +147,50 @@ def __init__(
     def get_prompt_execution_settings_class(self) -> type[PromptExecutionSettings]:
         return AzureRealtimeExecutionSettings
 
+    @override
+    async def create_session(
+        self,
+        chat_history: "ChatHistory | None" = None,
+        settings: "PromptExecutionSettings | None" = None,
+        **kwargs: Any,
+    ) -> None:
+        """Create a session in the service.
+
+        The Azure GA Realtime endpoint (/openai/v1/realtime) does not accept
+        the api-version query parameter. The openai SDK always adds it, so we
+        bypass the SDK's _configure_realtime and build the connection directly.
+        """
+        from websockets.asyncio.client import connect as ws_connect
+
+        # Build the GA WebSocket URL: wss://<resource>.openai.azure.com/openai/v1/realtime?model=<deployment-name>
+        # Note: GA uses ?model= (not ?deployment= which was preview)
+        # See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/realtime-audio-websockets
+        endpoint = str(self.client._base_url).rstrip("/")  # type: ignore[attr-defined]
+        if "/openai" in endpoint:
+            endpoint = endpoint[: endpoint.index("/openai")]
+        url = f"wss://{endpoint.split('://')[-1]}/openai/v1/realtime?model={self.ai_model_id}"
+
+        # Build auth headers
+        auth_headers: dict[str, str] = {}
+        if self.client.api_key and self.client.api_key != "<missing API key>":
+            auth_headers["api-key"] = self.client.api_key
+        else:
+            token = await self.client._get_azure_ad_token()  # type: ignore[attr-defined]
+            if token:
+                auth_headers["Authorization"] = f"Bearer {token}"
+
+        ws = await ws_connect(
+            url,
+            additional_headers={
+                **auth_headers,
+                USER_AGENT: SEMANTIC_KERNEL_USER_AGENT,
+            },
+        )
+
+        self.connection = AsyncRealtimeConnection(ws)
+        self.connected.set()
+        await self.update_session(settings=settings, chat_history=chat_history, **kwargs)
+
 
 @experimental
 class AzureRealtimeWebRTC(OpenAIRealtimeWebRTCBase, AzureOpenAIConfigBase):
@@ -147,7 +199,7 @@ class AzureRealtimeWebRTC(OpenAIRealtimeWebRTCBase, AzureOpenAIConfigBase):
     def __init__(
         self,
         audio_track: "MediaStreamTrack",
-        region: str,
+        region: str | None = None,
         audio_output_callback: Callable[["ndarray"], Coroutine[Any, Any, None]] | None = None,
         service_id: str | None = None,
         api_key: str | None = None,
@@ -165,14 +217,13 @@ def __init__(
         credential: TokenCredential | None = None,
         **kwargs: Any,
     ) -> None:
-        """Initialize an AzureRealtimeWebsocket service.
+        """Initialize an AzureRealtimeWebRTC service.
 
         Args:
             audio_track: The audio track to use for the service, only used by WebRTC.
                 It can be any class that implements the AudioStreamTrack interface.
-            region: The region to use for the service.
-                This is required for WebRTC, and should be the same as the region of the Azure deployment.
-                Currently this can be "eastus2" or "swedencentral".
+            region: Deprecated; passing a value emits a DeprecationWarning. No longer needed
+                for the GA Realtime API (it was required for the preview WebRTC endpoint).
             audio_output_callback: The audio output callback, optional.
                 This should be a coroutine, that takes a ndarray with audio as input.
                 The goal of this function is to allow you to play the audio with the
@@ -224,9 +275,15 @@ def __init__(
             raise ServiceInitializationError("Failed to create OpenAI settings.", ex) from ex
         if not azure_openai_settings.realtime_deployment_name:
             raise ServiceInitializationError("The OpenAI realtime model ID is required.")
+        if region is not None:
+            warnings.warn(
+                "The 'region' parameter is deprecated and no longer needed for the GA Realtime API. "
+                "The WebRTC endpoint is now derived from the resource endpoint.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
         if audio_track:
             kwargs["audio_track"] = audio_track
-        kwargs["region"] = region
         super().__init__(
             api_key=azure_openai_settings.api_key.get_secret_value() if azure_openai_settings.api_key else None,
             audio_output_callback=audio_output_callback,
@@ -251,11 +308,27 @@ def get_prompt_execution_settings_class(self) -> type[PromptExecutionSettings]:
 
     @override
     def _get_ephemeral_token_headers_and_url(self) -> tuple[dict[str, str], str]:
-        """Get the headers and URL for the ephemeral token."""
-        url = (
-            f"{self.client.realtime._client.base_url}/realtimeapi/sessions?api-version="
-            f"{self.client._api_version}"  # type: ignore[attr-defined]
-        )
+        """Get the headers and URL for the ephemeral token.
+
+        Uses the GA endpoint format: POST /openai/v1/realtime/client_secrets
+        See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/realtime-audio-webrtc
+        """
+        endpoint = str(self.client._base_url).rstrip("/")  # type: ignore[attr-defined]
+        # Strip any trailing path segments to get the base Azure resource URL
+        # base_url typically looks like https://<resource>.openai.azure.com/openai/...
+        # We need: https://<resource>.openai.azure.com/openai/v1/realtime/client_secrets
+        if "/openai" in endpoint:
+            endpoint = endpoint[: endpoint.index("/openai")]
+        url = f"{endpoint}/openai/v1/realtime/client_secrets"
+
+        if self.client.api_key and self.client.api_key != "<missing API key>":
+            return (
+                {
+                    "api-key": self.client.api_key,
+                    "Content-Type": "application/json",
+                },
+                url,
+            )
         if self.client._azure_ad_token is not None:  # type: ignore[attr-defined]
             return (
                 {
@@ -264,20 +337,51 @@ def _get_ephemeral_token_headers_and_url(self) -> tuple[dict[str, str], str]:
                 },
                 url,
             )
-        return (
-            {
-                "Authorization": f"Bearer {self.client.api_key}",
-                "Content-Type": "application/json",
-            },
-            url,
-        )
+        raise ServiceInitializationError("No API key or Azure AD token available for ephemeral token request.")
+
+    @override
+    async def _get_ephemeral_token(self) -> str:
+        """Get an ephemeral token from Azure OpenAI.
+
+        Azure GA requires a nested session object:
+            {"session": {"type": "realtime", "model": "<deployment>"}}
+        and returns the token directly as {"value": "..."}, the same shape as
+        OpenAI's GA response (preview nested it: {"client_secret": {"value": "..."}}).
+        See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/realtime-audio-webrtc
+        """
+        data = {
+            "session": {
+                "type": "realtime",
+                "model": self.ai_model_id,
+            }
+        }
+        headers, url = self._get_ephemeral_token_headers_and_url()
+        headers = prepend_semantic_kernel_to_user_agent(headers)
+        try:
+            async with (
+                ClientSession() as session,
+                session.post(url, headers=headers, json=data) as response,
+            ):
+                if response.status not in [200, 201]:
+                    error_text = await response.text()
+                    raise Exception(f"Failed to get ephemeral token: {error_text}")
+
+                result = await response.json()
+                # Azure GA format returns {"value": "token"} directly
+                return result["value"]
+
+        except Exception as e:
+            logger.error(f"Failed to get ephemeral token: {e!s}")
+            raise
 
     @override
     def _get_webrtc_url(self) -> str:
-        """Get the webrtc URL."""
-        if not self.model_extra:
-            raise ServiceInitializationError("The region is required for WebRTC.")
-        region = self.model_extra.get("region")
-        if not region:
-            raise ServiceInitializationError("The region is required for WebRTC.")
-        return f"https://{region}.realtimeapi-preview.ai.azure.com/v1/realtimertc"
+        """Get the WebRTC URL.
+
+        Uses the GA endpoint format: /openai/v1/realtime/calls
+        See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/how-to/realtime-audio-webrtc
+        """
+        endpoint = str(self.client._base_url).rstrip("/")  # type: ignore[attr-defined]
+        if "/openai" in endpoint:
+            endpoint = endpoint[: endpoint.index("/openai")]
+        return f"{endpoint}/openai/v1/realtime/calls"
diff --git a/python/tests/unit/connectors/ai/open_ai/services/test_openai_realtime.py b/python/tests/unit/connectors/ai/open_ai/services/test_openai_realtime.py
index 3a7ad49c0732..64d7a6a64b58 100644
--- a/python/tests/unit/connectors/ai/open_ai/services/test_openai_realtime.py
+++ b/python/tests/unit/connectors/ai/open_ai/services/test_openai_realtime.py
@@ -120,6 +120,7 @@
         arguments="{}",
         call_id="call_id",
         item_id="item_id",
+        name="function_name",
         output_index=0,
         response_id="response_id",
     ),
@@ -383,6 +384,7 @@ def test_create_openai_realtime_event(
                 event_id="event_id",
                 output_index=0,
                 item_id="item_id",
+                name="function_name",
                 response_id="response_id",
                 type="response.function_call_arguments.done",
             ),
@@ -494,6 +496,7 @@ async def test_parse_function_call_arguments_done(OpenAIWebsocket, kernel):
         event_id="event_id",
         output_index=0,
         item_id="item_id",
+        name="plugin_name-function_name",
         response_id="response_id",
         type="response.function_call_arguments.done",
     )
@@ -533,6 +536,7 @@ async def test_parse_function_call_arguments_done_fail(OpenAIWebsocket, kernel):
         event_id="event_id",
         output_index=0,
         item_id="item_id",
+        name="function_name",
         response_id="response_id",
         type="response.function_call_arguments.done",
     )