speechmatics · lgavincrl · Oct 21, 2025 · Oct 21, 2025 · Oct 21, 2025
diff --git a/.DS_Store b/.DS_Store
diff --git a/1_nb_async_client_ b/1_nb_async_client_
@@ -0,0 +1,272 @@
+Current Implementation Analysis
+Looking at the code:
+
+File Reading: synthesize_from_file reads the entire file content at once:
+Code (Python):
+async with aiofiles.open(file_path, 'r', encoding='utf-8') as f:
+    text_content = await f.read()  # Reads ALL content
+Single API Call: It then passes all of the text to synthesize_speech, which makes one API request:
+Code (Python):
+return await self.synthesize_speech(text_content, ...)  # Single request
+Batch Processing: The API endpoint /synthesize appears to be a batch endpoint that processes the entire text and returns the complete audio file.
+What This Means
+No Streaming: The text is sent as one complete payload
+No Real-time Generation: You must wait for the entire audio to be synthesized before getting any response
+No Incremental Output: You can't start playing audio while more is being generated
+
+For True Streaming TTS, You Would Need:
+Chunked Text Processing: Break text into smaller segments
+Streaming API Endpoint: An endpoint that supports streaming responses (for example, a hypothetical /stream-synthesize)
+WebSocket or Server-Sent Events: For real-time bidirectional communication
+Incremental Audio Delivery: Receive audio chunks as they're generated
+
+
+"""
+Asynchronous client for Speechmatics text-to-speech synthesis.
+
+This module provides the main AsyncClient class, which sends text to the
+/synthesize endpoint and returns the HTTP response carrying the audio.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import os
+import uuid
+from typing import Any
+from typing import BinaryIO
+from typing import Optional
+from typing import Union
+
+import aiohttp
+
+from ._auth import AuthBase
+from ._auth import StaticKeyAuth
+from ._exceptions import AuthenticationError
+from ._exceptions import TimeoutError
+from ._logging import get_logger
+from ._models import ConnectionConfig
+
+from ._transport import Transport
+
+
+class AsyncClient:
+    """
+    Asynchronous client for Speechmatics text-to-speech synthesis.
+
+    The methods on this class send text to the ``/synthesize`` endpoint through
+    the Transport created in ``__init__`` and return the transport's response
+    object, leaving it to the caller to read the audio payload from it.
+
+    The client covers the following workflow:
+    1. Build a synthesis request from text (or from the contents of a text file)
+    2. POST the whole text to ``/synthesize`` in a single request
+    3. Return the response so the caller can read the audio
+    4. Close the transport on ``close()`` or on exit from ``async with``
+
+    Args:
+        auth: Authentication instance. If not provided, a StaticKeyAuth is built
+              from api_key (env-var fallback, if any, lives in StaticKeyAuth).
+        api_key: Speechmatics API key (used only if auth not provided).
+        url: REST API endpoint URL. If not provided, uses SPEECHMATICS_BATCH_URL
+             environment variable or "https://asr.api.speechmatics.com/v2".
+        conn_config: Connection configuration object. If not provided, a default
+               ConnectionConfig() is used.
+
+    Raises:
+        ConfigurationError: Presumably raised by StaticKeyAuth when no API key is
+            available; the check is not in this class (confirm in ``_auth``).
+
+    Examples:
+        Basic usage:
+            >>> async with AsyncClient(api_key="your-key") as client:
+            ...     response = await client.synthesize_speech("Hello world")
+            ...     audio_data = await response.read()
+
+        With JWT authentication:
+            >>> from speechmatics.batch import JWTAuth  # NOTE(review): confirm path
+            >>> auth = JWTAuth("your-api-key", ttl=3600)
+            >>> async with AsyncClient(auth=auth) as client:
+            ...     # Use client with JWT auth
+            ...     pass
+    """
+
+    def __init__(
+        self,
+        auth: Optional[AuthBase] = None,
+        *,
+        api_key: Optional[str] = None,
+        url: Optional[str] = None,
+        conn_config: Optional[ConnectionConfig] = None,
+    ) -> None:
+        """
+        Initialize the AsyncClient.
+
+        Args:
+            auth: Authentication object (any AuthBase). If not supplied, a
+                StaticKeyAuth is created from api_key.
+            api_key: Speechmatics API key, forwarded to StaticKeyAuth when auth
+                is not supplied.
+            url: REST API endpoint URL. If not supplied, uses SPEECHMATICS_BATCH_URL
+                 env var or defaults to "https://asr.api.speechmatics.com/v2".
+            conn_config: Connection configuration; defaults to ConnectionConfig().
+
+        Raises:
+            ConfigurationError: Presumably from StaticKeyAuth if no key is found.
+        """
+        self._auth = auth or StaticKeyAuth(api_key)
+        self._url = url or os.environ.get("SPEECHMATICS_BATCH_URL") or "https://asr.api.speechmatics.com/v2"
+        self._conn_config = conn_config or ConnectionConfig()
+        self._request_id = str(uuid.uuid4())  # one id per client, shared with transport
+        self._transport = Transport(self._url, self._conn_config, self._auth, self._request_id)
+        self._logger = get_logger(__name__)
+        self._logger.debug("AsyncClient initialized (request_id=%s, url=%s)", self._request_id, self._url)
+
+    async def __aenter__(self) -> AsyncClient:
+        """
+        Async context manager entry.
+
+        Returns:
+            Self for use in async with statements.
+
+        Examples:
+            >>> async with AsyncClient(api_key="key") as client:
+            ...     response = await client.synthesize_speech("Hello world")
+        """
+        return self
+
+    async def synthesize_speech(
+        self,
+        text: str,
+        *,
+        voice: Optional[str] = None,
+        output_format: str = "wav",
+        sample_rate: Optional[int] = None,
+        speed: Optional[float] = None,
+    ) -> aiohttp.ClientResponse:
+        """
+        Convert text to speech audio.
+
+        The whole text is sent in a single POST to ``/synthesize``. Optional
+        settings that are falsy (None, 0, "") are left out of the request.
+
+        Args:
+            text: Text to convert to speech.
+            voice: Voice ID to use for synthesis (e.g., "en-US-neural-1").
+            output_format: Audio format ("wav", "mp3", "ogg").
+            sample_rate: Audio sample rate in Hz (e.g., 22050, 44100).
+                Sent as a string.
+            speed: Speech speed multiplier (0.5 to 2.0). Sent as a string; the
+                range is not checked here.
+
+        Returns:
+            The response object returned by the transport (annotated as
+            aiohttp.ClientResponse); call ``await response.read()`` for the audio.
+
+        Raises:
+            Any exception raised by the transport propagates unchanged
+            (presumably AuthenticationError / TransportError; confirm in _transport).
+
+        Examples:
+            >>> response = await client.synthesize_speech("Hello world")
+            >>> audio_data = await response.read()
+            >>> with open("output.wav", "wb") as f:
+            ...     f.write(audio_data)
+        """
+        request_data = {"text": text, "output_format": output_format}
+        if voice:
+            request_data["voice"] = voice
+        if sample_rate:
+            # Numeric options are stringified, so every payload value is a str.
+            request_data["sample_rate"] = str(sample_rate)
+        if speed:
+            request_data["speed"] = str(speed)
+        return await self._transport.post("/synthesize", json_data=request_data)
+
+    async def synthesize_from_file(
+        self,
+        file_path: Union[str, os.PathLike],
+        *,
+        voice: Optional[str] = None,
+        output_format: str = "wav",
+        sample_rate: Optional[int] = None,
+        speed: Optional[float] = None,
+    ) -> aiohttp.ClientResponse:
+        """
+        Convert text from a file to speech audio.
+
+        The file is read in full as UTF-8 and its contents are passed to
+        ``synthesize_speech`` in one call; nothing is chunked or streamed.
+
+        Args:
+            file_path: Path to text or SSML file.
+            voice: Voice ID to use for synthesis.
+            output_format: Audio format ("wav", "mp3", "ogg").
+            sample_rate: Audio sample rate in Hz.
+            speed: Speech speed multiplier (0.5 to 2.0).
+
+        Returns:
+            Whatever ``synthesize_speech`` returns (the transport's response object).
+
+        Raises:
+            FileNotFoundError: If file doesn't exist.
+            Any exception from ``synthesize_speech`` propagates unchanged.
+
+        Examples:
+            >>> response = await client.synthesize_from_file("script.txt")
+            >>> audio_data = await response.read()
+            >>> with open("output.wav", "wb") as f:
+            ...     f.write(audio_data)
+        """
+        import aiofiles  # local import: only this method needs aiofiles
+        from pathlib import Path
+
+        if not Path(file_path).exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        async with aiofiles.open(file_path, 'r', encoding='utf-8') as f:
+            text_content = await f.read()
+
+        return await self.synthesize_speech(
+            text_content,
+            voice=voice,
+            output_format=output_format,
+            sample_rate=sample_rate,
+            speed=speed,
+        )
+
+    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """
+        Async context manager exit with automatic cleanup.
+
+        Calls ``close()``, which closes the transport. An exception raised inside
+        the ``async with`` body is not suppressed (this method returns None).
+
+        Args:
+            exc_type: Exception type if an exception occurred.
+            exc_val: Exception value if an exception occurred.
+            exc_tb: Exception traceback if an exception occurred.
+        """
+        await self.close()
+
+    async def close(self) -> None:
+        """
+        Close the client's transport.
+
+        Cleanup is best-effort: any Exception raised while closing the transport
+        is logged at debug level and not re-raised, so this method can be called
+        from ``finally`` blocks and ``__aexit__`` without masking other errors.
+
+        Examples:
+            >>> client = AsyncClient(api_key="key")
+            >>> try:
+            ...     response = await client.synthesize_speech("Hello world")
+            ... finally:
+            ...     await client.close()
+        """
+        try:
+            await self._transport.close()
+        except Exception:
+            # Best-effort cleanup: keep close() from raising, but leave a trace
+            # instead of discarding the failure silently.
+            self._logger.debug("Error while closing transport", exc_info=True)
diff --git a/Makefile b/Makefile
@@ -1,12 +1,12 @@
 # Makefile for Speechmatics Python SDKs
 
 .PHONY: help
-.PHONY: test-all test-rt test-batch test-flow
-.PHONY: format-all format-rt format-batch format-flow
-.PHONY: lint-all lint-rt lint-batch lint-flow
-.PHONY: type-check-all type-check-rt type-check-batch type-check-flow
-.PHONY: build-all build-rt build-batch build-flow
-.PHONY: clean-all clean-rt clean-batch clean-flow clean-flow
+.PHONY: test-all test-rt test-batch test-flow test-tts
+.PHONY: format-all format-rt format-batch format-flow format-tts
+.PHONY: lint-all lint-rt lint-batch lint-flow lint-tts
+.PHONY: type-check-all type-check-rt type-check-batch type-check-flow type-check-tts
+.PHONY: build-all build-rt build-batch build-flow build-tts
+.PHONY: clean-all clean-rt clean-batch clean-flow clean-tts
 
 help:
 	@echo "Available commands:"
@@ -49,7 +49,7 @@ help:
 	@echo ""
 
 # Testing targets
-test-all: test-rt test-batch test-flow
+test-all: test-rt test-batch test-flow test-tts
 
 test-rt:
 	pytest tests/rt/ -v
@@ -60,8 +60,11 @@ test-batch:
 test-flow:
 	pytest tests/flow/ -v
 
+test-tts:
+	pytest tests/tts/ -v
+
 # Formatting targets
-format-all: format-rt format-batch format-flow
+format-all: format-rt format-batch format-flow format-tts
 
 format-rt:
 	cd sdk/rt/speechmatics && black .
@@ -75,8 +78,12 @@ format-flow:
 	cd sdk/flow/speechmatics && black .
 	cd sdk/flow/speechmatics && ruff check --fix .
 
+format-tts:
+	cd sdk/tts/speechmatics && black .
+	cd sdk/tts/speechmatics && ruff check --fix .
+
 # Linting targets
-lint-all: lint-rt lint-batch lint-flow
+lint-all: lint-rt lint-batch lint-flow lint-tts
 
 lint-rt:
 	cd sdk/rt/speechmatics && ruff check .
@@ -87,8 +94,11 @@ lint-batch:
 lint-flow:
 	cd sdk/flow/speechmatics && ruff check .
 
+lint-tts:
+	cd sdk/tts/speechmatics && ruff check .
+
 # Type checking targets
-type-check-all: type-check-rt type-check-batch type-check-flow
+type-check-all: type-check-rt type-check-batch type-check-flow type-check-tts
 
 type-check-rt:
 	cd sdk/rt/speechmatics && mypy .
@@ -99,18 +109,22 @@ type-check-batch:
 type-check-flow:
 	cd sdk/flow/speechmatics && mypy .
 
+type-check-tts:
+	cd sdk/tts/speechmatics && mypy .
+
 # Installation targets
 install-dev:
 	python -m pip install --upgrade pip
 	python -m pip install -e sdk/rt[dev]
 	python -m pip install -e sdk/batch[dev]
 	python -m pip install -e sdk/flow[dev]
+	python -m pip install -e sdk/tts[dev]
 
 install-build:
 	python -m pip install --upgrade build
 
 # Building targets
-build-all: build-rt build-batch build-flow
+build-all: build-rt build-batch build-flow build-tts
 
 build-rt: install-build
 	cd sdk/rt && python -m build
@@ -121,8 +135,11 @@ build-batch: install-build
 build-flow: install-build
 	cd sdk/flow && python -m build
 
+build-tts: install-build
+	cd sdk/tts && python -m build
+
 # Cleaning targets
-clean-all: clean-rt clean-batch clean-flow
+clean-all: clean-rt clean-batch clean-flow clean-tts
 
 clean-rt:
 	rm -rf sdk/rt/dist sdk/rt/build sdk/rt/*.egg-info
@@ -135,3 +152,7 @@ clean-batch:
 clean-flow:
 	rm -rf sdk/flow/dist sdk/flow/build sdk/flow/*.egg-info
 	find sdk/flow -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
+
+clean-tts:
+	rm -rf sdk/tts/dist sdk/tts/build sdk/tts/*.egg-info
+	find sdk/tts -name __pycache__ -exec rm -rf {} + 2>/dev/null || true