diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..4a769cc Binary files /dev/null and b/.DS_Store differ diff --git a/.gitignore b/.gitignore index f3074d5..f8ac9cf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# macOS +.DS_Store + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/Makefile b/Makefile index 575f351..5836b46 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,12 @@ # Makefile for Speechmatics Python SDKs .PHONY: help -.PHONY: test-all test-rt test-batch test-flow -.PHONY: format-all format-rt format-batch format-flow -.PHONY: lint-all lint-rt lint-batch lint-flow -.PHONY: type-check-all type-check-rt type-check-batch type-check-flow -.PHONY: build-all build-rt build-batch build-flow -.PHONY: clean-all clean-rt clean-batch clean-flow clean-flow +.PHONY: test-all test-rt test-batch test-flow test-tts +.PHONY: format-all format-rt format-batch format-flow format-tts +.PHONY: lint-all lint-rt lint-batch lint-flow lint-tts +.PHONY: type-check-all type-check-rt type-check-batch type-check-flow type-check-tts +.PHONY: build-all build-rt build-batch build-flow build-tts +.PHONY: clean-all clean-rt clean-batch clean-flow clean-tts help: @echo "Available commands:" @@ -49,7 +49,7 @@ help: @echo "" # Testing targets -test-all: test-rt test-batch test-flow +test-all: test-rt test-batch test-flow test-tts test-rt: pytest tests/rt/ -v @@ -60,8 +60,14 @@ test-batch: test-flow: pytest tests/flow/ -v +test-tts: + pytest tests/tts/ -v + # Formatting targets -format-all: format-rt format-batch format-flow +format-all: format-rt format-batch format-flow format-tts +format-tests: + cd tests && black . + cd tests && ruff check --fix . format-rt: cd sdk/rt/speechmatics && black . @@ -75,8 +81,12 @@ format-flow: cd sdk/flow/speechmatics && black . cd sdk/flow/speechmatics && ruff check --fix . +format-tts: + cd sdk/tts/speechmatics && black . + cd sdk/tts/speechmatics && ruff check --fix . 
+ # Linting targets -lint-all: lint-rt lint-batch lint-flow +lint-all: lint-rt lint-batch lint-flow lint-tts lint-rt: cd sdk/rt/speechmatics && ruff check . @@ -87,8 +97,14 @@ lint-batch: lint-flow: cd sdk/flow/speechmatics && ruff check . +lint-tts: + cd sdk/tts/speechmatics && ruff check . + +lint-tests: + cd tests && ruff check . + # Type checking targets -type-check-all: type-check-rt type-check-batch type-check-flow +type-check-all: type-check-rt type-check-batch type-check-flow type-check-tts type-check-rt: cd sdk/rt/speechmatics && mypy . @@ -99,18 +115,25 @@ type-check-batch: type-check-flow: cd sdk/flow/speechmatics && mypy . +type-check-tts: + cd sdk/tts/speechmatics && mypy . + +type-check-tests: + cd tests && mypy . + # Installation targets install-dev: python -m pip install --upgrade pip - python -m pip install -e sdk/rt[dev] - python -m pip install -e sdk/batch[dev] - python -m pip install -e sdk/flow[dev] + python -m pip install -e sdk/rt[dev] --config-settings editable_mode=strict + python -m pip install -e sdk/batch[dev] --config-settings editable_mode=strict + python -m pip install -e sdk/flow[dev] --config-settings editable_mode=strict + python -m pip install -e sdk/tts[dev] --config-settings editable_mode=strict install-build: python -m pip install --upgrade build # Building targets -build-all: build-rt build-batch build-flow +build-all: build-rt build-batch build-flow build-tts build-rt: install-build cd sdk/rt && python -m build @@ -121,8 +144,11 @@ build-batch: install-build build-flow: install-build cd sdk/flow && python -m build +build-tts: install-build + cd sdk/tts && python -m build + # Cleaning targets -clean-all: clean-rt clean-batch clean-flow +clean-all: clean-rt clean-batch clean-flow clean-tts clean-rt: rm -rf sdk/rt/dist sdk/rt/build sdk/rt/*.egg-info @@ -135,3 +161,7 @@ clean-batch: clean-flow: rm -rf sdk/flow/dist sdk/flow/build sdk/flow/*.egg-info find sdk/flow -name __pycache__ -exec rm -rf {} + 2>/dev/null || true + 
+clean-tts: + rm -rf sdk/tts/dist sdk/tts/build sdk/tts/*.egg-info + find sdk/tts -name __pycache__ -exec rm -rf {} + 2>/dev/null || true diff --git a/sdk/TTS/README.md b/sdk/TTS/README.md new file mode 100644 index 0000000..a316002 --- /dev/null +++ b/sdk/TTS/README.md @@ -0,0 +1,274 @@ +# Speechmatics Batch API Client + +[![PyPI](https://img.shields.io/pypi/v/speechmatics-batch)](https://pypi.org/project/speechmatics-batch/) +![PythonSupport](https://img.shields.io/badge/Python-3.9%2B-green) + +Async Python client for Speechmatics Batch API. + +## Features + +- Async API client with comprehensive error handling +- Type hints throughout for better IDE support +- Environment variable support for credentials +- Easy-to-use interface for submitting, monitoring, and retrieving transcription jobs +- Full job configuration support with all Speechmatics features +- Intelligent transcript formatting with speaker diarization +- Support for multiple output formats (JSON, TXT, SRT) + +## Installation + +```bash +pip install speechmatics-batch +``` + +## Usage + +### Quick Start + +```python +import asyncio +from speechmatics.batch import AsyncClient + +async def main(): + # Create a client using environment variable SPEECHMATICS_API_KEY + async with AsyncClient() as client: + # Simple transcription + result = await client.transcribe("audio.wav") + print(result.transcript_text) + +asyncio.run(main()) +``` + +## JWT Authentication + +For enhanced security, use temporary JWT tokens instead of static API keys. +JWTs are short-lived (60 seconds default) and automatically refreshed: + +```python +from speechmatics.batch import AsyncClient, JWTAuth + +auth = JWTAuth("your-api-key", ttl=60) + +async with AsyncClient(auth=auth) as client: + # Tokens are cached and auto-refreshed automatically + result = await client.transcribe("audio.wav") + print(result.transcript_text) +``` + +Ideal for long-running applications or when minimizing API key exposure. 
+See the [authentication documentation](https://docs.speechmatics.com/introduction/authentication) for more details. + +### Basic Job Workflow + +```python +import asyncio +from speechmatics.batch import AsyncClient, JobConfig, JobType, TranscriptionConfig + +async def main(): + # Create client with explicit API key + async with AsyncClient(api_key="your-api-key") as client: + + # Configure transcription + config = JobConfig( + type=JobType.TRANSCRIPTION, + transcription_config=TranscriptionConfig( + language="en", + enable_entities=True, + diarization="speaker" + ) + ) + + # Submit job + job = await client.submit_job("audio.wav", config=config) + print(f"Job submitted: {job.id}") + + # Wait for completion + result = await client.wait_for_completion( + job.id, + polling_interval=2.0, + timeout=300.0 + ) + + # Access results + print(f"Transcript: {result.transcript_text}") + print(f"Confidence: {result.confidence}") + +asyncio.run(main()) +``` + +### Advanced Configuration + +```python +import asyncio +from speechmatics.batch import ( + AsyncClient, + JobConfig, + JobType, + OperatingPoint, + TranscriptionConfig, + TranslationConfig, + SummarizationConfig +) + +async def main(): + async with AsyncClient(api_key="your-api-key") as client: + + # Advanced job configuration + config = JobConfig( + type=JobType.TRANSCRIPTION, + transcription_config=TranscriptionConfig( + language="en", + operating_point=OperatingPoint.ENHANCED, + enable_entities=True, + diarization="speaker", + ), + translation_config=TranslationConfig(target_languages=["es", "fr"]), + summarization_config=SummarizationConfig( + content_type="conversational", summary_length="brief" + ), + ) + + result = await client.transcribe("audio.wav", config=config) + + # Access advanced features + if result.summary: + print(f"Summary: {result.summary}") + if result.translations: + print(f"Translations: {result.translations}") + +asyncio.run(main()) +``` + +### Manual Job Management + +```python +import asyncio 
+from speechmatics.batch import AsyncClient, JobStatus + +async def main(): + async with AsyncClient() as client: + + # Submit job + job = await client.submit_job("audio.wav") + + # Check job status + job_details = await client.get_job_info(job.id) + print(f"Status: {job_details.status}") + + # Wait for completion manually + while job_details.status == JobStatus.RUNNING: + await asyncio.sleep(5) + job_details = await client.get_job_info(job.id) + + if job_details.status == JobStatus.DONE: + # Get transcript + transcript = await client.get_transcript(job.id) + print(transcript.transcript_text) + else: + print(f"Job failed with status: {job_details.status}") + +asyncio.run(main()) +``` + +### Different Output Formats + +```python +import asyncio +from speechmatics.batch import AsyncClient, FormatType + +async def main(): + async with AsyncClient() as client: + job = await client.submit_job("audio.wav") + + # Get JSON format (default) + json_result = await client.get_transcript(job.id, format_type=FormatType.JSON) + print(json_result.transcript_text) + + # Get plain text + txt_result = await client.get_transcript(job.id, format_type=FormatType.TXT) + print(txt_result) + + # Get SRT subtitles + srt_result = await client.get_transcript(job.id, format_type=FormatType.SRT) + print(srt_result) + +asyncio.run(main()) +``` + +### Error Handling + +```python +import asyncio +from speechmatics.batch import ( + AsyncClient, + BatchError, + AuthenticationError, + JobError, + TimeoutError +) + +async def main(): + try: + async with AsyncClient() as client: + result = await client.transcribe("audio.wav", timeout=120.0) + print(result.transcript_text) + + except AuthenticationError: + print("Invalid API key") + except BatchError as e: + print(f"Job submission failed: {e}") + except JobError as e: + print(f"Job processing failed: {e}") + except TimeoutError as e: + print(f"Job timed out: {e}") + except FileNotFoundError: + print("Audio file not found") + +asyncio.run(main()) +``` + 
+### Connection Configuration + +```python +import asyncio +from speechmatics.batch import AsyncClient, ConnectionConfig + +async def main(): + # Custom connection settings + config = ConnectionConfig( + url="https://asr.api.speechmatics.com/v2", + api_key="your-api-key", + connect_timeout=30.0, + operation_timeout=600.0 + ) + + async with AsyncClient(conn_config=config) as client: + result = await client.transcribe("audio.wav") + print(result.transcript_text) + +asyncio.run(main()) +``` + +## Logging + +The client supports logging with job id tracing for debugging. To increase logging verbosity, set `DEBUG` level in your example code: + +```python +import logging +import sys + +logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler(sys.stdout) + ] +) +``` + +## Environment Variables + +The client supports the following environment variables: + +- `SPEECHMATICS_API_KEY`: Your Speechmatics API key +- `SPEECHMATICS_BATCH_URL`: Custom API endpoint URL (optional) diff --git a/sdk/TTS/pyproject.toml b/sdk/TTS/pyproject.toml new file mode 100644 index 0000000..065ccec --- /dev/null +++ b/sdk/TTS/pyproject.toml @@ -0,0 +1,52 @@ +[build-system] +requires = ["setuptools>=61.0.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "speechmatics-tts" +dynamic = ["version"] +description = "Speechmatics TTS API Client" +readme = "README.md" +authors = [{ name = "Speechmatics", email = "support@speechmatics.com" }] +license = "MIT" +requires-python = ">=3.9" +dependencies = ["aiohttp", "aiofiles"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Topic :: Multimedia :: Sound/Audio :: 
Speech", + "Topic :: Software Development :: Libraries :: Python Modules", +] +keywords = ["speechmatics", "speech-to-text", "tts", "transcription", "api"] + +[project.optional-dependencies] +dev = [ + "black", + "ruff", + "mypy", + "types-aiofiles", + "pre-commit", + "pytest", + "pytest-asyncio", + "pytest-cov", + "pytest-mock", + "build", +] + +[project.urls] +homepage = "https://github.com/speechmatics/speechmatics-python-sdk" +documentation = "https://docs.speechmatics.com/" +repository = "https://github.com/speechmatics/speechmatics-python-sdk" +issues = "https://github.com/speechmatics/speechmatics-python-sdk/issues" + +[tool.setuptools.dynamic] +version = { attr = "speechmatics.tts.__version__" } + +[tool.setuptools.packages.find] +where = ["."] diff --git a/sdk/TTS/speechmatics/tts/__init__.py b/sdk/TTS/speechmatics/tts/__init__.py new file mode 100644 index 0000000..1dececf --- /dev/null +++ b/sdk/TTS/speechmatics/tts/__init__.py @@ -0,0 +1,33 @@ +__version__ = "0.0.0" + +from ._async_client import AsyncClient +from ._auth import AuthBase +from ._auth import JWTAuth +from ._auth import StaticKeyAuth +from ._exceptions import AuthenticationError +from ._exceptions import BatchError +from ._exceptions import ConfigurationError +from ._exceptions import ConnectionError +from ._exceptions import JobError +from ._exceptions import TimeoutError +from ._exceptions import TransportError +from ._models import ConnectionConfig +from ._models import OutputFormat +from ._models import Voice + +__all__ = [ + "AsyncClient", + "AuthBase", + "JWTAuth", + "StaticKeyAuth", + "ConfigurationError", + "AuthenticationError", + "ConnectionError", + "TransportError", + "BatchError", + "JobError", + "TimeoutError", + "ConnectionConfig", + "Voice", + "OutputFormat", +] diff --git a/sdk/TTS/speechmatics/tts/_async_client.py b/sdk/TTS/speechmatics/tts/_async_client.py new file mode 100644 index 0000000..ab200b5 --- /dev/null +++ b/sdk/TTS/speechmatics/tts/_async_client.py @@ -0,0 
+1,183 @@ +""" +Asynchronous client for Speechmatics TTS transcription. + +This module provides the main AsyncClient class that handles text-to-speech +using the Speechmatics TTS API. +""" + +from __future__ import annotations + +import os +import uuid +from typing import Any +from typing import Optional + +import aiohttp + +from ._auth import AuthBase +from ._auth import StaticKeyAuth +from ._logging import get_logger +from ._models import ConnectionConfig +from ._models import OutputFormat +from ._models import Voice +from ._transport import Transport + + +class AsyncClient: + """ + Asynchronous client for Speechmatics TTS transcription. + + This client provides a full-featured async interface to the Speechmatics TTS API, + supporting job submission, monitoring, and result retrieval with comprehensive + error management. It properly implements the Speechmatics REST API. + + The client handles the complete batch transcription workflow: + 1. Job submission with audio file and configuration + 2. Job status monitoring (with polling helpers) + 3. Result retrieval when transcription is complete + 4. Proper cleanup and error handling + + Args: + auth: Authentication instance. If not provided, uses StaticKeyAuth + with api_key parameter or SPEECHMATICS_API_KEY environment variable. + api_key: Speechmatics API key (used only if auth not provided). + url: REST API endpoint URL. If not provided, uses SPEECHMATICS_TTS_URL + environment variable or defaults to production endpoint. + conn_config: Complete connection configuration object. If provided, overrides + other parameters. + + Raises: + ConfigurationError: If required configuration is missing or invalid. + + Examples: + Basic usage: + >>> async with AsyncClient(api_key="your-key") as client: + ... response = await client.generate(text="Hello world") + ... 
print(response) + + With JWT authentication: + >>> from speechmatics.batch import JWTAuth + >>> auth = JWTAuth("your-api-key", ttl=3600) + >>> async with AsyncClient(auth=auth) as client: + ... # Use client with JWT auth + ... pass + """ + + def __init__( + self, + auth: Optional[AuthBase] = None, + *, + api_key: Optional[str] = None, + url: Optional[str] = None, + conn_config: Optional[ConnectionConfig] = None, + ) -> None: + """ + Initialize the AsyncClient. + + Args: + auth: Authentication method, it can be StaticKeyAuth or JWTAuth. + If None, creates StaticKeyAuth with the api_key. + api_key: Speechmatics API key. If None, uses SPEECHMATICS_API_KEY env var. + url: REST API endpoint URL. If None, uses SPEECHMATICS_TTS_URL env var + or defaults to production endpoint. + conn_config: Complete connection configuration. + + Raises: + ConfigurationError: If auth is None and API key is not provided/found. + """ + self._auth = auth or StaticKeyAuth(api_key) + self._url = url or os.environ.get("SPEECHMATICS_TTS_URL") or "https://preview.tts.speechmatics.com" + self._conn_config = conn_config or ConnectionConfig() + self._request_id = str(uuid.uuid4()) + self._transport = Transport(self._url, self._conn_config, self._auth, self._request_id) + + self._logger = get_logger(__name__) + self._logger.debug("AsyncClient initialized (request_id=%s, url=%s)", self._request_id, self._url) + + async def __aenter__(self) -> AsyncClient: + """ + Async context manager entry. + + Returns: + Self for use in async with statements. + + Examples: + >>> async with AsyncClient(api_key="key") as client: + ... response = await client.generate(text="Hello world") + ... print(response) + """ + return self + + async def generate( + self, + *, + text: str = "", + voice: Voice = Voice.SARAH, + output_format: OutputFormat = OutputFormat.RAW_PCM_16000, + ) -> aiohttp.ClientResponse: + """ + Convert text to speech audio. + + Args: + text: Text to convert to speech. 
+ voice: Voice ID to use for synthesis (e.g., "en-US-neural-1"). + output_format: Audio format ("wav", "mp3", "ogg"). + + Returns: + Audio data as bytes. + + Raises: + AuthenticationError: If API key is invalid. + TransportError: If synthesis fails. + + Examples: + >>> response = await client.generate(text="Hello world") + >>> audio_data = await response.read() + >>> with open("output.wav", "wb") as f: + ... f.write(audio_data) + """ + # Prepare synthesis request + request_data = { + "text": text, + } + + response = await self._transport.post( + f"/generate/{voice.value}?output_format={output_format.value}", json_data=request_data + ) + return response + + async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: + """ + Async context manager exit with automatic cleanup. + + Ensures all resources are properly cleaned up when exiting the + async context manager, including closing HTTP connections. + + Args: + exc_type: Exception type if an exception occurred. + exc_val: Exception value if an exception occurred. + exc_tb: Exception traceback if an exception occurred. + """ + await self.close() + + async def close(self) -> None: + """ + Close the client and cleanup all resources. + + This method ensures proper cleanup of all client resources including + closing HTTP connections and sessions. + + This method is safe to call multiple times and will handle cleanup + gracefully even if errors occur during the process. + + Examples: + >>> client = AsyncClient(api_key="key") + >>> try: + ... result = await client.generate(text="Hello world") + >>> finally: + ... 
class AuthBase(abc.ABC):
    """
    Abstract base class for authentication methods.

    Subclasses implement ``get_auth_headers`` to supply the HTTP headers
    that authenticate a request.
    """

    # Default management platform URL; JWTAuth falls back to it when neither
    # ``mp_url`` nor SM_MANAGEMENT_PLATFORM_URL is provided.
    BASE_URL = "https://mp.speechmatics.com"

    @abc.abstractmethod
    async def get_auth_headers(self) -> dict[str, str]:
        """
        Get authentication headers asynchronously.

        Returns:
            A dictionary of authentication headers.
        """
        raise NotImplementedError


class StaticKeyAuth(AuthBase):
    """
    Authentication using a static API key.

    The same API key is sent as a bearer token on every request.

    Args:
        api_key: The Speechmatics API key. Falls back to the
            SPEECHMATICS_API_KEY environment variable when omitted.

    Raises:
        ValueError: If no API key is given and the environment variable is
            unset or empty.

    Examples:
        >>> auth = StaticKeyAuth("your-api-key")
        >>> headers = await auth.get_auth_headers()
        >>> print(headers)
        {'Authorization': 'Bearer your-api-key'}
    """

    def __init__(self, api_key: Optional[str] = None):
        self._api_key = api_key or os.environ.get("SPEECHMATICS_API_KEY")

        if not self._api_key:
            raise ValueError("API key required: provide api_key or set SPEECHMATICS_API_KEY")

    async def get_auth_headers(self) -> dict[str, str]:
        """Return the bearer-token header built from the static API key."""
        return {"Authorization": f"Bearer {self._api_key}"}


class JWTAuth(AuthBase):
    """
    Authentication using temporary JWT tokens.

    Generates short-lived JWTs from the main API key, caches the current
    token and requests a new one shortly before it expires.

    Args:
        api_key: The main Speechmatics API key used to generate JWTs. Falls
            back to the SPEECHMATICS_API_KEY environment variable.
        ttl: Time-to-live for tokens, between 60 and 86400 seconds.
            For security reasons, we suggest using the shortest TTL possible.
        region: Self-Service customers are restricted to "eu".
            Enterprise customers can use this to specify which region the
            temporary key should be enabled in.
        client_ref: Optional client reference for the JWT token.
            This parameter must be used if the temporary keys are exposed to
            the end-user's client to prevent a user from accessing the data
            of a different user.
        mp_url: Optional management platform URL override. Falls back to
            SM_MANAGEMENT_PLATFORM_URL, then to ``AuthBase.BASE_URL``.
        request_id: Optional request ID, sent as ``X-Request-Id`` for
            debugging purposes.

    Raises:
        ValueError: If no API key is available or ``ttl`` is out of range.

    Examples:
        >>> auth = JWTAuth("your-api-key")
        >>> headers = await auth.get_auth_headers()
        >>> print(headers)
        {'Authorization': 'Bearer eyJhbGciOiJSUzI1NiIs...'}
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        *,
        ttl: int = 60,
        region: Literal["eu", "usa", "au"] = "eu",
        client_ref: Optional[str] = None,
        mp_url: Optional[str] = None,
        request_id: Optional[str] = None,
    ):
        self._api_key = api_key or os.environ.get("SPEECHMATICS_API_KEY")
        self._ttl = ttl
        self._region = region
        self._client_ref = client_ref
        self._request_id = request_id
        self._mp_url = mp_url or os.getenv("SM_MANAGEMENT_PLATFORM_URL", self.BASE_URL)

        if not self._api_key:
            raise ValueError(
                "API key required: please provide api_key or set SPEECHMATICS_API_KEY environment variable"
            )

        if not 60 <= self._ttl <= 86_400:
            raise ValueError("ttl must be between 60 and 86400 seconds")

        self._cached_token: Optional[str] = None
        self._token_expires_at: float = 0
        self._token_lock = asyncio.Lock()

    async def get_auth_headers(self) -> dict[str, str]:
        """
        Get JWT auth headers, reusing the cached token while it is valid.

        Returns:
            A dictionary holding the bearer-token ``Authorization`` header.

        Raises:
            AuthenticationError: If a new token is needed and cannot be generated.
        """
        # The lock serialises refreshes so concurrent callers share one token request.
        async with self._token_lock:
            current_time = time.time()
            # Refresh 10 seconds early so a token is not sent right at its expiry.
            if current_time >= self._token_expires_at - 10:
                self._cached_token = await self._generate_token()
                self._token_expires_at = current_time + self._ttl

            return {"Authorization": f"Bearer {self._cached_token}"}

    async def _generate_token(self) -> str:
        """
        Request a new temporary key from the management platform.

        Returns:
            The ``key_value`` field of the JSON response, as a string.

        Raises:
            ImportError: If aiohttp is not installed.
            AuthenticationError: If the platform does not answer with HTTP 201,
                on network errors, or on any other failure while handling the
                response.
        """
        try:
            import aiohttp
        except ImportError as e:
            raise ImportError(
                "aiohttp is required for JWT authentication. Please install it with `pip install aiohttp`"
            ) from e

        endpoint = f"{self._mp_url}/v1/api_keys"
        # NOTE(review): the key type "batch" is carried over unchanged; confirm it is
        # the right type for keys used against the TTS API.
        params = {"type": "batch"}
        payload: dict[str, object] = {"ttl": self._ttl, "region": str(self._region)}

        if self._client_ref:
            payload["client_ref"] = self._client_ref

        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
        }

        if self._request_id:
            headers["X-Request-Id"] = self._request_id

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    endpoint,
                    params=params,
                    json=payload,
                    headers=headers,
                    timeout=aiohttp.ClientTimeout(total=10),
                ) as response:
                    if response.status != 201:
                        text = await response.text()
                        raise AuthenticationError(f"Failed to generate JWT: HTTP {response.status}: {text}")

                    data = await response.json()
                    return str(data["key_value"])

        except AuthenticationError:
            # Already a precise error (non-201 response); do not let the generic
            # handler below re-wrap it as an "unexpected" failure.
            raise
        except aiohttp.ClientError as e:
            raise AuthenticationError(f"Network error generating JWT: {e}") from e
        except Exception as e:
            raise AuthenticationError(f"Unexpected error generating JWT: {e}") from e
@asynccontextmanager
async def prepare_audio_file(
    audio_file: Union[str, BinaryIO],
) -> AsyncGenerator[tuple[str, Union[BinaryIO, bytes]], None]:
    """
    Async context manager that resolves an audio input to ``(filename, data)``.

    Args:
        audio_file: Path to an audio file, or a file-like object containing
            audio data.

    Yields:
        Tuple of (filename, file_data). For a path, ``file_data`` is the file
        content as bytes; for a file-like object it is the object itself,
        which is neither read nor closed here.

    Examples:
        >>> async with prepare_audio_file("audio.wav") as (filename, file_data):
        ...     # Use file_data for upload
        ...     pass
    """
    if isinstance(audio_file, str):
        async with aiofiles.open(audio_file, "rb") as f:
            content = await f.read()
        # The content is fully in memory, so the file need not stay open
        # while the caller uses it.
        yield os.path.basename(audio_file), content
    else:
        # File objects opened from a descriptor expose an int ``name``; only a
        # non-empty string is usable as an upload filename.
        raw_name = getattr(audio_file, "name", None)
        if isinstance(raw_name, str) and raw_name:
            filename = os.path.basename(raw_name)
        else:
            filename = "audio.wav"
        yield filename, audio_file


def get_version() -> str:
    """
    Return the version of the installed ``speechmatics-tts`` distribution.

    Falls back to the package's ``__version__`` attribute when the
    distribution metadata is not available, and to ``"0.0.0"`` when that
    import fails as well.

    Returns:
        The version string.
    """
    try:
        # Must match ``[project].name`` in this package's pyproject.toml.
        return importlib.metadata.version("speechmatics-tts")
    except importlib.metadata.PackageNotFoundError:
        try:
            from . import __version__

            return __version__
        except ImportError:
            return "0.0.0"
@dataclass
class ConnectionConfig:
    """
    Configuration for HTTP connection parameters.

    This class defines connection-related timeouts.

    Attributes:
        connect_timeout: Timeout in seconds for connection establishment.
        operation_timeout: Default timeout in seconds for API operations.
    """

    connect_timeout: float = 30.0
    operation_timeout: float = 300.0


class OutputFormat(str, Enum):
    """
    Output format for the generated audio.

    Members are also plain strings; ``.value`` is the identifier sent to the API.

    Attributes:
        WAV_16000: WAV audio format with 16kHz sample rate ("wav_16000").
        RAW_PCM_16000: Raw PCM audio with 16kHz sample rate. Note that the
            value is "pcm_16000", without the "raw_" prefix of the member name.
    """

    WAV_16000 = "wav_16000"
    RAW_PCM_16000 = "pcm_16000"


class Voice(str, Enum):
    """
    Voice ID for the generated audio.

    Members are also plain strings; ``.value`` is the voice identifier.

    Attributes:
        SARAH: English (UK) female voice.
        THEO: English (UK) male voice.
        MEGAN: English (UK) female voice.
    """

    SARAH = "sarah"
    THEO = "theo"
    MEGAN = "megan"
+ conn_config: Connection configuration including URL and timeouts. + auth: Authentication instance for handling credentials. + request_id: Optional unique identifier for request tracking. Generated + automatically if not provided. + + Attributes: + conn_config: The connection configuration object. + request_id: Unique identifier for this transport instance. + + Examples: + Basic usage: + >>> from ._auth import StaticKeyAuth + >>> conn_config = ConnectionConfig() + >>> auth = StaticKeyAuth("your-api-key") + >>> transport = Transport(conn_config, auth) + >>> response = await transport.post("/generate/sarah") + >>> await transport.close() + """ + + def __init__( + self, + url: str, + conn_config: ConnectionConfig, + auth: AuthBase, + request_id: Optional[str] = None, + ) -> None: + """ + Initialize the transport with connection configuration. + + Args: + conn_config: Connection configuration object containing connection parameters. + auth: Authentication instance for handling credentials. + request_id: Optional unique identifier for request tracking. + Generated automatically if not provided. + """ + self._url = url + self._conn_config = conn_config + self._auth = auth + self._request_id = request_id or str(uuid.uuid4()) + self._session: Optional[aiohttp.ClientSession] = None + self._closed = False + self._logger = get_logger(__name__) + + self._logger.debug("Transport initialized (request_id=%s, url=%s)", self._request_id, self._url) + + async def __aenter__(self) -> Transport: + """Async context manager entry.""" + await self._ensure_session() + return self + + async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: + """Async context manager exit with automatic cleanup.""" + await self.close() + + async def post( + self, + path: str, + json_data: Optional[dict[str, Any]] = None, + multipart_data: Optional[dict[str, Any]] = None, + timeout: Optional[float] = None, + ) -> aiohttp.ClientResponse: + """ + Send POST request to the API. 
+ + Args: + path: API endpoint path + json_data: Optional JSON data for request body + multipart_data: Optional multipart form data + timeout: Optional request timeout + + Returns: + HTTP response object + + Raises: + AuthenticationError: If authentication fails + TransportError: If request fails + """ + return await self._request("POST", path, json_data=json_data, multipart_data=multipart_data, timeout=timeout) + + async def close(self) -> None: + """ + Close the HTTP session and cleanup resources. + + This method gracefully closes the HTTP session and marks the + transport as closed. It's safe to call multiple times. + """ + if self._session: + try: + await self._session.close() + except Exception: + pass # Best effort cleanup + finally: + self._session = None + self._closed = True + + @property + def is_connected(self) -> bool: + """ + Check if the transport has an active session. + + Returns: + True if session is active, False otherwise + """ + return self._session is not None and not self._closed + + async def _ensure_session(self) -> None: + """Ensure HTTP session is created.""" + if self._session is None and not self._closed: + self._logger.debug( + "Creating HTTP session (connect_timeout=%.1fs, operation_timeout=%.1fs)", + self._conn_config.connect_timeout, + self._conn_config.operation_timeout, + ) + timeout = aiohttp.ClientTimeout( + total=self._conn_config.operation_timeout, + connect=self._conn_config.connect_timeout, + ) + self._session = aiohttp.ClientSession(timeout=timeout) + + async def _request( + self, + method: str, + path: str, + params: Optional[dict[str, Any]] = None, + json_data: Optional[dict[str, Any]] = None, + multipart_data: Optional[dict[str, Any]] = None, + timeout: Optional[float] = None, + ) -> aiohttp.ClientResponse: + """ + Send HTTP request to the API. 
+ + Args: + method: HTTP method (GET, POST, DELETE) + path: API endpoint path + params: Optional query parameters + json_data: Optional JSON data for request body + multipart_data: Optional multipart form data + timeout: Optional request timeout + + Returns: + HTTP response object + + Raises: + AuthenticationError: If authentication fails + ConnectionError: If connection fails + TransportError: For other transport errors + """ + await self._ensure_session() + + if self._session is None: + raise ConnectionError("Failed to create HTTP session") + + url = f"{self._url.rstrip('/')}{path}" + headers = await self._prepare_headers() + + self._logger.debug( + "Sending HTTP request %s %s (json=%s, multipart=%s)", + method, + url, + json_data is not None, + multipart_data is not None, + ) + + # Override timeout if specified + if timeout: + request_timeout = aiohttp.ClientTimeout(total=timeout) + else: + request_timeout = None + + try: + # Prepare request arguments + kwargs: dict[str, Any] = { + "headers": headers, + "params": params, + "timeout": request_timeout, + } + + if json_data: + kwargs["json"] = json_data + elif multipart_data: + # Force multipart encoding even when no files are present (for fetch_data support) + form_data = aiohttp.FormData(default_to_multipart=True) + for key, value in multipart_data.items(): + if isinstance(value, tuple) and len(value) == 3: + # File data: (filename, file_data, content_type) + filename, file_data, content_type = value + # aiohttp cannot serialize io.BytesIO directly; convert to bytes + if isinstance(file_data, io.BytesIO): + file_payload = file_data.getvalue() + else: + file_payload = file_data + form_data.add_field(key, file_payload, filename=filename, content_type=content_type) + else: + # Regular form field + if isinstance(value, dict): + import json + + value = json.dumps(value) + form_data.add_field(key, value) + kwargs["data"] = form_data + + async with self._session.request(method, url, **kwargs) as response: + return await 
self._handle_response(response) + + except asyncio.TimeoutError: + self._logger.error( + "Request timeout %s %s (timeout=%.1fs)", method, path, self._conn_config.operation_timeout + ) + raise TransportError(f"Request timeout for {method} {path}") from None + except aiohttp.ClientError as e: + self._logger.error("Request failed %s %s: %s", method, path, e) + raise ConnectionError(f"Request failed: {e}") from e + except Exception as e: + self._logger.error("Unexpected error %s %s: %s", method, path, e) + raise TransportError(f"Unexpected error: {e}") from e + + async def _prepare_headers(self) -> dict[str, str]: + """ + Prepare HTTP headers for requests. + + Returns: + Headers dictionary with authentication and tracking info + """ + auth_headers = await self._auth.get_auth_headers() + auth_headers["User-Agent"] = ( + f"speechmatics-batch-v{get_version()} python/{sys.version_info.major}.{sys.version_info.minor}" + ) + + if self._request_id: + auth_headers["X-Request-Id"] = self._request_id + + return auth_headers + + async def _handle_response(self, response: aiohttp.ClientResponse) -> aiohttp.ClientResponse: + """ + Handle HTTP response and extract JSON data. 
+ + Args: + response: HTTP response object + + Returns: + HTTP response object + + Raises: + AuthenticationError: For 401/403 responses + TransportError: For other error responses + """ + try: + if response.status == 401: + raise AuthenticationError("Invalid API key - authentication failed") + elif response.status == 403: + raise AuthenticationError("Access forbidden - check API key permissions") + elif response.status >= 400: + error_text = await response.text() + self._logger.error("HTTP error %d %s: %s", response.status, response.reason, error_text) + raise TransportError(f"HTTP {response.status}: {response.reason} - {error_text}") + return response + + except aiohttp.ContentTypeError as e: + self._logger.error("Failed to parse JSON response: %s", e) + raise TransportError(f"Failed to parse response: {e}") from e + except Exception as e: + self._logger.error("Error handling response: %s", e) + raise TransportError(f"Error handling response: {e}") from e diff --git a/sdk/tts/README.md b/sdk/tts/README.md new file mode 100644 index 0000000..a316002 --- /dev/null +++ b/sdk/tts/README.md @@ -0,0 +1,274 @@ +# Speechmatics Batch API Client + +[![PyPI](https://img.shields.io/pypi/v/speechmatics-batch)](https://pypi.org/project/speechmatics-batch/) +![PythonSupport](https://img.shields.io/badge/Python-3.9%2B-green) + +Async Python client for Speechmatics Batch API. 
+ +## Features + +- Async API client with comprehensive error handling +- Type hints throughout for better IDE support +- Environment variable support for credentials +- Easy-to-use interface for submitting, monitoring, and retrieving transcription jobs +- Full job configuration support with all Speechmatics features +- Intelligent transcript formatting with speaker diarization +- Support for multiple output formats (JSON, TXT, SRT) + +## Installation + +```bash +pip install speechmatics-batch +``` + +## Usage + +### Quick Start + +```python +import asyncio +from speechmatics.batch import AsyncClient + +async def main(): + # Create a client using environment variable SPEECHMATICS_API_KEY + async with AsyncClient() as client: + # Simple transcription + result = await client.transcribe("audio.wav") + print(result.transcript_text) + +asyncio.run(main()) +``` + +## JWT Authentication + +For enhanced security, use temporary JWT tokens instead of static API keys. +JWTs are short-lived (60 seconds default) and automatically refreshed: + +```python +from speechmatics.batch import AsyncClient, JWTAuth + +auth = JWTAuth("your-api-key", ttl=60) + +async with AsyncClient(auth=auth) as client: + # Tokens are cached and auto-refreshed automatically + result = await client.transcribe("audio.wav") + print(result.transcript_text) +``` + +Ideal for long-running applications or when minimizing API key exposure. +See the [authentication documentation](https://docs.speechmatics.com/introduction/authentication) for more details. 
+ +### Basic Job Workflow + +```python +import asyncio +from speechmatics.batch import AsyncClient, JobConfig, JobType, TranscriptionConfig + +async def main(): + # Create client with explicit API key + async with AsyncClient(api_key="your-api-key") as client: + + # Configure transcription + config = JobConfig( + type=JobType.TRANSCRIPTION, + transcription_config=TranscriptionConfig( + language="en", + enable_entities=True, + diarization="speaker" + ) + ) + + # Submit job + job = await client.submit_job("audio.wav", config=config) + print(f"Job submitted: {job.id}") + + # Wait for completion + result = await client.wait_for_completion( + job.id, + polling_interval=2.0, + timeout=300.0 + ) + + # Access results + print(f"Transcript: {result.transcript_text}") + print(f"Confidence: {result.confidence}") + +asyncio.run(main()) +``` + +### Advanced Configuration + +```python +import asyncio +from speechmatics.batch import ( + AsyncClient, + JobConfig, + JobType, + OperatingPoint, + TranscriptionConfig, + TranslationConfig, + SummarizationConfig +) + +async def main(): + async with AsyncClient(api_key="your-api-key") as client: + + # Advanced job configuration + config = JobConfig( + type=JobType.TRANSCRIPTION, + transcription_config=TranscriptionConfig( + language="en", + operating_point=OperatingPoint.ENHANCED, + enable_entities=True, + diarization="speaker", + ), + translation_config=TranslationConfig(target_languages=["es", "fr"]), + summarization_config=SummarizationConfig( + content_type="conversational", summary_length="brief" + ), + ) + + result = await client.transcribe("audio.wav", config=config) + + # Access advanced features + if result.summary: + print(f"Summary: {result.summary}") + if result.translations: + print(f"Translations: {result.translations}") + +asyncio.run(main()) +``` + +### Manual Job Management + +```python +import asyncio +from speechmatics.batch import AsyncClient, JobStatus + +async def main(): + async with AsyncClient() as client: + + # 
Submit job + job = await client.submit_job("audio.wav") + + # Check job status + job_details = await client.get_job_info(job.id) + print(f"Status: {job_details.status}") + + # Wait for completion manually + while job_details.status == JobStatus.RUNNING: + await asyncio.sleep(5) + job_details = await client.get_job_info(job.id) + + if job_details.status == JobStatus.DONE: + # Get transcript + transcript = await client.get_transcript(job.id) + print(transcript.transcript_text) + else: + print(f"Job failed with status: {job_details.status}") + +asyncio.run(main()) +``` + +### Different Output Formats + +```python +import asyncio +from speechmatics.batch import AsyncClient, FormatType + +async def main(): + async with AsyncClient() as client: + job = await client.submit_job("audio.wav") + + # Get JSON format (default) + json_result = await client.get_transcript(job.id, format_type=FormatType.JSON) + print(json_result.transcript_text) + + # Get plain text + txt_result = await client.get_transcript(job.id, format_type=FormatType.TXT) + print(txt_result) + + # Get SRT subtitles + srt_result = await client.get_transcript(job.id, format_type=FormatType.SRT) + print(srt_result) + +asyncio.run(main()) +``` + +### Error Handling + +```python +import asyncio +from speechmatics.batch import ( + AsyncClient, + BatchError, + AuthenticationError, + JobError, + TimeoutError +) + +async def main(): + try: + async with AsyncClient() as client: + result = await client.transcribe("audio.wav", timeout=120.0) + print(result.transcript_text) + + except AuthenticationError: + print("Invalid API key") + except BatchError as e: + print(f"Job submission failed: {e}") + except JobError as e: + print(f"Job processing failed: {e}") + except TimeoutError as e: + print(f"Job timed out: {e}") + except FileNotFoundError: + print("Audio file not found") + +asyncio.run(main()) +``` + +### Connection Configuration + +```python +import asyncio +from speechmatics.batch import AsyncClient, ConnectionConfig 
+ +async def main(): + # Custom connection settings + config = ConnectionConfig( + url="https://asr.api.speechmatics.com/v2", + api_key="your-api-key", + connect_timeout=30.0, + operation_timeout=600.0 + ) + + async with AsyncClient(conn_config=config) as client: + result = await client.transcribe("audio.wav") + print(result.transcript_text) + +asyncio.run(main()) +``` + +## Logging + +The client supports logging with job id tracing for debugging. To increase logging verbosity, set `DEBUG` level in your example code: + +```python +import logging +import sys + +logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler(sys.stdout) + ] +) +``` + +## Environment Variables + +The client supports the following environment variables: + +- `SPEECHMATICS_API_KEY`: Your Speechmatics API key +- `SPEECHMATICS_BATCH_URL`: Custom API endpoint URL (optional) diff --git a/sdk/tts/pyproject.toml b/sdk/tts/pyproject.toml new file mode 100644 index 0000000..065ccec --- /dev/null +++ b/sdk/tts/pyproject.toml @@ -0,0 +1,52 @@ +[build-system] +requires = ["setuptools>=61.0.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "speechmatics-tts" +dynamic = ["version"] +description = "Speechmatics TTS API Client" +readme = "README.md" +authors = [{ name = "Speechmatics", email = "support@speechmatics.com" }] +license = "MIT" +requires-python = ">=3.9" +dependencies = ["aiohttp", "aiofiles"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Topic :: Multimedia :: Sound/Audio :: Speech", + "Topic :: Software Development :: Libraries :: Python Modules", +] +keywords = ["speechmatics", 
"text-to-speech", "tts", "speech-synthesis", "api"]
+
+[project.optional-dependencies]
+dev = [
+    "black",
+    "ruff",
+    "mypy",
+    "types-aiofiles",
+    "pre-commit",
+    "pytest",
+    "pytest-asyncio",
+    "pytest-cov",
+    "pytest-mock",
+    "build",
+]
+
+[project.urls]
+homepage = "https://github.com/speechmatics/speechmatics-python-sdk"
+documentation = "https://docs.speechmatics.com/"
+repository = "https://github.com/speechmatics/speechmatics-python-sdk"
+issues = "https://github.com/speechmatics/speechmatics-python-sdk/issues"
+
+[tool.setuptools.dynamic]
+version = { attr = "speechmatics.tts.__version__" }
+
+[tool.setuptools.packages.find]
+where = ["."]
diff --git a/sdk/tts/speechmatics/tts/__init__.py b/sdk/tts/speechmatics/tts/__init__.py
new file mode 100644
index 0000000..1dececf
--- /dev/null
+++ b/sdk/tts/speechmatics/tts/__init__.py
@@ -0,0 +1,33 @@
+__version__ = "0.0.0"
+
+from ._async_client import AsyncClient
+from ._auth import AuthBase
+from ._auth import JWTAuth
+from ._auth import StaticKeyAuth
+from ._exceptions import AuthenticationError
+from ._exceptions import BatchError
+from ._exceptions import ConfigurationError
+from ._exceptions import ConnectionError
+from ._exceptions import JobError
+from ._exceptions import TimeoutError
+from ._exceptions import TransportError
+from ._models import ConnectionConfig
+from ._models import OutputFormat
+from ._models import Voice
+
+__all__ = [
+    "AsyncClient",
+    "AuthBase",
+    "JWTAuth",
+    "StaticKeyAuth",
+    "ConfigurationError",
+    "AuthenticationError",
+    "ConnectionError",
+    "TransportError",
+    "BatchError",
+    "JobError",
+    "TimeoutError",
+    "ConnectionConfig",
+    "Voice",
+    "OutputFormat",
+]
diff --git a/sdk/tts/speechmatics/tts/_async_client.py b/sdk/tts/speechmatics/tts/_async_client.py
new file mode 100644
index 0000000..ab200b5
--- /dev/null
+++ b/sdk/tts/speechmatics/tts/_async_client.py
@@ -0,0 +1,183 @@
+"""
+Asynchronous client for Speechmatics TTS (text-to-speech).
+
+This module provides the main AsyncClient class that handles text-to-speech
+using the Speechmatics TTS API.
+"""
+
+from __future__ import annotations
+
+import os
+import uuid
+from typing import Any
+from typing import Optional
+
+import aiohttp
+
+from ._auth import AuthBase
+from ._auth import StaticKeyAuth
+from ._logging import get_logger
+from ._models import ConnectionConfig
+from ._models import OutputFormat
+from ._models import Voice
+from ._transport import Transport
+
+
+class AsyncClient:
+    """
+    Asynchronous client for Speechmatics TTS (text-to-speech).
+
+    This client provides an async interface to the Speechmatics TTS API: it posts
+    text to the synthesis endpoint and returns the HTTP response that carries the
+    generated audio.
+
+    Typical usage:
+    1. Create the client (ideally with ``async with``)
+    2. Call ``generate()`` with the text, voice and output format
+    3. Read the audio bytes from the returned response
+    4. Close the client to release the HTTP session
+
+    Args:
+        auth: Authentication instance. If not provided, uses StaticKeyAuth
+            with api_key parameter or SPEECHMATICS_API_KEY environment variable.
+        api_key: Speechmatics API key (used only if auth not provided).
+        url: REST API endpoint URL. If not provided, uses SPEECHMATICS_TTS_URL
+            environment variable or defaults to https://preview.tts.speechmatics.com.
+        conn_config: Connection configuration (connect and operation timeouts).
+            Defaults to ConnectionConfig().
+
+    Raises:
+        ValueError: If auth is None and no API key is provided or found in the environment.
+
+    Examples:
+        Basic usage:
+            >>> async with AsyncClient(api_key="your-key") as client:
+            ...     response = await client.generate(text="Hello world")
+            ...     print(response)
+
+        With JWT authentication:
+            >>> from speechmatics.tts import JWTAuth
+            >>> auth = JWTAuth("your-api-key", ttl=3600)
+            >>> async with AsyncClient(auth=auth) as client:
+            ...     # Use client with JWT auth
+            ...     pass
+    """
+
+    def __init__(
+        self,
+        auth: Optional[AuthBase] = None,
+        *,
+        api_key: Optional[str] = None,
+        url: Optional[str] = None,
+        conn_config: Optional[ConnectionConfig] = None,
+    ) -> None:
+        """
+        Initialize the AsyncClient.
+
+        Args:
+            auth: Authentication method, it can be StaticKeyAuth or JWTAuth.
+                If None, creates StaticKeyAuth with the api_key.
+            api_key: Speechmatics API key. If None, uses SPEECHMATICS_API_KEY env var.
+            url: REST API endpoint URL. If None, uses SPEECHMATICS_TTS_URL env var
+                or defaults to https://preview.tts.speechmatics.com.
+            conn_config: Complete connection configuration.
+
+        Raises:
+            ValueError: If auth is None and API key is not provided/found.
+        """
+        self._auth = auth or StaticKeyAuth(api_key)
+        self._url = url or os.environ.get("SPEECHMATICS_TTS_URL") or "https://preview.tts.speechmatics.com"
+        self._conn_config = conn_config or ConnectionConfig()
+        self._request_id = str(uuid.uuid4())
+        self._transport = Transport(self._url, self._conn_config, self._auth, self._request_id)
+
+        self._logger = get_logger(__name__)
+        self._logger.debug("AsyncClient initialized (request_id=%s, url=%s)", self._request_id, self._url)
+
+    async def __aenter__(self) -> AsyncClient:
+        """
+        Async context manager entry.
+
+        Returns:
+            Self for use in async with statements.
+
+        Examples:
+            >>> async with AsyncClient(api_key="key") as client:
+            ...     response = await client.generate(text="Hello world")
+            ...     print(response)
+        """
+        return self
+
+    async def generate(
+        self,
+        *,
+        text: str = "",
+        voice: Voice = Voice.SARAH,
+        output_format: OutputFormat = OutputFormat.RAW_PCM_16000,
+    ) -> aiohttp.ClientResponse:
+        """
+        Convert text to speech audio.
+
+        Args:
+            text: Text to convert to speech.
+            voice: Voice to use for synthesis (a Voice member, e.g. Voice.SARAH).
+            output_format: Audio format (an OutputFormat member, e.g. OutputFormat.WAV_16000).
+
+        Returns:
+            The HTTP response; read the audio with ``await response.read()``.
+
+        Raises:
+            AuthenticationError: If API key is invalid.
+            TransportError: If synthesis fails.
+
+        Examples:
+            >>> response = await client.generate(text="Hello world", output_format=OutputFormat.WAV_16000)
+            >>> audio_data = await response.read()
+            >>> with open("output.wav", "wb") as f:
+            ...     f.write(audio_data)
+        """
+        # Prepare synthesis request
+        request_data = {
+            "text": text,
+        }
+
+        response = await self._transport.post(
+            f"/generate/{voice.value}?output_format={output_format.value}", json_data=request_data
+        )
+        return response
+
+    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """
+        Async context manager exit with automatic cleanup.
+
+        Ensures all resources are properly cleaned up when exiting the
+        async context manager, including closing HTTP connections.
+
+        Args:
+            exc_type: Exception type if an exception occurred.
+            exc_val: Exception value if an exception occurred.
+            exc_tb: Exception traceback if an exception occurred.
+        """
+        await self.close()
+
+    async def close(self) -> None:
+        """
+        Close the client and cleanup all resources.
+
+        This method ensures proper cleanup of all client resources including
+        closing HTTP connections and sessions.
+
+        This method is safe to call multiple times and will handle cleanup
+        gracefully even if errors occur during the process.
+
+        Examples:
+            >>> client = AsyncClient(api_key="key")
+            >>> try:
+            ...     result = await client.generate(text="Hello world")
+            >>> finally:
+            ...     await client.close()
+        """
+        try:
+            await self._transport.close()
+        except Exception:
+            pass  # Best effort cleanup
diff --git a/sdk/tts/speechmatics/tts/_auth.py b/sdk/tts/speechmatics/tts/_auth.py
new file mode 100644
index 0000000..e71730a
--- /dev/null
+++ b/sdk/tts/speechmatics/tts/_auth.py
@@ -0,0 +1,165 @@
+import abc
+import asyncio
+import os
+import time
+from typing import Literal
+from typing import Optional
+
+from ._exceptions import AuthenticationError
+
+
+class AuthBase(abc.ABC):
+    """
+    Abstract base class for authentication methods.
+    """
+
+    BASE_URL = "https://mp.speechmatics.com"
+
+    @abc.abstractmethod
+    async def get_auth_headers(self) -> dict[str, str]:
+        """
+        Get authentication headers asynchronously.
+
+        Returns:
+            A dictionary of authentication headers.
+        """
+        raise NotImplementedError
+
+
+class StaticKeyAuth(AuthBase):
+    """
+    Authentication using a static API key.
+
+    This is the traditional authentication method where the same
+    API key is used for all requests.
+
+    Args:
+        api_key: The Speechmatics API key.
+
+    Examples:
+        >>> auth = StaticKeyAuth("your-api-key")
+        >>> headers = await auth.get_auth_headers()
+        >>> print(headers)
+        {'Authorization': 'Bearer your-api-key'}
+    """
+
+    def __init__(self, api_key: Optional[str] = None):
+        self._api_key = api_key or os.environ.get("SPEECHMATICS_API_KEY")
+
+        if not self._api_key:
+            raise ValueError("API key required: provide api_key or set SPEECHMATICS_API_KEY")
+
+    async def get_auth_headers(self) -> dict[str, str]:
+        return {"Authorization": f"Bearer {self._api_key}"}
+
+
+class JWTAuth(AuthBase):
+    """
+    Authentication using temporary JWT tokens.
+
+    Generates short-lived JWTs for enhanced security.
+
+    Args:
+        api_key: The main Speechmatics API key used to generate JWTs.
+        ttl: Time-to-live for tokens between 60 and 86400 seconds.
+            For security reasons, we suggest using the shortest TTL possible.
+        region: Self-Service customers are restricted to "eu".
+            Enterprise customers can use this to specify which region the temporary key should be enabled in.
+        client_ref: Optional client reference for JWT token.
+            This parameter must be used if the temporary keys are exposed to the end-user's client
+            to prevent a user from accessing the data of a different user.
+        mp_url: Optional management platform URL override.
+        request_id: Optional request ID for debugging purposes.
+
+    Examples:
+        >>> auth = JWTAuth("your-api-key")
+        >>> headers = await auth.get_auth_headers()
+        >>> print(headers)
+        {'Authorization': 'Bearer eyJhbGciOiJSUzI1NiIs...'}
+    """
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        *,
+        ttl: int = 60,
+        region: Literal["eu", "usa", "au"] = "eu",
+        client_ref: Optional[str] = None,
+        mp_url: Optional[str] = None,
+        request_id: Optional[str] = None,
+    ):
+        self._api_key = api_key or os.environ.get("SPEECHMATICS_API_KEY")
+        self._ttl = ttl
+        self._region = region
+        self._client_ref = client_ref
+        self._request_id = request_id
+        self._mp_url = mp_url or os.getenv("SM_MANAGEMENT_PLATFORM_URL", self.BASE_URL)
+
+        if not self._api_key:
+            raise ValueError(
+                "API key required: please provide api_key or set SPEECHMATICS_API_KEY environment variable"
+            )
+
+        if not 60 <= self._ttl <= 86_400:
+            raise ValueError("ttl must be between 60 and 86400 seconds")
+
+        self._cached_token: Optional[str] = None
+        self._token_expires_at: float = 0
+        self._token_lock = asyncio.Lock()
+
+    async def get_auth_headers(self) -> dict[str, str]:
+        """Get JWT auth headers with caching."""
+        async with self._token_lock:
+            current_time = time.time()
+            if current_time >= self._token_expires_at - 10:
+                self._cached_token = await self._generate_token()
+                self._token_expires_at = current_time + self._ttl
+
+            return {"Authorization": f"Bearer {self._cached_token}"}
+
+    async def _generate_token(self) -> str:
+        try:
+            import aiohttp
+        except ImportError as e:
+            raise ImportError(
+                "aiohttp is required for JWT authentication. Please install it with `pip install aiohttp`"
+            ) from e
+
+        endpoint = f"{self._mp_url}/v1/api_keys"
+        # NOTE(review): "batch" was carried over from the Batch SDK - confirm the key type the TTS API expects.
+        params = {"type": "batch"}
+        payload = {"ttl": self._ttl, "region": str(self._region)}
+
+        if self._client_ref:
+            payload["client_ref"] = self._client_ref
+
+        headers = {
+            "Authorization": f"Bearer {self._api_key}",
+            "Content-Type": "application/json",
+        }
+
+        if self._request_id:
+            headers["X-Request-Id"] = self._request_id
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    endpoint,
+                    params=params,
+                    json=payload,
+                    headers=headers,
+                    timeout=aiohttp.ClientTimeout(total=10),
+                ) as response:
+                    if response.status != 201:
+                        text = await response.text()
+                        raise AuthenticationError(f"Failed to generate JWT: HTTP {response.status}: {text}")
+
+                    data = await response.json()
+                    return str(data["key_value"])
+
+        except AuthenticationError:
+            raise
+        except aiohttp.ClientError as e:
+            raise AuthenticationError(f"Network error generating JWT: {e}") from e
+        except Exception as e:
+            raise AuthenticationError(f"Unexpected error generating JWT: {e}") from e
diff --git a/sdk/tts/speechmatics/tts/_exceptions.py b/sdk/tts/speechmatics/tts/_exceptions.py
new file mode 100644
index 0000000..173c400
--- /dev/null
+++ b/sdk/tts/speechmatics/tts/_exceptions.py
@@ -0,0 +1,40 @@
+class ConfigurationError(Exception):
+    """Raised when there's an error in configuration."""
+
+    pass
+
+
+class AuthenticationError(Exception):
+    """Raised when authentication fails."""
+
+    pass
+
+
+class ConnectionError(Exception):
+    """Raised when connection to the service fails."""
+
+    pass
+
+
+class TransportError(Exception):
+    """Raised when there's an error in the transport layer."""
+
+    pass
+
+
+class BatchError(Exception):
+    """Raised when batch processing fails."""
+
+    pass
+
+
+class JobError(Exception):
+    """Raised when there's an error with a job."""
+
+    pass
+
+
+class TimeoutError(Exception):
+    """Raised when an operation times out."""
+
+    pass
diff --git a/sdk/tts/speechmatics/tts/_helpers.py
b/sdk/tts/speechmatics/tts/_helpers.py
new file mode 100644
index 0000000..443e49a
--- /dev/null
+++ b/sdk/tts/speechmatics/tts/_helpers.py
@@ -0,0 +1,58 @@
+"""
+Utility functions for the Speechmatics TTS SDK.
+"""
+
+from __future__ import annotations
+
+import importlib.metadata
+import os
+from collections.abc import AsyncGenerator
+from contextlib import asynccontextmanager
+from typing import BinaryIO
+from typing import Union
+
+import aiofiles
+
+
+@asynccontextmanager
+async def prepare_audio_file(
+    audio_file: Union[str, BinaryIO],
+) -> AsyncGenerator[tuple[str, Union[BinaryIO, bytes]], None]:
+    """
+    Async context manager for file handling with proper resource management.
+
+    Args:
+        audio_file: Path to audio file or file-like object containing audio data.
+
+    Yields:
+        Tuple of (filename, file_data)
+
+    Examples:
+        >>> async with prepare_audio_file("audio.wav") as (filename, file_data):
+        ...     # Use file_data for upload
+        ...     pass
+    """
+    if isinstance(audio_file, str):
+        async with aiofiles.open(audio_file, "rb") as f:
+            content = await f.read()
+            filename = os.path.basename(audio_file)
+            yield filename, content
+    else:
+        # It's already a file-like object
+        filename = getattr(audio_file, "name", "audio.wav")
+        if hasattr(filename, "split"):
+            filename = os.path.basename(filename)
+        yield filename, audio_file
+
+
+def get_version() -> str:
+    """Return the installed speechmatics-tts version, falling back to the package's __version__."""
+    try:
+        return importlib.metadata.version("speechmatics-tts")
+    except importlib.metadata.PackageNotFoundError:
+        try:
+            from . import __version__
+
+            return __version__
+        except ImportError:
+            return "0.0.0"
diff --git a/sdk/tts/speechmatics/tts/_logging.py b/sdk/tts/speechmatics/tts/_logging.py
new file mode 100644
index 0000000..e63bda6
--- /dev/null
+++ b/sdk/tts/speechmatics/tts/_logging.py
@@ -0,0 +1,50 @@
+import logging
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+
+
+def get_logger(name: str) -> logging.Logger:
+    """
+    Get a logger that stays silent by default.
+
+    The logger uses Python's standard logging module and includes NullHandler
+    by default to avoid unwanted output. Users can configure logging levels
+    and handlers as needed.
+
+    Args:
+        name: Logger name, typically __name__ from the calling module.
+
+    Returns:
+        Configured logger instance.
+
+    Examples:
+        Basic usage in SDK modules:
+            logger = get_logger(__name__)
+            logger.debug("HTTP request sent %s %s", method, url)
+            logger.info("Job submitted (job_id=%s)", job_id)
+            logger.warning("Job failed (job_id=%s): %s", job_id, error)
+            logger.error("Connection failed: %s", e)
+
+        Enable debug logging in user code:
+            import logging
+            logging.basicConfig(level=logging.DEBUG)
+            # Now all SDK debug messages will be visible
+
+        Custom logging configuration:
+            import logging
+            logging.basicConfig(
+                level=logging.INFO,
+                format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+            )
+
+            # Or for specific components:
+            logging.getLogger('speechmatics.tts').setLevel(logging.DEBUG)
+    """
+    module_logger = logging.getLogger(name)
+    if not any(isinstance(handler, logging.NullHandler) for handler in module_logger.handlers):
+        module_logger.addHandler(logging.NullHandler())
+    return module_logger
+
+
+__all__ = ["get_logger"]
diff --git a/sdk/tts/speechmatics/tts/_models.py b/sdk/tts/speechmatics/tts/_models.py
new file mode 100644
index 0000000..fdbca0e
--- /dev/null
+++ b/sdk/tts/speechmatics/tts/_models.py
@@ -0,0 +1,57 @@
+"""
+Models for the Speechmatics TTS SDK.
+
+This module contains all data models, enums, and configuration classes used
+throughout the Speechmatics TTS SDK. These models
+provide type-safe interfaces for configuration, job management, and
+result handling based on the official Speechmatics API schema.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+
+
+@dataclass
+class ConnectionConfig:
+    """
+    Configuration for HTTP connection parameters.
+
+    This class defines connection-related settings and timeouts.
+ + Attributes: + connect_timeout: Timeout in seconds for connection establishment. + operation_timeout: Default timeout for API operations. + """ + + connect_timeout: float = 30.0 + operation_timeout: float = 300.0 + + +class OutputFormat(str, Enum): + """ + Output format for the generated audio. + + Attributes: + wav_16000: WAV audio format with 16kHz sample rate. + raw_pcm_16000: Raw audio format with 16kHz sample rate. + """ + + WAV_16000 = "wav_16000" + RAW_PCM_16000 = "pcm_16000" + + +class Voice(str, Enum): + """ + Voice ID for the generated audio. + + Attributes: + sarah: English (UK) female voice. + theo: English (UK) male voice. + megan: English (UK) female voice. + """ + + SARAH = "sarah" + THEO = "theo" + MEGAN = "megan" diff --git a/sdk/tts/speechmatics/tts/_transport.py b/sdk/tts/speechmatics/tts/_transport.py new file mode 100644 index 0000000..cd1aba3 --- /dev/null +++ b/sdk/tts/speechmatics/tts/_transport.py @@ -0,0 +1,302 @@ +""" +Transport layer for Speechmatics Batch HTTP communication. + +This module provides the Transport class that handles low-level HTTP +communication with the Speechmatics Batch API, including connection management, +request/response handling, and authentication. +""" + +from __future__ import annotations + +import asyncio +import io +import sys +import uuid +from typing import Any +from typing import Optional + +import aiohttp + +from ._auth import AuthBase +from ._exceptions import AuthenticationError +from ._exceptions import ConnectionError +from ._exceptions import TransportError +from ._helpers import get_version +from ._logging import get_logger +from ._models import ConnectionConfig + + +class Transport: + """ + HTTP transport layer for Speechmatics Batch API communication. + + This class handles all low-level HTTP communication with the Speechmatics + Batch API, including connection management, request serialization, + authentication, and response handling. + + Args: + url: Base URL for the Speechmatics Batch API. 
+ conn_config: Connection configuration including URL and timeouts. + auth: Authentication instance for handling credentials. + request_id: Optional unique identifier for request tracking. Generated + automatically if not provided. + + Attributes: + conn_config: The connection configuration object. + request_id: Unique identifier for this transport instance. + + Examples: + Basic usage: + >>> from ._auth import StaticKeyAuth + >>> conn_config = ConnectionConfig() + >>> auth = StaticKeyAuth("your-api-key") + >>> transport = Transport(conn_config, auth) + >>> response = await transport.get("/jobs") + >>> await transport.close() + """ + + def __init__( + self, + url: str, + conn_config: ConnectionConfig, + auth: AuthBase, + request_id: Optional[str] = None, + ) -> None: + """ + Initialize the transport with connection configuration. + + Args: + conn_config: Connection configuration object containing connection parameters. + auth: Authentication instance for handling credentials. + request_id: Optional unique identifier for request tracking. + Generated automatically if not provided. + """ + self._url = url + self._conn_config = conn_config + self._auth = auth + self._request_id = request_id or str(uuid.uuid4()) + self._session: Optional[aiohttp.ClientSession] = None + self._closed = False + self._logger = get_logger(__name__) + + self._logger.debug("Transport initialized (request_id=%s, url=%s)", self._request_id, self._url) + + async def __aenter__(self) -> Transport: + """Async context manager entry.""" + await self._ensure_session() + return self + + async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: + """Async context manager exit with automatic cleanup.""" + await self.close() + + async def post( + self, + path: str, + json_data: Optional[dict[str, Any]] = None, + multipart_data: Optional[dict[str, Any]] = None, + timeout: Optional[float] = None, + ) -> aiohttp.ClientResponse: + """ + Send POST request to the API. 
+ + Args: + path: API endpoint path + json_data: Optional JSON data for request body + multipart_data: Optional multipart form data + timeout: Optional request timeout + + Returns: + HTTP response object + + Raises: + AuthenticationError: If authentication fails + TransportError: If request fails + """ + return await self._request("POST", path, json_data=json_data, multipart_data=multipart_data, timeout=timeout) + + async def close(self) -> None: + """ + Close the HTTP session and cleanup resources. + + This method gracefully closes the HTTP session and marks the + transport as closed. It's safe to call multiple times. + """ + if self._session: + try: + await self._session.close() + except Exception: + pass # Best effort cleanup + finally: + self._session = None + self._closed = True + + @property + def is_connected(self) -> bool: + """ + Check if the transport has an active session. + + Returns: + True if session is active, False otherwise + """ + return self._session is not None and not self._closed + + async def _ensure_session(self) -> None: + """Ensure HTTP session is created.""" + if self._session is None and not self._closed: + self._logger.debug( + "Creating HTTP session (connect_timeout=%.1fs, operation_timeout=%.1fs)", + self._conn_config.connect_timeout, + self._conn_config.operation_timeout, + ) + timeout = aiohttp.ClientTimeout( + total=self._conn_config.operation_timeout, + connect=self._conn_config.connect_timeout, + ) + self._session = aiohttp.ClientSession(timeout=timeout) + + async def _request( + self, + method: str, + path: str, + params: Optional[dict[str, Any]] = None, + json_data: Optional[dict[str, Any]] = None, + multipart_data: Optional[dict[str, Any]] = None, + timeout: Optional[float] = None, + ) -> aiohttp.ClientResponse: + """ + Send HTTP request to the API. 
+ + Args: + method: HTTP method (GET, POST, DELETE) + path: API endpoint path + params: Optional query parameters + json_data: Optional JSON data for request body + multipart_data: Optional multipart form data + timeout: Optional request timeout + + Returns: + HTTP response object + + Raises: + AuthenticationError: If authentication fails + ConnectionError: If connection fails + TransportError: For other transport errors + """ + await self._ensure_session() + + if self._session is None: + raise ConnectionError("Failed to create HTTP session") + + url = f"{self._url.rstrip('/')}{path}" + headers = await self._prepare_headers() + + self._logger.debug( + "Sending HTTP request %s %s (json=%s, multipart=%s)", + method, + url, + json_data is not None, + multipart_data is not None, + ) + + # Override timeout if specified + if timeout: + request_timeout = aiohttp.ClientTimeout(total=timeout) + else: + request_timeout = None + + try: + # Prepare request arguments + kwargs: dict[str, Any] = { + "headers": headers, + "params": params, + "timeout": request_timeout, + } + + if json_data: + kwargs["json"] = json_data + elif multipart_data: + # Force multipart encoding even when no files are present (for fetch_data support) + form_data = aiohttp.FormData(default_to_multipart=True) + for key, value in multipart_data.items(): + if isinstance(value, tuple) and len(value) == 3: + # File data: (filename, file_data, content_type) + filename, file_data, content_type = value + # aiohttp cannot serialize io.BytesIO directly; convert to bytes + if isinstance(file_data, io.BytesIO): + file_payload = file_data.getvalue() + else: + file_payload = file_data + form_data.add_field(key, file_payload, filename=filename, content_type=content_type) + else: + # Regular form field + if isinstance(value, dict): + import json + + value = json.dumps(value) + form_data.add_field(key, value) + kwargs["data"] = form_data + + async with self._session.request(method, url, **kwargs) as response: + return await 
self._handle_response(response) + + except asyncio.TimeoutError: + self._logger.error( + "Request timeout %s %s (timeout=%.1fs)", method, path, self._conn_config.operation_timeout + ) + raise TransportError(f"Request timeout for {method} {path}") from None + except aiohttp.ClientError as e: + self._logger.error("Request failed %s %s: %s", method, path, e) + raise ConnectionError(f"Request failed: {e}") from e + except Exception as e: + self._logger.error("Unexpected error %s %s: %s", method, path, e) + raise TransportError(f"Unexpected error: {e}") from e + + async def _prepare_headers(self) -> dict[str, str]: + """ + Prepare HTTP headers for requests. + + Returns: + Headers dictionary with authentication and tracking info + """ + auth_headers = await self._auth.get_auth_headers() + auth_headers["User-Agent"] = ( + f"speechmatics-batch-v{get_version()} python/{sys.version_info.major}.{sys.version_info.minor}" + ) + + if self._request_id: + auth_headers["X-Request-Id"] = self._request_id + + return auth_headers + + async def _handle_response(self, response: aiohttp.ClientResponse) -> aiohttp.ClientResponse: + """ + Handle HTTP response and extract JSON data. 
+ + Args: + response: HTTP response object + + Returns: + HTTP response object + + Raises: + AuthenticationError: For 401/403 responses + TransportError: For other error responses + """ + try: + if response.status == 401: + raise AuthenticationError("Invalid API key - authentication failed") + elif response.status == 403: + raise AuthenticationError("Access forbidden - check API key permissions") + elif response.status >= 400: + error_text = await response.text() + self._logger.error("HTTP error %d %s: %s", response.status, response.reason, error_text) + raise TransportError(f"HTTP {response.status}: {response.reason} - {error_text}") + return response + + except aiohttp.ContentTypeError as e: + self._logger.error("Failed to parse JSON response: %s", e) + raise TransportError(f"Failed to parse response: {e}") from e + except Exception as e: + self._logger.error("Error handling response: %s", e) + raise TransportError(f"Error handling response: {e}") from e diff --git a/tests/tts/async_http_test.py b/tests/tts/async_http_test.py new file mode 100644 index 0000000..2aa4f82 --- /dev/null +++ b/tests/tts/async_http_test.py @@ -0,0 +1,14 @@ +import os + +import pytest + +from speechmatics.tts import AsyncClient + + +@pytest.mark.asyncio +async def test_async_http(): + if os.environ.get("SPEECHMATICS_API_KEY") is None: + pytest.skip("SPEECHMATICS_API_KEY not set") + async with AsyncClient() as client: + response = await client.generate(text="Hello world") + assert response.status == 200