Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
272 changes: 272 additions & 0 deletions 1_nb_async_client_
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
Current Implementation Analysis
Looking at the code:

1. File Reading: The method reads the entire file content at once:

```python
async with aiofiles.open(file_path, 'r', encoding='utf-8') as f:
    text_content = await f.read()  # Reads ALL content
```

2. Single API Call: It makes one API request with all the text:

```python
return await self.synthesize_speech(text_content, ...)  # Single request
```

3. Batch Processing: The API endpoint /synthesize appears to be a batch endpoint that processes the entire text and returns the complete audio file.
What This Means
No Streaming: The text is sent as one complete payload
No Real-time Generation: You must wait for the entire audio to be synthesized before getting any response
No Incremental Output: You can't start playing audio while more is being generated

For True Streaming TTS, You Would Need:
Chunked Text Processing: Break text into smaller segments
Streaming API Endpoint: An endpoint that supports streaming responses (like /stream-synthesize)
WebSocket or Server-Sent Events: For real-time bidirectional communication
Incremental Audio Delivery: Receive audio chunks as they're generated


"""
Asynchronous client for Speechmatics text-to-speech synthesis.

This module provides the main AsyncClient class, which posts text to the
/synthesize endpoint of the Speechmatics REST API and hands back the HTTP
response from which the synthesized audio can be read.
"""

from __future__ import annotations

import asyncio
import os
import uuid
from typing import Any
from typing import BinaryIO
from typing import Optional
from typing import Union

import aiohttp

from ._auth import AuthBase
from ._auth import StaticKeyAuth
from ._exceptions import AuthenticationError
from ._exceptions import TimeoutError
from ._logging import get_logger
from ._models import ConnectionConfig

from ._transport import Transport


class AsyncClient:
    """
    Asynchronous client for Speechmatics text-to-speech synthesis.

    The client wraps a ``Transport`` and exposes two operations:

    1. ``synthesize_speech`` - POST a text payload to ``/synthesize``.
    2. ``synthesize_from_file`` - read a UTF-8 text file and synthesize its content.

    Both return the raw HTTP response; the caller reads the audio from it.
    The client is an async context manager and closes its transport on exit.

    Args:
        auth: Authentication instance. If not provided, a ``StaticKeyAuth`` is
            built from ``api_key``.
        api_key: Speechmatics API key (used only if ``auth`` is not provided).
            NOTE(review): falling back to the SPEECHMATICS_API_KEY environment
            variable is presumably handled inside ``StaticKeyAuth`` - confirm.
        url: REST API endpoint URL. If not provided, the SPEECHMATICS_BATCH_URL
            environment variable is used, then the built-in default endpoint.
        conn_config: Connection configuration object. If not provided, a
            default ``ConnectionConfig()`` is used.

    Raises:
        ConfigurationError: Presumably raised by ``StaticKeyAuth`` when no API
            key can be resolved - this class does not raise it directly.

    Examples:
        Basic usage:
            >>> async with AsyncClient(api_key="your-key") as client:
            ...     response = await client.synthesize_speech("Hello world")
            ...     audio_data = await response.read()

        With a custom authentication object:
            >>> async with AsyncClient(auth=my_auth) as client:
            ...     response = await client.synthesize_from_file("script.txt")
    """

    def __init__(
        self,
        auth: Optional[AuthBase] = None,
        *,
        api_key: Optional[str] = None,
        url: Optional[str] = None,
        conn_config: Optional[ConnectionConfig] = None,
    ) -> None:
        """
        Initialize the AsyncClient.

        Args:
            auth: Authentication method (any ``AuthBase`` implementation).
                If None, a ``StaticKeyAuth`` is created from ``api_key``.
            api_key: Speechmatics API key, forwarded to ``StaticKeyAuth`` when
                ``auth`` is None.
            url: REST API endpoint URL. If None, uses the SPEECHMATICS_BATCH_URL
                env var or defaults to the production endpoint.
            conn_config: Connection configuration; defaults to ``ConnectionConfig()``.

        Raises:
            ConfigurationError: Presumably from ``StaticKeyAuth`` if ``auth`` is
                None and no API key is provided/found - verify against ``_auth``.
        """
        self._auth = auth or StaticKeyAuth(api_key)
        # Resolution order: explicit argument, environment variable, built-in default.
        self._url = (
            url
            or os.environ.get("SPEECHMATICS_BATCH_URL")
            or "https://asr.api.speechmatics.com/v2"
        )
        self._conn_config = conn_config or ConnectionConfig()
        # One identifier per client instance, handed to the transport and logged below.
        self._request_id = str(uuid.uuid4())
        self._transport = Transport(self._url, self._conn_config, self._auth, self._request_id)

        self._logger = get_logger(__name__)
        self._logger.debug("AsyncClient initialized (request_id=%s, url=%s)", self._request_id, self._url)

    async def __aenter__(self) -> AsyncClient:
        """
        Async context manager entry.

        Returns:
            Self for use in async with statements.

        Examples:
            >>> async with AsyncClient(api_key="key") as client:
            ...     response = await client.synthesize_speech("Hello world")
        """
        return self

    async def synthesize_speech(
        self,
        text: str,
        *,
        voice: Optional[str] = None,
        output_format: str = "wav",
        sample_rate: Optional[int] = None,
        speed: Optional[float] = None,
    ) -> aiohttp.ClientResponse:
        """
        Convert text to speech audio with a single POST to ``/synthesize``.

        The whole text is sent as one JSON payload; nothing is streamed or chunked.

        Args:
            text: Text to convert to speech.
            voice: Voice ID to use for synthesis (e.g., "en-US-neural-1").
                Omitted from the request when None or empty.
            output_format: Audio format ("wav", "mp3", "ogg").
            sample_rate: Audio sample rate in Hz (e.g., 22050, 44100).
                Omitted from the request when None.
            speed: Speech speed multiplier (0.5 to 2.0). Omitted when None.

        Returns:
            The response object returned by the transport. Read the audio with
            ``await response.read()``.

        Raises:
            Exception: Whatever the transport raises propagates unchanged
                (documented upstream as AuthenticationError / TransportError).

        Examples:
            >>> response = await client.synthesize_speech("Hello world")
            >>> audio_data = await response.read()
            >>> with open("output.wav", "wb") as f:
            ...     f.write(audio_data)
        """
        request_data = {
            "text": text,
            "output_format": output_format,
        }

        if voice:
            request_data["voice"] = voice
        # Compare against None so that an explicit (if invalid) 0 is sent to the
        # server and rejected there instead of being silently ignored.
        if sample_rate is not None:
            # Numeric options are serialized as strings, as in the original request format.
            request_data["sample_rate"] = str(sample_rate)
        if speed is not None:
            request_data["speed"] = str(speed)

        return await self._transport.post("/synthesize", json_data=request_data)

    async def synthesize_from_file(
        self,
        file_path: Union[str, os.PathLike],
        *,
        voice: Optional[str] = None,
        output_format: str = "wav",
        sample_rate: Optional[int] = None,
        speed: Optional[float] = None,
    ) -> aiohttp.ClientResponse:
        """
        Convert text from a file to speech audio.

        The file is read in full as UTF-8 and passed to ``synthesize_speech``
        as a single request.

        Args:
            file_path: Path to text or SSML file.
            voice: Voice ID to use for synthesis.
            output_format: Audio format ("wav", "mp3", "ogg").
            sample_rate: Audio sample rate in Hz.
            speed: Speech speed multiplier (0.5 to 2.0).

        Returns:
            The response object returned by ``synthesize_speech``.

        Raises:
            FileNotFoundError: If the file doesn't exist.
            Exception: Errors from ``synthesize_speech`` propagate unchanged.

        Examples:
            >>> response = await client.synthesize_from_file("script.txt")
            >>> audio_data = await response.read()
            >>> with open("output.wav", "wb") as f:
            ...     f.write(audio_data)
        """
        from pathlib import Path

        source = Path(file_path)

        def _read_text() -> str:
            return source.read_text(encoding="utf-8")

        # Read in the default executor so the event loop is not blocked, using
        # only the standard library. Attempting the read directly (instead of
        # checking exists() first) avoids a race between the check and the open.
        try:
            text_content = await asyncio.get_running_loop().run_in_executor(None, _read_text)
        except FileNotFoundError:
            raise FileNotFoundError(f"File not found: {file_path}") from None

        return await self.synthesize_speech(
            text_content,
            voice=voice,
            output_format=output_format,
            sample_rate=sample_rate,
            speed=speed,
        )

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """
        Async context manager exit with automatic cleanup.

        Delegates to ``close()``; any exception raised inside the ``async with``
        body is not suppressed.

        Args:
            exc_type: Exception type if an exception occurred.
            exc_val: Exception value if an exception occurred.
            exc_tb: Exception traceback if an exception occurred.
        """
        await self.close()

    async def close(self) -> None:
        """
        Close the client and release the underlying transport.

        Cleanup is best effort: an error raised while closing the transport is
        logged at debug level and not propagated.

        Examples:
            >>> client = AsyncClient(api_key="key")
            >>> try:
            ...     response = await client.synthesize_speech("Hello world")
            ... finally:
            ...     await client.close()
        """
        try:
            await self._transport.close()
        except Exception:
            # Best effort cleanup: never let a failing close mask the caller's
            # own exception, but leave a trace for debugging.
            self._logger.debug("Ignoring error while closing transport", exc_info=True)
45 changes: 33 additions & 12 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Makefile for Speechmatics Python SDKs

.PHONY: help
.PHONY: test-all test-rt test-batch test-flow
.PHONY: format-all format-rt format-batch format-flow
.PHONY: lint-all lint-rt lint-batch lint-flow
.PHONY: type-check-all type-check-rt type-check-batch type-check-flow
.PHONY: build-all build-rt build-batch build-flow
.PHONY: clean-all clean-rt clean-batch clean-flow clean-flow
.PHONY: test-all test-rt test-batch test-flow test-tts
.PHONY: format-all format-rt format-batch format-flow format-tts
.PHONY: lint-all lint-rt lint-batch lint-flow lint-tts
.PHONY: type-check-all type-check-rt type-check-batch type-check-flow type-check-tts
.PHONY: build-all build-rt build-batch build-flow build-tts
.PHONY: clean-all clean-rt clean-batch clean-flow clean-tts

help:
@echo "Available commands:"
Expand Down Expand Up @@ -49,7 +49,7 @@ help:
@echo ""

# Testing targets
test-all: test-rt test-batch test-flow
test-all: test-rt test-batch test-flow test-tts

test-rt:
pytest tests/rt/ -v
Expand All @@ -60,8 +60,11 @@ test-batch:
test-flow:
pytest tests/flow/ -v

test-tts:
pytest tests/tts/ -v

# Formatting targets
format-all: format-rt format-batch format-flow
format-all: format-rt format-batch format-flow format-tts

format-rt:
cd sdk/rt/speechmatics && black .
Expand All @@ -75,8 +78,12 @@ format-flow:
cd sdk/flow/speechmatics && black .
cd sdk/flow/speechmatics && ruff check --fix .

format-tts:
cd sdk/tts/speechmatics && black .
cd sdk/tts/speechmatics && ruff check --fix .

# Linting targets
lint-all: lint-rt lint-batch lint-flow
lint-all: lint-rt lint-batch lint-flow lint-tts

lint-rt:
cd sdk/rt/speechmatics && ruff check .
Expand All @@ -87,8 +94,11 @@ lint-batch:
lint-flow:
cd sdk/flow/speechmatics && ruff check .

lint-tts:
cd sdk/tts/speechmatics && ruff check .

# Type checking targets
type-check-all: type-check-rt type-check-batch type-check-flow
type-check-all: type-check-rt type-check-batch type-check-flow type-check-tts

type-check-rt:
cd sdk/rt/speechmatics && mypy .
Expand All @@ -99,18 +109,22 @@ type-check-batch:
type-check-flow:
cd sdk/flow/speechmatics && mypy .

type-check-tts:
cd sdk/tts/speechmatics && mypy .

# Installation targets
install-dev:
python -m pip install --upgrade pip
python -m pip install -e sdk/rt[dev]
python -m pip install -e sdk/batch[dev]
python -m pip install -e sdk/flow[dev]
python -m pip install -e sdk/tts[dev]

install-build:
python -m pip install --upgrade build

# Building targets
build-all: build-rt build-batch build-flow
build-all: build-rt build-batch build-flow build-tts

build-rt: install-build
cd sdk/rt && python -m build
Expand All @@ -121,8 +135,11 @@ build-batch: install-build
build-flow: install-build
cd sdk/flow && python -m build

build-tts: install-build
cd sdk/tts && python -m build

# Cleaning targets
clean-all: clean-rt clean-batch clean-flow
clean-all: clean-rt clean-batch clean-flow clean-tts

clean-rt:
rm -rf sdk/rt/dist sdk/rt/build sdk/rt/*.egg-info
Expand All @@ -135,3 +152,7 @@ clean-batch:
clean-flow:
rm -rf sdk/flow/dist sdk/flow/build sdk/flow/*.egg-info
find sdk/flow -name __pycache__ -exec rm -rf {} + 2>/dev/null || true

clean-tts:
rm -rf sdk/tts/dist sdk/tts/build sdk/tts/*.egg-info
find sdk/tts -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
Loading
Loading