Readium Speech is a TypeScript library for implementing a read aloud feature with Web technologies. It follows best practices gathered through interviews with members of the digital publishing industry.
While this project is still in a very early stage, it is meant to power the read aloud feature for two different Readium projects: Readium Web and Thorium.
Readium Speech was spun out as a separate project in order to facilitate its integration as a shared component, but also because of its potential outside of the realm of ebook reading apps.
- Extracting Guided Navigation objects from a document (or a fragment of a document)
- Generating utterances from these Guided Navigation objects
- Processing utterances (prepending/appending text to utterances based on context, pronunciation through SSML/PLS…)
- Voice selection
- TTS playback
- Highlighting
For our initial work on this project, we're focusing on voice selection based on recommended voices.
The outline of this work has been explored in a GitHub discussion and through a best practices document.
A live demo of the voice selection API is available.
It demonstrates the following features:
- fetching a list of all available languages, translating them to the user's locale and sorting them based on these translations
- returning a list of voices for a given language, grouped by region and sorted based on quality
- filtering languages and voices based on gender and offline availability
- using embedded test utterances to demo voices
At the moment, the new alpha version of the library is not published on npm, so you need to clone the repository and build it yourself.
git clone https://github.com/readium/speech.git
cd speech
npm install
npm run build

You can then link the library to your project, for example using npm link.
import { getVoices } from "readium-speech";
console.log(getVoices);
const voices = await getVoices();
console.log(voices);

Here's how to get started with the Readium Speech library:
import { WebSpeechReadAloudNavigator } from "readium-speech";
// Initialize the navigator with default WebSpeech engine
const navigator = new WebSpeechReadAloudNavigator();
// Load content to be read
navigator.loadContent([
{ text: "Hello, this is the first sentence.", language: "en-US" },
{ text: "And this is the second sentence.", language: "en-US" }
]);
// Set up event listeners
navigator.on("start", () => console.log("Playback started"));
navigator.on("end", () => console.log("Playback finished"));
// Start playback
navigator.play();
// Later, you can pause, resume, or stop
// navigator.pause();
// navigator.stop();
// Clean up when done
// navigator.destroy();

export interface ReadiumSpeechVoices {
label: string;
voiceURI: string;
name: string;
language: string;
gender?: TGender | undefined;
age?: string | undefined;
offlineAvailability: boolean;
quality?: TQuality | undefined;
pitchControl: boolean;
recommendedPitch?: number | undefined;
recommendedRate?: number | undefined;
}
export interface ILanguages {
label: string;
code: string;
count: number;
}

function getVoices(preferredLanguage?: string[] | string, localization?: string): Promise<ReadiumSpeechVoices[]>

function getLanguages(voices: ReadiumSpeechVoices[], preferredLanguage?: string[] | string, localization?: string | undefined): ILanguages[]

function listLanguages(voices: ReadiumSpeechVoices[], localization?: string): ILanguages[]
function ListRegions(voices: ReadiumSpeechVoices[], localization?: string): ILanguages[]
function parseSpeechSynthesisVoices(speechSynthesisVoices: SpeechSynthesisVoice[]): ReadiumSpeechVoices[]
function getSpeechSynthesisVoices(): Promise<SpeechSynthesisVoice[]>

function groupByKindOfVoices(allVoices: ReadiumSpeechVoices[]): TGroupVoices
function groupByRegions(voices: ReadiumSpeechVoices[], language: string, preferredRegions?: string[] | string, localization?: string): TGroupVoices
function groupByLanguage(voices: ReadiumSpeechVoices[], preferredLanguage?: string[] | string, localization?: string): TGroupVoices

function sortByLanguage(voices: ReadiumSpeechVoices[], preferredLanguage?: string[] | string): ReadiumSpeechVoices[]
function sortByRegion(voices: ReadiumSpeechVoices[], preferredRegions?: string[] | string, localization?: string | undefined): ReadiumSpeechVoices[]
function sortByGender(voices: ReadiumSpeechVoices[], genderFirst: TGender): ReadiumSpeechVoices[]
function sortByName(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[]
function sortByQuality(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[]

function filterOnRecommended(voices: ReadiumSpeechVoices[], _recommended?: IRecommended[]): TReturnFilterOnRecommended
function filterOnVeryLowQuality(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[]
function filterOnNovelty(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[]
function filterOnQuality(voices: ReadiumSpeechVoices[], quality: TQuality | TQuality[]): ReadiumSpeechVoices[]
function filterOnLanguage(voices: ReadiumSpeechVoices[], language: string | string[]): ReadiumSpeechVoices[]
function filterOnGender(voices: ReadiumSpeechVoices[], gender: TGender): ReadiumSpeechVoices[]

interface ReadiumSpeechNavigator {
// Voice Management
getVoices(): Promise<ReadiumSpeechVoice[]>;
setVoice(voice: ReadiumSpeechVoice | string): Promise<void>;
getCurrentVoice(): ReadiumSpeechVoice | null;
// Content Management
loadContent(content: ReadiumSpeechUtterance | ReadiumSpeechUtterance[]): void;
getCurrentContent(): ReadiumSpeechUtterance | null;
getContentQueue(): ReadiumSpeechUtterance[];
// Playback Control
play(): void;
pause(): void;
stop(): void;
// Navigation
next(): boolean;
previous(): boolean;
jumpTo(utteranceIndex: number): void;
// Playback Parameters
setRate(rate: number): void;
getRate(): number;
setPitch(pitch: number): void;
getPitch(): number;
setVolume(volume: number): void;
getVolume(): number;
// State
getState(): ReadiumSpeechPlaybackState;
getCurrentUtteranceIndex(): number;
// Events
on(
event: ReadiumSpeechPlaybackEvent["type"],
listener: (event: ReadiumSpeechPlaybackEvent) => void
): void;
// Cleanup
destroy(): void;
}

type ReadiumSpeechPlaybackEvent = {
type:
| "start" // Playback started
| "pause" // Playback paused
| "resume" // Playback resumed
| "end" // Playback ended naturally
| "stop" // Playback stopped manually
| "skip" // Skipped to another utterance
| "error" // An error occurred
| "boundary" // Reached a word/sentence boundary
| "mark" // Reached a named mark in SSML
| "idle" // No content loaded
| "loading" // Loading content
| "ready" // Ready to play
| "voiceschanged"; // Available voices changed
detail?: any; // Event-specific data
};

type ReadiumSpeechPlaybackState = "playing" | "paused" | "idle" | "loading" | "ready";