11import unittest
2+ import json
3+ from pathlib import Path
24
35try :
46 import wfloat
@@ -52,6 +54,31 @@ def test_version_matches_expected(self) -> None:
5254
5355 import wfloat
5456
57+ print (wfloat .version )
58+
# Maps each voice identifier to its numeric speaker id. The id of a voice is
# simply its position in the tuple below, so the order must not be changed.
SPEAKER_IDS = {
    voice_name: speaker_index
    for speaker_index, voice_name in enumerate(
        (
            "skilled_hero_man",
            "skilled_hero_woman",
            "fun_hero_man",
            "fun_hero_woman",
            "strong_hero_man",
            "strong_hero_woman",
            "mad_scientist_man",
            "mad_scientist_woman",
            "clever_villain_man",
            "clever_villain_woman",
            "narrator_man",
            "narrator_woman",
            "wise_elder_man",
            "wise_elder_woman",
            "outgoing_anime_man",
            "outgoing_anime_woman",
            "scary_villain_man",
            "scary_villain_woman",
            "news_reporter_man",
            "news_reporter_woman",
        )
    )
}
81+
5582 model = wfloat .OfflineTtsWfloatModelConfig (
5683 model = "../wfloat-web/assets/models/wfloat-model/1.0.0/wfloat-model-1.0.0.onnx" ,
5784 tokens = "../wfloat-web/assets/models/wfloat-model/1.0.0/wfloat-model-1.0.0_tokens.txt" ,
@@ -71,9 +98,78 @@ def test_version_matches_expected(self) -> None:
7198 )
7299
73100 tts = wfloat .OfflineTts (config )
74- audio = tts .generate ("Hello world." , sid = 0 , speed = 1.0 )
75- ok = wfloat .write_wave ("out.wav" , audio .samples , audio .sample_rate )
76101
77- print ("sample_rate:" , audio .sample_rate )
78- print ("num_samples:" , len (audio .samples ))
79- print ("write_wave:" , ok )
# Render every voice listed in voices.js to out/<voiceId>.wav and record
# per-chunk text-highlight and timing metadata in out/progress.json.
# Relies on names bound earlier in this script: `tts`, `wfloat`,
# `SPEAKER_IDS`, `json` and `Path`.
voices_path = "../../web/assets/js/voices.js"  # JS module wrapping a JSON array
with open(voices_path, "r", encoding="utf-8") as f:
    voices_text = f.read()

# The module is expected to read `export const VOICES = <json>` with an
# optional trailing semicolon. Validate the declaration instead of slicing a
# fixed number of characters, so a malformed file fails with a clear message.
declaration, separator, payload = voices_text.partition("=")
if not separator or declaration.split() != ["export", "const", "VOICES"]:
    raise ValueError(
        f"{voices_path} does not start with 'export const VOICES = '"
    )
voices = json.loads(payload.strip().rstrip(";").rstrip())

out_dir = Path("out")
out_dir.mkdir(exist_ok=True)
progress_by_voice = {}

for v in voices:
    voice_id = v["voiceId"]
    sid = SPEAKER_IDS[voice_id]
    silence_padding_sec = v["padding"]
    speed = v["speed"]
    final_samples = []
    sample_rate = None
    raw_text_cursor = 0
    progress_events = []

    prepared = tts.prepare_wfloat_text(
        v["text"],
        emotion=v["emotion"],
        intensity=v["intensity"],
    )
    num_chunks = len(prepared.text)
    if num_chunks == 0:
        # Without at least one chunk there is no sample rate to write with.
        raise ValueError(f"No text chunks were prepared for {voice_id}")

    for i, chunk_text in enumerate(prepared.text):
        audio = tts.generate(prepared.text_clean[i], sid=sid, speed=speed)
        if sample_rate is None:
            sample_rate = audio.sample_rate
        elif sample_rate != audio.sample_rate:
            raise ValueError(
                f"Sample rate changed for {voice_id}: "
                f"{sample_rate} != {audio.sample_rate}"
            )

        # Highlight offsets are cumulative lengths of the raw chunks.
        # NOTE(review): assumes the raw chunks concatenate back to the
        # displayed text without gaps - confirm against the consumer.
        raw_chunk_text = chunk_text or ""
        highlight_start = raw_text_cursor
        highlight_end = raw_text_cursor + len(raw_chunk_text)
        raw_text_cursor = highlight_end

        # Derive timestamps from the number of samples actually written so
        # they cannot drift from the audio: the silence length is truncated
        # to whole samples, which adding the padding in seconds would ignore.
        start_time_sec = len(final_samples) / sample_rate
        final_samples.extend(audio.samples)
        if i < num_chunks - 1:
            # Silence separates chunks; none is appended after the last one.
            silence_samples = int(sample_rate * silence_padding_sec)
            final_samples.extend([0] * silence_samples)
        end_time_sec = len(final_samples) / sample_rate

        progress_events.append(
            {
                "text": raw_chunk_text,
                "progress": (i + 1) / num_chunks,
                "textHighlightStart": highlight_start,
                "textHighlightEnd": highlight_end,
                "startTimeSec": start_time_sec,
                "endTimeSec": end_time_sec,
            }
        )

    output_path = out_dir / f"{voice_id}.wav"
    ok = wfloat.write_wave(str(output_path), final_samples, sample_rate)
    progress_by_voice[voice_id] = progress_events
    print(f"{output_path}: sample_rate={sample_rate} num_samples={len(final_samples)} write_wave={ok}")

progress_path = out_dir / "progress.json"
with open(progress_path, "w", encoding="utf-8") as f:
    json.dump(progress_by_voice, f, ensure_ascii=False, indent=2)
0 commit comments