Remsky committed on
Commit
4259439
·
1 Parent(s): 6d67517

Add wav files and GPU timeout changes

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.wav filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,3 +1,16 @@
1
  dorian_grey.txt
2
  texts/time_machine.txt
3
- *.pyc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  dorian_grey.txt
2
  texts/time_machine.txt
3
+ *.pyc
4
+ *.pt
5
+
6
+ # Audio files
7
+
8
+ # Binary files
9
+ *.bin
10
+ *.pth
11
+ *.ckpt
12
+ *.model
13
+
14
+ # Cache directories
15
+ __pycache__/
16
+ .cache/
app.py CHANGED
@@ -5,7 +5,6 @@ import math
5
  import logging
6
  import matplotlib.pyplot as plt
7
  import numpy as np
8
- # from lib.mock_tts import MockTTSModel
9
  from lib import format_audio_output
10
  from lib.ui_content import header_html, demo_text_info
11
  from lib.book_utils import get_available_books, get_book_info, get_chapter_text
@@ -25,7 +24,6 @@ logging.getLogger('matplotlib').setLevel(logging.WARNING)
25
  logger = logging.getLogger(__name__)
26
  logger.debug("Starting app initialization...")
27
 
28
-
29
  model = TTSModel()
30
 
31
  def initialize_model():
@@ -64,7 +62,7 @@ def update_progress(chunk_num, total_chunks, tokens_per_sec, rtf, progress_state
64
  # Only update progress display during processing
65
  progress(progress_state["progress"], desc=f"Processing chunk {chunk_num}/{total_chunks} | GPU Time Left: {int(gpu_time_left)}s")
66
 
67
- def generate_speech_from_ui(text, voice_names, speed, gpu_timeout, progress=gr.Progress(track_tqdm=False)):
68
  """Handle text-to-speech generation from the Gradio UI"""
69
  try:
70
  if not text or not voice_names:
@@ -72,6 +70,11 @@ def generate_speech_from_ui(text, voice_names, speed, gpu_timeout, progress=gr.P
72
 
73
  start_time = time.time()
74
 
 
 
 
 
 
75
  # Create progress state with explicit type initialization
76
  progress_state = {
77
  "progress": 0.0,
@@ -175,7 +178,6 @@ def create_performance_plot(metrics, voice_names):
175
 
176
  return fig, metrics_text
177
 
178
-
179
  # Create Gradio interface
180
  with gr.Blocks(title="Kokoro TTS Demo", css="""
181
  .equal-height {
@@ -192,40 +194,53 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
192
  .token-count {
193
  color: #4169e1;
194
  }
 
 
 
 
 
 
195
  """) as demo:
196
  gr.HTML(header_html)
197
 
198
  with gr.Row():
199
  # Column 1: Text Input and Book Selection
200
  with gr.Column(elem_classes="equal-height"):
201
- # Book selection
202
- books = get_available_books()
203
- book_dropdown = gr.Dropdown(
204
- label="Select Book",
205
- choices=[book['label'] for book in books],
206
- value=books[0]['label'] if books else None,
207
- type="value",
208
- allow_custom_value=True
209
- )
210
-
211
- # Initialize chapters for first book
212
- initial_book = books[0]['value'] if books else None
213
- initial_chapters = []
214
- if initial_book:
215
- book_path = os.path.join("texts/processed", initial_book)
216
- _, chapters = get_book_info(book_path)
217
- initial_chapters = [ch['title'] for ch in chapters]
218
-
219
- # Chapter selection with initial chapters
220
- chapter_dropdown = gr.Dropdown(
221
- label="Select Chapter",
222
- choices=initial_chapters,
223
- value=initial_chapters[0] if initial_chapters else None,
224
- type="value",
225
- allow_custom_value=True
226
- )
227
- lab_tps = 175
228
- lab_rts = 50
 
 
 
 
 
 
 
229
  # Text input area with initial chapter text
230
  initial_text = ""
231
  if initial_chapters and initial_book:
@@ -250,7 +265,6 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
250
  output_estimate = (time_estimate * lab_rts)//60
251
  return f'<div class="token-label"><span class="token-count">Estimated {output_estimate} minutes in ~{time_estimate}s</span></div>'
252
 
253
-
254
  text_input = gr.TextArea(
255
  label=None,
256
  placeholder="Enter text here, select a chapter, or upload a .txt file",
@@ -258,7 +272,7 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
258
  lines=8,
259
  max_lines=14,
260
  show_label=False,
261
- show_copy_button=True # Add copy button for convenience
262
  )
263
 
264
  clear_btn = gr.Button("Clear Text", variant="secondary")
@@ -295,8 +309,9 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
295
  initial_text = get_chapter_text(book_path, chapters[0]['id']) if chapters else ""
296
  if initial_text:
297
  tokens = count_tokens(initial_text)
298
- time_estimate = math.ceil(tokens / 150 / 10) * 10
299
- label = f'<div class="token-label"><span class="token-count">({tokens} tokens, ~{time_estimate}s generation time)</span></div>'
 
300
  else:
301
  label = '<div class="token-label"></div>'
302
  return gr.update(choices=chapter_choices, value=chapter_choices[0] if chapter_choices else None), initial_text, label
@@ -315,8 +330,9 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
315
  if ch['title'] == chapter_title:
316
  text = get_chapter_text(book_path, ch['id'])
317
  tokens = count_tokens(text)
318
- time_estimate = math.ceil(tokens / 150 / 10) * 10
319
- return text, f'<div class="token-label"> <span class="token-count">({tokens} tokens, ~{time_estimate}s generation time)</span></div>'
 
320
  return "", '<div class="token-label"></div>'
321
 
322
  # Set up event handlers for book/chapter selection
@@ -346,8 +362,9 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
346
  try:
347
  text = file_bytes.decode('utf-8')
348
  tokens = count_tokens(text)
349
- time_estimate = math.ceil(tokens / 150 / 10) * 10 # Round up to nearest 10 seconds
350
- return text, f'<div class="token-label"><span class="token-count">({tokens} tokens, ~{time_estimate}s generation time)</span></div>'
 
351
  except Exception as e:
352
  raise gr.Error(f"Failed to read file: {str(e)}")
353
 
@@ -366,9 +383,6 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
366
  multiselect=True
367
  )
368
 
369
- # Add refresh button to manually update voice list
370
- refresh_btn = gr.Button("🔄 Refresh Voices", size="sm")
371
-
372
  speed_slider = gr.Slider(
373
  label="Speed",
374
  minimum=0.5,
@@ -376,15 +390,38 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
376
  value=1.0,
377
  step=0.1
378
  )
379
- gpu_timeout_slider = gr.Slider(
380
- label="GPU Timeout (seconds)",
381
- minimum=15,
382
- maximum=120,
383
- value=90,
384
- step=1,
385
- info="Maximum time allowed for GPU processing"
386
- )
387
  submit_btn = gr.Button("Generate Speech", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
  # Column 3: Output
390
  with gr.Column(elem_classes="equal-height"):
@@ -403,18 +440,13 @@ with gr.Blocks(title="Kokoro TTS Demo", css="""
403
  metrics_plot = gr.Plot(
404
  label="Processing Metrics",
405
  show_label=True,
406
- format="png" # Explicitly set format to PNG which is supported by matplotlib
407
  )
408
 
409
  # Set up event handlers
410
- refresh_btn.click(
411
- fn=initialize_model,
412
- outputs=[voice_dropdown]
413
- )
414
-
415
  submit_btn.click(
416
  fn=generate_speech_from_ui,
417
- inputs=[text_input, voice_dropdown, speed_slider, gpu_timeout_slider],
418
  outputs=[audio_output, metrics_plot, metrics_text],
419
  show_progress=True
420
  )
 
5
  import logging
6
  import matplotlib.pyplot as plt
7
  import numpy as np
 
8
  from lib import format_audio_output
9
  from lib.ui_content import header_html, demo_text_info
10
  from lib.book_utils import get_available_books, get_book_info, get_chapter_text
 
24
  logger = logging.getLogger(__name__)
25
  logger.debug("Starting app initialization...")
26
 
 
27
  model = TTSModel()
28
 
29
  def initialize_model():
 
62
  # Only update progress display during processing
63
  progress(progress_state["progress"], desc=f"Processing chunk {chunk_num}/{total_chunks} | GPU Time Left: {int(gpu_time_left)}s")
64
 
65
+ def generate_speech_from_ui(text, voice_names, speed, progress=gr.Progress(track_tqdm=False)):
66
  """Handle text-to-speech generation from the Gradio UI"""
67
  try:
68
  if not text or not voice_names:
 
70
 
71
  start_time = time.time()
72
 
73
+ # Calculate GPU timeout based on token estimate
74
+ tokens = count_tokens(text)
75
+ time_estimate = math.ceil(tokens / lab_tps)
76
+ gpu_timeout = min(max(int(time_estimate * 1.3), 15), 120) # Cap between 15-120s
77
+
78
  # Create progress state with explicit type initialization
79
  progress_state = {
80
  "progress": 0.0,
 
178
 
179
  return fig, metrics_text
180
 
 
181
  # Create Gradio interface
182
  with gr.Blocks(title="Kokoro TTS Demo", css="""
183
  .equal-height {
 
194
  .token-count {
195
  color: #4169e1;
196
  }
197
+ #gradio-accordion > .label-wrap {
198
+ background: radial-gradient(circle, rgba(7,57,153,0.2) 6%, rgba(2,0,36,0.05) 37%, rgba(9,9,121,0.15) 73%, rgba(0,212,255,0.15) 225%);
199
+ padding: 0.8rem 1rem;
200
+ font-weight: 500;
201
+ color: #000000;
202
+ }
203
  """) as demo:
204
  gr.HTML(header_html)
205
 
206
  with gr.Row():
207
  # Column 1: Text Input and Book Selection
208
  with gr.Column(elem_classes="equal-height"):
209
+ # Book and Chapter Selection Row
210
+ with gr.Row():
211
+ # Book selection
212
+ books = get_available_books()
213
+ book_dropdown = gr.Dropdown(
214
+ label=None,
215
+ show_label=False,
216
+ choices=[book['label'] for book in books],
217
+ value=books[0]['label'] if books else None,
218
+ type="value",
219
+ allow_custom_value=True,
220
+ scale=3
221
+ )
222
+
223
+ # Initialize chapters for first book
224
+ initial_book = books[0]['value'] if books else None
225
+ initial_chapters = []
226
+ if initial_book:
227
+ book_path = os.path.join("texts/processed", initial_book)
228
+ _, chapters = get_book_info(book_path)
229
+ initial_chapters = [ch['title'] for ch in chapters]
230
+
231
+ # Chapter selection with initial chapters
232
+ chapter_dropdown = gr.Dropdown(
233
+ show_label=False,
234
+ label=None,
235
+ choices=initial_chapters,
236
+ value=initial_chapters[0] if initial_chapters else None,
237
+ type="value",
238
+ allow_custom_value=True,
239
+ scale=2
240
+ )
241
+
242
+ lab_tps = 175 # Average tokens per second for o200k_base
243
+ lab_rts = 50 # Average real-time speed for o200k_base
244
  # Text input area with initial chapter text
245
  initial_text = ""
246
  if initial_chapters and initial_book:
 
265
  output_estimate = (time_estimate * lab_rts)//60
266
  return f'<div class="token-label"><span class="token-count">Estimated {output_estimate} minutes in ~{time_estimate}s</span></div>'
267
 
 
268
  text_input = gr.TextArea(
269
  label=None,
270
  placeholder="Enter text here, select a chapter, or upload a .txt file",
 
272
  lines=8,
273
  max_lines=14,
274
  show_label=False,
275
+ show_copy_button=True
276
  )
277
 
278
  clear_btn = gr.Button("Clear Text", variant="secondary")
 
309
  initial_text = get_chapter_text(book_path, chapters[0]['id']) if chapters else ""
310
  if initial_text:
311
  tokens = count_tokens(initial_text)
312
+ time_estimate = math.ceil(tokens / lab_tps)
313
+ output_estimate = (time_estimate * lab_rts)//60
314
+ label = f'<div class="token-label"><span class="token-count">Estimated {output_estimate} minutes in ~{time_estimate}s</span></div>'
315
  else:
316
  label = '<div class="token-label"></div>'
317
  return gr.update(choices=chapter_choices, value=chapter_choices[0] if chapter_choices else None), initial_text, label
 
330
  if ch['title'] == chapter_title:
331
  text = get_chapter_text(book_path, ch['id'])
332
  tokens = count_tokens(text)
333
+ time_estimate = math.ceil(tokens / lab_tps)
334
+ output_estimate = (time_estimate * lab_rts)//60
335
+ return text, f'<div class="token-label"><span class="token-count">Estimated {output_estimate} minutes in ~{time_estimate}s</span></div>'
336
  return "", '<div class="token-label"></div>'
337
 
338
  # Set up event handlers for book/chapter selection
 
362
  try:
363
  text = file_bytes.decode('utf-8')
364
  tokens = count_tokens(text)
365
+ time_estimate = math.ceil(tokens / lab_tps)
366
+ output_estimate = (time_estimate * lab_rts)//60
367
+ return text, f'<div class="token-label"><span class="token-count">Estimated {output_estimate} minutes in ~{time_estimate}s</span></div>'
368
  except Exception as e:
369
  raise gr.Error(f"Failed to read file: {str(e)}")
370
 
 
383
  multiselect=True
384
  )
385
 
 
 
 
386
  speed_slider = gr.Slider(
387
  label="Speed",
388
  minimum=0.5,
 
390
  value=1.0,
391
  step=0.1
392
  )
393
+
 
 
 
 
 
 
 
394
  submit_btn = gr.Button("Generate Speech", variant="primary")
395
+
396
+ # Audio Samples Accordion with custom styling
397
+ with gr.Accordion("Audio Samples", open=False, elem_id='gradio-accordion') as audio_accordion:
398
+ sample_files = [f for f in os.listdir("samples") if f.endswith('.wav')]
399
+ sample_audio = gr.Audio(
400
+ value=os.path.join("samples", sample_files[0]) if sample_files else None,
401
+ sources=["upload"],
402
+ type="filepath",
403
+ label="Sample Audio",
404
+ interactive=False
405
+ )
406
+ sample_dropdown = gr.Dropdown(
407
+ choices=sample_files,
408
+ value=sample_files[0] if sample_files else None,
409
+ label="Select Sample",
410
+ type="value"
411
+ )
412
+
413
+ def update_sample(sample_name):
414
+ if not sample_name:
415
+ return None
416
+ return os.path.join("samples", sample_name)
417
+
418
+ sample_dropdown.change(
419
+ fn=update_sample,
420
+ inputs=[sample_dropdown],
421
+ outputs=[sample_audio]
422
+ )
423
+
424
+
425
 
426
  # Column 3: Output
427
  with gr.Column(elem_classes="equal-height"):
 
440
  metrics_plot = gr.Plot(
441
  label="Processing Metrics",
442
  show_label=True,
443
+ format="png"
444
  )
445
 
446
  # Set up event handlers
 
 
 
 
 
447
  submit_btn.click(
448
  fn=generate_speech_from_ui,
449
+ inputs=[text_input, voice_dropdown, speed_slider],
450
  outputs=[audio_output, metrics_plot, metrics_text],
451
  show_progress=True
452
  )
deprecated copy.py ADDED
@@ -0,0 +1,435 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import os
2
+ # import gradio as gr
3
+ # import time
4
+ # import math
5
+ # import logging
6
+ # import matplotlib.pyplot as plt
7
+ # import numpy as np
8
+ # # from lib.mock_tts import MockTTSModel
9
+ # from lib import format_audio_output
10
+ # from lib.ui_content import header_html, demo_text_info
11
+ # from lib.book_utils import get_available_books, get_book_info, get_chapter_text
12
+ # from lib.text_utils import count_tokens
13
+ # from tts_model import TTSModel
14
+
15
+ # # Set HF_HOME for faster restarts with cached models/voices
16
+ # os.environ["HF_HOME"] = "/data/.huggingface"
17
+
18
+ # # Create TTS model instance
19
+ # model = TTSModel()
20
+
21
+ # # Configure logging
22
+ # logging.basicConfig(level=logging.DEBUG)
23
+ # # Suppress matplotlib debug messages
24
+ # logging.getLogger('matplotlib').setLevel(logging.WARNING)
25
+ # logger = logging.getLogger(__name__)
26
+ # logger.debug("Starting app initialization...")
27
+
28
+
29
+ # model = TTSModel()
30
+
31
+ # def initialize_model():
32
+ # """Initialize model and get voices"""
33
+ # if model.model is None:
34
+ # if not model.initialize():
35
+ # raise gr.Error("Failed to initialize model")
36
+
37
+ # voices = model.list_voices()
38
+ # if not voices:
39
+ # raise gr.Error("No voices found. Please check the voices directory.")
40
+
41
+ # default_voice = 'af_sky' if 'af_sky' in voices else voices[0] if voices else None
42
+
43
+ # return gr.update(choices=voices, value=default_voice)
44
+
45
+ # def update_progress(chunk_num, total_chunks, tokens_per_sec, rtf, progress_state, start_time, gpu_timeout, progress):
46
+ # # Calculate time metrics
47
+ # elapsed = time.time() - start_time
48
+ # gpu_time_left = max(0, gpu_timeout - elapsed)
49
+
50
+ # # Calculate chunk time more accurately
51
+ # prev_total_time = sum(progress_state["chunk_times"]) if progress_state["chunk_times"] else 0
52
+ # chunk_time = elapsed - prev_total_time
53
+
54
+ # # Validate metrics before adding to state
55
+ # if chunk_time > 0 and tokens_per_sec >= 0:
56
+ # # Update progress state with validated metrics
57
+ # progress_state["progress"] = chunk_num / total_chunks
58
+ # progress_state["total_chunks"] = total_chunks
59
+ # progress_state["gpu_time_left"] = gpu_time_left
60
+ # progress_state["tokens_per_sec"].append(float(tokens_per_sec))
61
+ # progress_state["rtf"].append(float(rtf))
62
+ # progress_state["chunk_times"].append(chunk_time)
63
+
64
+ # # Only update progress display during processing
65
+ # progress(progress_state["progress"], desc=f"Processing chunk {chunk_num}/{total_chunks} | GPU Time Left: {int(gpu_time_left)}s")
66
+
67
+ # def generate_speech_from_ui(text, voice_names, speed, gpu_timeout, progress=gr.Progress(track_tqdm=False)):
68
+ # """Handle text-to-speech generation from the Gradio UI"""
69
+ # try:
70
+ # if not text or not voice_names:
71
+ # raise gr.Error("Please enter text and select at least one voice")
72
+
73
+ # start_time = time.time()
74
+
75
+ # # Create progress state with explicit type initialization
76
+ # progress_state = {
77
+ # "progress": 0.0,
78
+ # "tokens_per_sec": [], # Initialize as empty list
79
+ # "rtf": [], # Initialize as empty list
80
+ # "chunk_times": [], # Initialize as empty list
81
+ # "gpu_time_left": float(gpu_timeout), # Ensure float
82
+ # "total_chunks": 0
83
+ # }
84
+
85
+ # # Handle single or multiple voices
86
+ # if isinstance(voice_names, str):
87
+ # voice_names = [voice_names]
88
+
89
+ # # Generate speech with progress tracking using combined voice
90
+ # audio_array, duration, metrics = model.generate_speech(
91
+ # text,
92
+ # voice_names,
93
+ # speed,
94
+ # gpu_timeout=gpu_timeout,
95
+ # progress_callback=update_progress,
96
+ # progress_state=progress_state,
97
+ # progress=progress
98
+ # )
99
+
100
+ # # Format output for Gradio
101
+ # audio_output, duration_text = format_audio_output(audio_array)
102
+
103
+ # # Create plot and metrics text outside GPU context
104
+ # fig, metrics_text = create_performance_plot(metrics, voice_names)
105
+
106
+ # return (
107
+ # audio_output,
108
+ # fig,
109
+ # metrics_text
110
+ # )
111
+ # except Exception as e:
112
+ # raise gr.Error(f"Generation failed: {str(e)}")
113
+
114
+ # def create_performance_plot(metrics, voice_names):
115
+ # """Create performance plot and metrics text from generation metrics"""
116
+ # # Clean and process the data
117
+ # tokens_per_sec = np.array(metrics["tokens_per_sec"])
118
+ # rtf_values = np.array(metrics["rtf"])
119
+
120
+ # # Calculate statistics using cleaned data
121
+ # median_tps = float(np.median(tokens_per_sec))
122
+ # mean_tps = float(np.mean(tokens_per_sec))
123
+ # std_tps = float(np.std(tokens_per_sec))
124
+
125
+ # # Set y-axis limits based on data range
126
+ # y_min = max(0, np.min(tokens_per_sec) * 0.9)
127
+ # y_max = np.max(tokens_per_sec) * 1.1
128
+
129
+ # # Create plot
130
+ # fig, ax = plt.subplots(figsize=(10, 5))
131
+ # fig.patch.set_facecolor('black')
132
+ # ax.set_facecolor('black')
133
+
134
+ # # Plot data points
135
+ # chunk_nums = list(range(1, len(tokens_per_sec) + 1))
136
+
137
+ # # Plot data points
138
+ # ax.bar(chunk_nums, tokens_per_sec, color='#ff2a6d', alpha=0.6)
139
+
140
+ # # Set y-axis limits with padding
141
+ # padding = 0.1 * (y_max - y_min)
142
+ # ax.set_ylim(max(0, y_min - padding), y_max + padding)
143
+
144
+ # # Add median line
145
+ # ax.axhline(y=median_tps, color='#05d9e8', linestyle='--',
146
+ # label=f'Median: {median_tps:.1f} tokens/sec')
147
+
148
+ # # Style improvements
149
+ # ax.set_xlabel('Chunk Number', fontsize=24, labelpad=20, color='white')
150
+ # ax.set_ylabel('Tokens per Second', fontsize=24, labelpad=20, color='white')
151
+ # ax.set_title('Processing Speed by Chunk', fontsize=28, pad=30, color='white')
152
+ # ax.tick_params(axis='both', which='major', labelsize=20, colors='white')
153
+ # ax.spines['bottom'].set_color('white')
154
+ # ax.spines['top'].set_color('white')
155
+ # ax.spines['left'].set_color('white')
156
+ # ax.spines['right'].set_color('white')
157
+ # ax.grid(False)
158
+ # ax.legend(fontsize=20, facecolor='black', edgecolor='#05d9e8', loc='lower left',
159
+ # labelcolor='white')
160
+
161
+ # plt.tight_layout()
162
+
163
+ # # Calculate average RTF from individual chunk RTFs
164
+ # rtf = np.mean(rtf_values)
165
+
166
+ # # Prepare metrics text
167
+ # metrics_text = (
168
+ # f"Median Speed: {median_tps:.1f} tokens/sec (o200k_base)\n" +
169
+ # f"Real-time Factor: {rtf:.3f}\n" +
170
+ # f"Real Time Speed: {int(1/rtf)}x\n" +
171
+ # f"Processing Time: {int(metrics['total_time'])}s\n" +
172
+ # f"Total Tokens: {metrics['total_tokens']} (o200k_base)\n" +
173
+ # f"Voices: {', '.join(voice_names)}"
174
+ # )
175
+
176
+ # return fig, metrics_text
177
+
178
+
179
+ # # Create Gradio interface
180
+ # with gr.Blocks(title="Kokoro TTS Demo", css="""
181
+ # .equal-height {
182
+ # min-height: 400px;
183
+ # display: flex;
184
+ # flex-direction: column;
185
+ # }
186
+ # .token-label {
187
+ # font-size: 1rem;
188
+ # margin-bottom: 0.3rem;
189
+ # text-align: center;
190
+ # padding: 0.2rem 0;
191
+ # }
192
+ # .token-count {
193
+ # color: #4169e1;
194
+ # }
195
+ # """) as demo:
196
+ # gr.HTML(header_html)
197
+
198
+ # with gr.Row():
199
+ # # Column 1: Text Input and Book Selection
200
+ # with gr.Column(elem_classes="equal-height"):
201
+ # # Book selection
202
+ # books = get_available_books()
203
+ # book_dropdown = gr.Dropdown(
204
+ # label="Select Book",
205
+ # choices=[book['label'] for book in books],
206
+ # value=books[0]['label'] if books else None,
207
+ # type="value",
208
+ # allow_custom_value=True
209
+ # )
210
+
211
+ # # Initialize chapters for first book
212
+ # initial_book = books[0]['value'] if books else None
213
+ # initial_chapters = []
214
+ # if initial_book:
215
+ # book_path = os.path.join("texts/processed", initial_book)
216
+ # _, chapters = get_book_info(book_path)
217
+ # initial_chapters = [ch['title'] for ch in chapters]
218
+
219
+ # # Chapter selection with initial chapters
220
+ # chapter_dropdown = gr.Dropdown(
221
+ # label="Select Chapter",
222
+ # choices=initial_chapters,
223
+ # value=initial_chapters[0] if initial_chapters else None,
224
+ # type="value",
225
+ # allow_custom_value=True
226
+ # )
227
+ # lab_tps = 175
228
+ # lab_rts = 50
229
+ # # Text input area with initial chapter text
230
+ # initial_text = ""
231
+ # if initial_chapters and initial_book:
232
+ # book_path = os.path.join("texts/processed", initial_book)
233
+ # _, chapters = get_book_info(book_path)
234
+ # if chapters:
235
+ # initial_text = get_chapter_text(book_path, chapters[0]['id'])
236
+ # tokens = count_tokens(initial_text)
237
+ # time_estimate = math.ceil(tokens / lab_tps)
238
+ # output_estimate = (time_estimate * lab_rts)//60
239
+ # initial_label = f'<div class="token-label"><span class="token-count">Estimated {output_estimate} minutes in ~{time_estimate}s</span></div>'
240
+ # else:
241
+ # initial_label = '<div class="token-label"></div>'
242
+ # else:
243
+ # initial_label = '<div class="token-label"></div>'
244
+
245
+ # def update_text_label(text):
246
+ # if not text:
247
+ # return '<div class="token-label"></div>'
248
+ # tokens = count_tokens(text)
249
+ # time_estimate = math.ceil(tokens / lab_tps)
250
+ # output_estimate = (time_estimate * lab_rts)//60
251
+ # return f'<div class="token-label"><span class="token-count">Estimated {output_estimate} minutes in ~{time_estimate}s</span></div>'
252
+
253
+
254
+ # text_input = gr.TextArea(
255
+ # label=None,
256
+ # placeholder="Enter text here, select a chapter, or upload a .txt file",
257
+ # value=initial_text,
258
+ # lines=8,
259
+ # max_lines=14,
260
+ # show_label=False,
261
+ # show_copy_button=True # Add copy button for convenience
262
+ # )
263
+
264
+ # clear_btn = gr.Button("Clear Text", variant="secondary")
265
+ # label_html = gr.HTML(initial_label)
266
+
267
+ # def clear_text():
268
+ # return "", '<div class="token-label"></div>'
269
+
270
+ # clear_btn.click(
271
+ # fn=clear_text,
272
+ # outputs=[text_input, label_html]
273
+ # )
274
+
275
+ # # Update label whenever text changes
276
+ # text_input.change(
277
+ # fn=update_text_label,
278
+ # inputs=[text_input],
279
+ # outputs=[label_html],
280
+ # trigger_mode="always_last"
281
+ # )
282
+
283
+ # def update_chapters(book_name):
284
+ # if not book_name:
285
+ # return gr.update(choices=[], value=None), "", '<div class="token-label"></div>'
286
+ # # Find the corresponding book file
287
+ # book_file = next((book['value'] for book in books if book['label'] == book_name), None)
288
+ # if not book_file:
289
+ # return gr.update(choices=[], value=None), "", '<div class="token-label"></div>'
290
+ # book_path = os.path.join("texts/processed", book_file)
291
+ # book_title, chapters = get_book_info(book_path)
292
+ # # Create simple choices list of chapter titles
293
+ # chapter_choices = [ch['title'] for ch in chapters]
294
+ # # Set initial chapter text when book is selected
295
+ # initial_text = get_chapter_text(book_path, chapters[0]['id']) if chapters else ""
296
+ # if initial_text:
297
+ # tokens = count_tokens(initial_text)
298
+ # time_estimate = math.ceil(tokens / 150 / 10) * 10
299
+ # label = f'<div class="token-label"><span class="token-count">({tokens} tokens, ~{time_estimate}s generation time)</span></div>'
300
+ # else:
301
+ # label = '<div class="token-label"></div>'
302
+ # return gr.update(choices=chapter_choices, value=chapter_choices[0] if chapter_choices else None), initial_text, label
303
+
304
+ # def load_chapter_text(book_name, chapter_title):
305
+ # if not book_name or not chapter_title:
306
+ # return "", '<div class="token-label"></div>'
307
+ # # Find the corresponding book file
308
+ # book_file = next((book['value'] for book in books if book['label'] == book_name), None)
309
+ # if not book_file:
310
+ # return "", '<div class="token-label"></div>'
311
+ # book_path = os.path.join("texts/processed", book_file)
312
+ # # Get all chapters and find the one matching the title
313
+ # _, chapters = get_book_info(book_path)
314
+ # for ch in chapters:
315
+ # if ch['title'] == chapter_title:
316
+ # text = get_chapter_text(book_path, ch['id'])
317
+ # tokens = count_tokens(text)
318
+ # time_estimate = math.ceil(tokens / 150 / 10) * 10
319
+ # return text, f'<div class="token-label"> <span class="token-count">({tokens} tokens, ~{time_estimate}s generation time)</span></div>'
320
+ # return "", '<div class="token-label"></div>'
321
+
322
+ # # Set up event handlers for book/chapter selection
323
+ # book_dropdown.change(
324
+ # fn=update_chapters,
325
+ # inputs=[book_dropdown],
326
+ # outputs=[chapter_dropdown, text_input, label_html]
327
+ # )
328
+
329
+ # chapter_dropdown.change(
330
+ # fn=load_chapter_text,
331
+ # inputs=[book_dropdown, chapter_dropdown],
332
+ # outputs=[text_input, label_html]
333
+ # )
334
+
335
+ # # Column 2: Controls
336
+ # with gr.Column(elem_classes="equal-height"):
337
+ # file_input = gr.File(
338
+ # label="Upload .txt file",
339
+ # file_types=[".txt"],
340
+ # type="binary"
341
+ # )
342
+
343
+ # def load_text_from_file(file_bytes):
344
+ # if file_bytes is None:
345
+ # return None, '<div class="token-label"></div>'
346
+ # try:
347
+ # text = file_bytes.decode('utf-8')
348
+ # tokens = count_tokens(text)
349
+ # time_estimate = math.ceil(tokens / 150 / 10) * 10 # Round up to nearest 10 seconds
350
+ # return text, f'<div class="token-label"><span class="token-count">({tokens} tokens, ~{time_estimate}s generation time)</span></div>'
351
+ # except Exception as e:
352
+ # raise gr.Error(f"Failed to read file: {str(e)}")
353
+
354
+ # file_input.change(
355
+ # fn=load_text_from_file,
356
+ # inputs=[file_input],
357
+ # outputs=[text_input, label_html]
358
+ # )
359
+
360
+ # with gr.Group():
361
+ # voice_dropdown = gr.Dropdown(
362
+ # label="Voice(s)",
363
+ # choices=[], # Start empty, will be populated after initialization
364
+ # value=None,
365
+ # allow_custom_value=True,
366
+ # multiselect=True
367
+ # )
368
+
369
+ # # Add refresh button to manually update voice list
370
+ # refresh_btn = gr.Button("🔄 Refresh Voices", size="sm")
371
+
372
+ # speed_slider = gr.Slider(
373
+ # label="Speed",
374
+ # minimum=0.5,
375
+ # maximum=2.0,
376
+ # value=1.0,
377
+ # step=0.1
378
+ # )
379
+ # gpu_timeout_slider = gr.Slider(
380
+ # label="GPU Timeout (seconds)",
381
+ # minimum=15,
382
+ # maximum=120,
383
+ # value=90,
384
+ # step=1,
385
+ # info="Maximum time allowed for GPU processing"
386
+ # )
387
+ # submit_btn = gr.Button("Generate Speech", variant="primary")
388
+
389
+ # # Column 3: Output
390
+ # with gr.Column(elem_classes="equal-height"):
391
+ # audio_output = gr.Audio(
392
+ # label="Generated Speech",
393
+ # type="numpy",
394
+ # format="wav",
395
+ # autoplay=False
396
+ # )
397
+ # progress_bar = gr.Progress(track_tqdm=False)
398
+ # metrics_text = gr.Textbox(
399
+ # label="Performance Summary",
400
+ # interactive=False,
401
+ # lines=5
402
+ # )
403
+ # metrics_plot = gr.Plot(
404
+ # label="Processing Metrics",
405
+ # show_label=True,
406
+ # format="png" # Explicitly set format to PNG which is supported by matplotlib
407
+ # )
408
+
409
+ # # Set up event handlers
410
+ # refresh_btn.click(
411
+ # fn=initialize_model,
412
+ # outputs=[voice_dropdown]
413
+ # )
414
+
415
+ # submit_btn.click(
416
+ # fn=generate_speech_from_ui,
417
+ # inputs=[text_input, voice_dropdown, speed_slider, gpu_timeout_slider],
418
+ # outputs=[audio_output, metrics_plot, metrics_text],
419
+ # show_progress=True
420
+ # )
421
+
422
+ # # Add text analysis info
423
+ # with gr.Row():
424
+ # with gr.Column():
425
+ # gr.Markdown(demo_text_info)
426
+
427
+ # # Initialize voices on load
428
+ # demo.load(
429
+ # fn=initialize_model,
430
+ # outputs=[voice_dropdown]
431
+ # )
432
+
433
+ # # Launch the app
434
+ # if __name__ == "__main__":
435
+ # demo.launch()
samples/af.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12531c7799479e5db50625e75e14b0c8c78326dbe986d3f23227c6477d7324c0
3
+ size 717834
samples/af_bella.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2713a154e441c1d6b37f7077eaa71f0de8fccee218c82cf53f3eadeead064bd4
3
+ size 508170
samples/af_bella_af_nicole.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fdb9432027c37b07361634bef13d17b993ed19d8fc5e406f5554203718f2cee
3
+ size 483978
samples/af_nicole_af_bella_af_sky.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9427d011f47b22b76b99abeacbc0cb114e5af497c0dce06fd5d968310652c61e
3
+ size 640844
samples/af_nicole_bf_isabella_af_bella.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f699e3dca378537ab15763e1fece15892c2ef886a60aaaea5c328f402633f4
3
+ size 487244
samples/af_nicole_bm_lewis.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93df5536c00a757f129e4545236859311c0106ea66ac05335daafcc46a2c9ebb
3
+ size 574986
samples/af_sky.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc8916d9612c3f7d7c308cf56da33d68f93088f1385852e185e60f39a242c8cc
3
+ size 549732
samples/af_sky_af_bella_bm_george.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48dc291d9abc4aacedf31c317e6f55530f4b01d7aa80fad6231bcfa8297769bf
3
+ size 861688
samples/af_sky_af_nicole.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec2c514fc475aaac1c2dda5363fba7d8d17031d11b414acd4319563c62a9b3b0
3
+ size 593418
samples/af_sky_af_nicole_bm_george.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2737d08414f1cfcbf097c37779a13308ad0e162928a889cab81084a5baea5f9
3
+ size 504714
samples/af_sky_bm_lewis.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbac122414110350170237b3266f17dcb13d5b1448d4f8f031684feab24ed4cc
3
+ size 675210
samples/am_adam.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd3acc2d6fe4f7856bd29283fcd9e9d9f7bf4575eb96371249f96e12e15ddae7
3
+ size 479370
samples/am_michael.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:322744137f473a889653f0103ab0aed52bfc3d70b84effd7b1bbe625d537b866
3
+ size 458634
samples/bm_lewis.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e7dea7ad726d7fedc2838eb52f26f59d8f6c1c7cf457b4b1be316431dd352e5
3
+ size 520842
samples/bm_lewis_af_sky_af_nicole.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c152e3977a32857fe45ec33009eff2baac809532072b892b5bf4d1b393ec8df4
3
+ size 625332