From f7281504f1666efe4f8386f04dd3fdfe57ded3d2 Mon Sep 17 00:00:00 2001
From: Roberto Echeagaray <rogbertoecheagaray@gmail.com>
Date: Wed, 15 Apr 2026 02:29:53 -0600
Subject: [PATCH 1/2] feat: TTS humanization pipeline, rating system, and STT
 upgrade

- Upgrade default Whisper model to large-v3-mlx (Apple Silicon) / large-v3 (PyTorch) for better Spanish transcription; fix missing preprocessor_config.json with fallback to turbo processor
- Add paralinguistic tags (sniff, shush, whimper, scream, whisper) to tag router PARA_TAGS set
- Add thumbs up/down rating system on history rows; rating + sampling params stored per generation; GET /profiles/{id}/suggested-params returns averaged best params after 3+ high-rated generations
- Show all 5 sampling params (temperature, top_k, top_p, repetition_penalty, speed) in history row badge popover with Reuse button that restores text + params to generation form
- Add Top-K, Top-P, Rep. Penalty sliders to FloatingGenerateBox Advanced popover so those fields are properly saved
- Add breath_injection, hybrid_generate, tag_router, text_preprocess utility modules for TTS humanization pipeline

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../Generation/FloatingGenerateBox.tsx        | 371 +++++++++++++++++-
 .../components/Generation/GenerationForm.tsx  | 303 +++++++++++++-
 app/src/components/History/HistoryTable.tsx   | 158 +++++++-
 .../VoiceProfiles/AudioSampleRecording.tsx    | 205 ++++++++--
 .../components/VoiceProfiles/ProfileForm.tsx  |  11 +-
 .../components/VoiceProfiles/SampleUpload.tsx |  17 +-
 app/src/lib/api/client.ts                     |  13 +
 app/src/lib/api/types.ts                      |  29 +-
 app/src/lib/hooks/useAudioRecording.ts        |   2 +-
 app/src/lib/hooks/useGenerationForm.ts        |  27 ++
 app/src/stores/generationStore.ts             |  19 +
 backend/app.py                                |  12 +
 backend/backends/__init__.py                  |  91 ++++-
 backend/backends/chatterbox_turbo_backend.py  |  10 +-
 backend/backends/mlx_backend.py               | 116 +++++-
 backend/backends/pytorch_backend.py           |   2 +-
 backend/database/migrations.py                |  14 +
 backend/database/models.py                    |   7 +
 backend/models.py                             |  39 +-
 backend/routes/generations.py                 |  53 +++
 backend/routes/profiles.py                    |  55 +++
 backend/routes/transcription.py               |  20 +-
 backend/services/generation.py                |  77 +++-
 backend/services/history.py                   |  16 +
 backend/utils/audio.py                        |  60 ++-
 backend/utils/breath_injection.py             |  98 +++++
 backend/utils/chunked_tts.py                  |  45 ++-
 backend/utils/effects.py                      | 205 ++++++++++
 backend/utils/hybrid_generate.py              | 125 ++++++
 backend/utils/tag_router.py                   |  58 +++
 backend/utils/text_preprocess.py              | 117 ++++++
 31 files changed, 2280 insertions(+), 95 deletions(-)
 create mode 100644 backend/utils/breath_injection.py
 create mode 100644 backend/utils/hybrid_generate.py
 create mode 100644 backend/utils/tag_router.py
 create mode 100644 backend/utils/text_preprocess.py

diff --git a/app/src/components/Generation/FloatingGenerateBox.tsx b/app/src/components/Generation/FloatingGenerateBox.tsx
index f1cd571d..5ab0feb4 100644
--- a/app/src/components/Generation/FloatingGenerateBox.tsx
+++ b/app/src/components/Generation/FloatingGenerateBox.tsx
@@ -1,10 +1,12 @@
 import { useQuery } from '@tanstack/react-query';
 import { useMatchRoute } from '@tanstack/react-router';
 import { AnimatePresence, motion } from 'framer-motion';
-import { Loader2, Sparkles } from 'lucide-react';
+import { CheckCircle, Loader2, SlidersHorizontal, Sparkles } from 'lucide-react';
 import { useEffect, useRef, useState } from 'react';
 import { Button } from '@/components/ui/button';
+import { Checkbox } from '@/components/ui/checkbox';
 import { Form, FormControl, FormField, FormItem, FormMessage } from '@/components/ui/form';
+import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover';
 import {
   Select,
   SelectContent,
@@ -12,6 +14,7 @@ import {
   SelectTrigger,
   SelectValue,
 } from '@/components/ui/select';
+import { Slider } from '@/components/ui/slider';
 import { Textarea } from '@/components/ui/textarea';
 import { apiClient } from '@/lib/api/client';
 import { getLanguageOptionsForEngine, type LanguageCode } from '@/lib/constants/languages';
@@ -40,15 +43,19 @@ export function FloatingGenerateBox({
   const { data: selectedProfile } = useProfile(selectedProfileId || '');
   const { data: profiles } = useProfiles();
   const [isExpanded, setIsExpanded] = useState(false);
+  const [showAdvanced, setShowAdvanced] = useState(false);
   const [selectedPresetId, setSelectedPresetId] = useState<string | null>(null);
   const containerRef = useRef<HTMLDivElement>(null);
   const textareaRef = useRef<HTMLTextAreaElement | null>(null);
+  const reuseEffectsChainRef = useRef<import('@/lib/api/types').EffectConfig[] | null>(null);
   const matchRoute = useMatchRoute();
   const isStoriesRoute = matchRoute({ to: '/stories' });
   const selectedStoryId = useStoryStore((state) => state.selectedStoryId);
   const trackEditorHeight = useStoryStore((state) => state.trackEditorHeight);
   const { data: currentStory } = useStory(selectedStoryId);
   const addPendingStoryAdd = useGenerationStore((s) => s.addPendingStoryAdd);
+  const reuseParams = useGenerationStore((s) => s.reuseParams);
+  const setReuseParams = useGenerationStore((s) => s.setReuseParams);
 
   // Fetch effect presets for the dropdown
   const { data: effectPresets } = useQuery({
@@ -56,6 +63,13 @@ export function FloatingGenerateBox({
     queryFn: () => apiClient.listEffectPresets(),
   });
 
+  // Fetch suggested params for the selected profile
+  const { data: suggestedParams } = useQuery({
+    queryKey: ['suggestedParams', selectedProfileId],
+    queryFn: () => apiClient.getSuggestedParams(selectedProfileId!),
+    enabled: !!selectedProfileId,
+  });
+
   // Calculate if track editor is visible (on stories route with items)
   const hasTrackEditor = isStoriesRoute && currentStory && currentStory.items.length > 0;
 
@@ -73,6 +87,10 @@ export function FloatingGenerateBox({
       if (selectedPresetId === '_profile') {
         return selectedProfile?.effects_chain ?? undefined;
       }
+      // Effects chain reused from history (no matching preset)
+      if (selectedPresetId === '_reuse') {
+        return reuseEffectsChainRef.current ?? undefined;
+      }
       if (!effectPresets) return undefined;
       const preset = effectPresets.find((p) => p.id === selectedPresetId);
       return preset?.effects_chain;
@@ -217,6 +235,63 @@ export function FloatingGenerateBox({
     };
   }, [isExpanded]);
 
+  // Apply params from history "Reuse" button
+  useEffect(() => {
+    // One-shot: runs when the store publishes reuseParams and clears them at the end.
+    if (!reuseParams) return;
+    form.setValue('text', reuseParams.text);
+    // Optional fields are written only when the history entry carries them, so the
+    // form keeps its current value for anything that is missing.
+    if (reuseParams.language) form.setValue('language', reuseParams.language as LanguageCode);
+    if (reuseParams.engine)
+      form.setValue(
+        'engine',
+        reuseParams.engine as
+          | 'qwen'
+          | 'qwen_custom_voice'
+          | 'luxtts'
+          | 'chatterbox'
+          | 'chatterbox_turbo'
+          | 'tada'
+          | 'kokoro',
+      );
+    if (reuseParams.temperature != null) form.setValue('temperature', reuseParams.temperature);
+    if (reuseParams.top_k != null) form.setValue('top_k', Math.round(reuseParams.top_k));
+    if (reuseParams.top_p != null) form.setValue('top_p', reuseParams.top_p);
+    if (reuseParams.repetition_penalty != null)
+      form.setValue('repetition_penalty', reuseParams.repetition_penalty);
+    if (reuseParams.speed != null) form.setValue('speed', reuseParams.speed);
+    if (reuseParams.effects_chain && reuseParams.effects_chain.length > 0) {
+      // Keep the chain in the ref so the '_reuse' sentinel can return it from getEffectsChain.
+      reuseEffectsChainRef.current = reuseParams.effects_chain;
+      const chainJson = JSON.stringify(reuseParams.effects_chain);
+      // Prefer a saved preset whose chain serializes identically; fall back to the
+      // sentinel when none matches or the preset list is not available yet.
+      const matchingPreset = effectPresets?.find(
+        (p) => JSON.stringify(p.effects_chain) === chainJson,
+      );
+      setSelectedPresetId(matchingPreset?.id ?? '_reuse');
+    } else {
+      reuseEffectsChainRef.current = null;
+      // Nothing to reuse: drop a stale '_reuse' selection left by an earlier reuse, which
+      // would otherwise stay selected with no chain behind it. Other selections are kept.
+      setSelectedPresetId((prev) => (prev === '_reuse' ? null : prev));
+    }
+    // Switch the box to its expanded state.
+    setIsExpanded(true);
+    // Consume the params so this effect doesn't re-fire
+    setReuseParams(null);
+  }, [reuseParams]); // eslint-disable-line react-hooks/exhaustive-deps
+
+  /** Copies each non-null suggested sampling param into the form (top_k rounded to an integer). */
+  function applySuggestedParams() {
+    if (suggestedParams?.temperature != null) form.setValue('temperature', suggestedParams.temperature);
+    if (suggestedParams?.top_k != null) form.setValue('top_k', Math.round(suggestedParams.top_k));
+    if (suggestedParams?.top_p != null) form.setValue('top_p', suggestedParams.top_p);
+    if (suggestedParams?.repetition_penalty != null) form.setValue('repetition_penalty', suggestedParams.repetition_penalty);
+    if (suggestedParams?.speed != null) form.setValue('speed', suggestedParams.speed);
+  }
+
   async function onSubmit(data: Parameters<typeof handleSubmit>[0]) {
     await handleSubmit(data, selectedProfileId);
   }
@@ -262,7 +337,7 @@ export function FloatingGenerateBox({
                           transition={{ duration: 0.15, ease: 'easeOut' }}
                           style={{ overflow: 'hidden' }}
                         >
-                          {form.watch('engine') === 'chatterbox_turbo' ? (
+                          {(form.watch('engine') === 'chatterbox_turbo' || form.watch('engine') === 'qwen') ? (
                             <ParalinguisticInput
                               value={field.value}
                               onChange={field.onChange}
@@ -317,7 +392,270 @@ export function FloatingGenerateBox({
                 />
               </motion.div>
 
-              <div className="relative shrink-0">
+              <div className="relative shrink-0 flex flex-col items-center gap-1">
+                {/* Settings / Advanced popover */}
+                <Popover open={showAdvanced} onOpenChange={setShowAdvanced}>
+                  <PopoverTrigger asChild>
+                    <Button
+                      type="button"
+                      variant="ghost"
+                      size="icon"
+                      aria-label="Advanced settings"
+                      className={cn(
+                        'h-7 w-7 rounded-full transition-all duration-200',
+                        showAdvanced
+                          ? 'bg-accent/20 text-accent'
+                          : 'text-muted-foreground hover:text-accent hover:bg-accent/10',
+                      )}
+                    >
+                      <SlidersHorizontal className="h-3.5 w-3.5" />
+                    </Button>
+                  </PopoverTrigger>
+                  <PopoverContent
+                    side="top"
+                    align="end"
+                    sideOffset={8}
+                    className="w-72 space-y-2.5 rounded-2xl border border-accent/20 bg-background/80 backdrop-blur-xl p-4"
+                  >
+                    <p className="text-xs font-medium text-muted-foreground mb-3">Advanced settings</p>
+
+                    {/* Row 1: Temperature + Speed */}
+                    <div className="grid grid-cols-2 gap-3">
+                      <FormField
+                        control={form.control}
+                        name="temperature"
+                        render={({ field }) => (
+                          <FormItem className="space-y-1">
+                            <div className="flex items-center justify-between">
+                              <label className="text-xs text-muted-foreground/70">Temp</label>
+                              <span className="text-xs text-muted-foreground tabular-nums">
+                                {field.value?.toFixed(2) ?? '—'}
+                              </span>
+                            </div>
+                            <FormControl>
+                              <Slider
+                                min={0}
+                                max={2}
+                                step={0.05}
+                                value={field.value !== undefined ? [field.value] : [1]}
+                                onValueChange={([v]) => field.onChange(v)}
+                                className="h-3"
+                              />
+                            </FormControl>
+                          </FormItem>
+                        )}
+                      />
+                      <FormField
+                        control={form.control}
+                        name="speed"
+                        render={({ field }) => (
+                          <FormItem className="space-y-1">
+                            <div className="flex items-center justify-between">
+                              <label className="text-xs text-muted-foreground/70">Speed</label>
+                              <span className="text-xs text-muted-foreground tabular-nums">
+                                {field.value?.toFixed(2) ?? '—'}
+                              </span>
+                            </div>
+                            <FormControl>
+                              <Slider
+                                min={0.5}
+                                max={2}
+                                step={0.05}
+                                value={field.value !== undefined ? [field.value] : [1]}
+                                onValueChange={([v]) => field.onChange(v)}
+                                className="h-3"
+                              />
+                            </FormControl>
+                          </FormItem>
+                        )}
+                      />
+                    </div>
+
+                    {/* Row 2: Top-K + Top-P */}
+                    <div className="grid grid-cols-2 gap-3">
+                      <FormField
+                        control={form.control}
+                        name="top_k"
+                        render={({ field }) => (
+                          <FormItem className="space-y-1">
+                            <div className="flex items-center justify-between">
+                              <label className="text-xs text-muted-foreground/70">Top-K</label>
+                              <span className="text-xs text-muted-foreground tabular-nums">
+                                {field.value !== undefined ? field.value : '—'}
+                              </span>
+                            </div>
+                            <FormControl>
+                              <Slider
+                                min={0}
+                                max={200}
+                                step={1}
+                                value={field.value !== undefined ? [field.value] : [50]}
+                                onValueChange={([v]) => field.onChange(v)}
+                                className="h-3"
+                              />
+                            </FormControl>
+                          </FormItem>
+                        )}
+                      />
+                      <FormField
+                        control={form.control}
+                        name="top_p"
+                        render={({ field }) => (
+                          <FormItem className="space-y-1">
+                            <div className="flex items-center justify-between">
+                              <label className="text-xs text-muted-foreground/70">Top-P</label>
+                              <span className="text-xs text-muted-foreground tabular-nums">
+                                {field.value?.toFixed(2) ?? '—'}
+                              </span>
+                            </div>
+                            <FormControl>
+                              <Slider
+                                min={0}
+                                max={1}
+                                step={0.01}
+                                value={field.value !== undefined ? [field.value] : [0.9]}
+                                onValueChange={([v]) => field.onChange(v)}
+                                className="h-3"
+                              />
+                            </FormControl>
+                          </FormItem>
+                        )}
+                      />
+                    </div>
+
+                    {/* Row 3: Repetition Penalty (half-width, left column) */}
+                    <div className="grid grid-cols-2 gap-3">
+                      <FormField
+                        control={form.control}
+                        name="repetition_penalty"
+                        render={({ field }) => (
+                          <FormItem className="space-y-1">
+                            <div className="flex items-center justify-between">
+                              <label className="text-xs text-muted-foreground/70">Rep. Penalty</label>
+                              <span className="text-xs text-muted-foreground tabular-nums">
+                                {field.value?.toFixed(2) ?? '—'}
+                              </span>
+                            </div>
+                            <FormControl>
+                              <Slider
+                                min={0.5}
+                                max={2}
+                                step={0.01}
+                                value={field.value !== undefined ? [field.value] : [1]}
+                                onValueChange={([v]) => field.onChange(v)}
+                                className="h-3"
+                              />
+                            </FormControl>
+                          </FormItem>
+                        )}
+                      />
+                    </div>
+
+                    {/* Row 4: Humanize text + intensity */}
+                    <div className="flex items-center gap-2">
+                      <FormField
+                        control={form.control}
+                        name="humanize_text"
+                        render={({ field }) => (
+                          <FormItem className="space-y-0">
+                            <FormControl>
+                              <div className="flex items-center gap-1.5">
+                                <Checkbox
+                                  id="humanize_text_adv"
+                                  checked={!!field.value}
+                                  onCheckedChange={field.onChange}
+                                />
+                                <label
+                                  htmlFor="humanize_text_adv"
+                                  className="text-xs text-muted-foreground/70 cursor-pointer select-none"
+                                >
+                                  Humanize
+                                </label>
+                              </div>
+                            </FormControl>
+                          </FormItem>
+                        )}
+                      />
+                      <FormField
+                        control={form.control}
+                        name="humanize_intensity"
+                        render={({ field }) => (
+                          <FormItem className="flex-1 space-y-0">
+                            <FormControl>
+                              <Select
+                                value={field.value ?? 'medium'}
+                                onValueChange={field.onChange}
+                                disabled={!form.watch('humanize_text')}
+                              >
+                                <SelectTrigger className="h-7 text-xs bg-card border-border rounded-full hover:bg-background/50 transition-all">
+                                  <SelectValue />
+                                </SelectTrigger>
+                                <SelectContent>
+                                  <SelectItem value="light" className="text-xs">Light</SelectItem>
+                                  <SelectItem value="medium" className="text-xs">Medium</SelectItem>
+                                  <SelectItem value="heavy" className="text-xs">Heavy</SelectItem>
+                                </SelectContent>
+                              </Select>
+                            </FormControl>
+                          </FormItem>
+                        )}
+                      />
+                    </div>
+
+                    {/* Row 5: Inject breaths + Jitter */}
+                    <div className="grid grid-cols-2 gap-3 items-center">
+                      <FormField
+                        control={form.control}
+                        name="inject_breaths"
+                        render={({ field }) => (
+                          <FormItem className="space-y-0">
+                            <FormControl>
+                              <div className="flex items-center gap-1.5">
+                                <Checkbox
+                                  id="inject_breaths_adv"
+                                  checked={!!field.value}
+                                  onCheckedChange={field.onChange}
+                                />
+                                <label
+                                  htmlFor="inject_breaths_adv"
+                                  className="text-xs text-muted-foreground/70 cursor-pointer select-none"
+                                >
+                                  Inject breaths
+                                </label>
+                              </div>
+                            </FormControl>
+                          </FormItem>
+                        )}
+                      />
+                      <FormField
+                        control={form.control}
+                        name="jitter_ms"
+                        render={({ field }) => (
+                          <FormItem className="space-y-1">
+                            <div className="flex items-center justify-between">
+                              <label className="text-xs text-muted-foreground/70">Jitter</label>
+                              <span className="text-xs text-muted-foreground tabular-nums">
+                                {field.value !== undefined ? `${field.value}ms` : '—'}
+                              </span>
+                            </div>
+                            <FormControl>
+                              <Slider
+                                min={0}
+                                max={50}
+                                step={1}
+                                value={field.value !== undefined ? [field.value] : [0]}
+                                onValueChange={([v]) => field.onChange(v)}
+                                className="h-3"
+                              />
+                            </FormControl>
+                          </FormItem>
+                        )}
+                      />
+                    </div>
+                  </PopoverContent>
+                </Popover>
+
+                {/* Generate button */}
                 <div className="group relative">
                   <Button
                     type="submit"
@@ -357,6 +695,20 @@ export function FloatingGenerateBox({
                 transition={{ duration: 0.3, ease: 'easeOut' }}
                 className=" mt-3"
               >
+                {/* Suggested params banner */}
+                {suggestedParams && (
+                  <div className="flex items-center gap-2 mb-2 px-1 py-1 rounded-xl bg-green-500/10 border border-green-500/20">
+                    <CheckCircle className="h-3 w-3 text-green-500 shrink-0 ml-1" />
+                    <span className="text-xs text-green-500 flex-1">Proven params for this voice</span>
+                    <button
+                      type="button"
+                      className="text-xs text-green-500 font-medium hover:text-green-400 transition-colors px-1.5 py-0.5 rounded-lg hover:bg-green-500/10"
+                      onClick={applySuggestedParams}
+                    >
+                      Apply
+                    </button>
+                  </div>
+                )}
                 <div className="flex items-center gap-2">
                   {showVoiceSelector && (
                     <div className="flex-1">
@@ -414,9 +766,10 @@ export function FloatingGenerateBox({
                   <FormItem className="flex-1 space-y-0">
                     <Select
                       value={selectedPresetId || 'none'}
-                      onValueChange={(value) =>
-                        setSelectedPresetId(value === 'none' ? null : value)
-                      }
+                      onValueChange={(value) => {
+                        if (value !== '_reuse') reuseEffectsChainRef.current = null;
+                        setSelectedPresetId(value === 'none' ? null : value);
+                      }}
                     >
                       <SelectTrigger className="h-8 text-xs bg-card border-border rounded-full hover:bg-background/50 transition-all">
                         <SelectValue placeholder="No effects" />
@@ -431,6 +784,11 @@ export function FloatingGenerateBox({
                               Profile default
                             </SelectItem>
                           )}
+                        {selectedPresetId === '_reuse' && (
+                          <SelectItem value="_reuse" className="text-xs">
+                            From history
+                          </SelectItem>
+                        )}
                         {effectPresets?.map((preset) => (
                           <SelectItem key={preset.id} value={preset.id} className="text-xs">
                             {preset.name}
@@ -440,6 +798,7 @@ export function FloatingGenerateBox({
                     </Select>
                   </FormItem>
                 </div>
+
               </motion.div>
             </AnimatePresence>
           </form>
diff --git a/app/src/components/Generation/GenerationForm.tsx b/app/src/components/Generation/GenerationForm.tsx
index ef3ff2c0..060d273c 100644
--- a/app/src/components/Generation/GenerationForm.tsx
+++ b/app/src/components/Generation/GenerationForm.tsx
@@ -1,7 +1,8 @@
-import { useEffect } from 'react';
-import { Loader2, Mic } from 'lucide-react';
+import { useEffect, useState } from 'react';
+import { ChevronDown, ChevronUp, Loader2, Mic } from 'lucide-react';
 import { Button } from '@/components/ui/button';
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
+import { Checkbox } from '@/components/ui/checkbox';
 import {
   Form,
   FormControl,
@@ -19,6 +20,7 @@ import {
   SelectTrigger,
   SelectValue,
 } from '@/components/ui/select';
+import { Slider } from '@/components/ui/slider';
 import { Textarea } from '@/components/ui/textarea';
 import { getLanguageOptionsForEngine, type LanguageCode } from '@/lib/constants/languages';
 import { useGenerationForm } from '@/lib/hooks/useGenerationForm';
@@ -39,6 +41,7 @@ export function GenerationForm() {
   const { data: selectedProfile } = useProfile(selectedProfileId || '');
 
   const { form, handleSubmit, isPending } = useGenerationForm();
+  const [advancedOpen, setAdvancedOpen] = useState(false);
 
   useEffect(() => {
     if (!selectedProfile) {
@@ -59,6 +62,11 @@ export function GenerationForm() {
     await handleSubmit(data, selectedProfileId);
   }
 
+  const engine = form.watch('engine');
+  const humanizeText = form.watch('humanize_text');
+  const showParalinguistic = engine === 'chatterbox_turbo' || engine === 'qwen';
+  const showSpeed = engine === 'qwen' || engine === 'qwen_custom_voice';
+
   return (
     <Card>
       <CardHeader>
@@ -89,7 +97,7 @@ export function GenerationForm() {
                 <FormItem>
                   <FormLabel>Text to Speak</FormLabel>
                   <FormControl>
-                    {form.watch('engine') === 'chatterbox_turbo' ? (
+                    {showParalinguistic ? (
                       <ParalinguisticInput
                         value={field.value}
                         onChange={field.onChange}
@@ -105,16 +113,25 @@ export function GenerationForm() {
                     )}
                   </FormControl>
                   <FormDescription>
-                    {form.watch('engine') === 'chatterbox_turbo'
-                      ? 'Max 5000 characters. Type / to insert sound effects.'
-                      : 'Max 5000 characters'}
+                    {showParalinguistic ? (
+                      <>
+                        Max 5000 characters. Type / to insert sound effects.
+                        {engine === 'qwen' && (
+                          <span className="block mt-0.5 text-muted-foreground/70">
+                            Tags like [laugh] route to Chatterbox Turbo internally.
+                          </span>
+                        )}
+                      </>
+                    ) : (
+                      'Max 5000 characters'
+                    )}
                   </FormDescription>
                   <FormMessage />
                 </FormItem>
               )}
             />
 
-            {(form.watch('engine') === 'qwen' || form.watch('engine') === 'qwen_custom_voice') && (
+            {(engine === 'qwen' || engine === 'qwen_custom_voice') && (
               <FormField
                 control={form.control}
                 name="instruct"
@@ -143,7 +160,7 @@ export function GenerationForm() {
                 <FormLabel>Model</FormLabel>
                 <EngineModelSelector form={form} selectedProfile={selectedProfile} />
                 <FormDescription>
-                  {getEngineDescription(form.watch('engine') || 'qwen')}
+                  {getEngineDescription(engine || 'qwen')}
                 </FormDescription>
               </FormItem>
 
@@ -151,7 +168,7 @@ export function GenerationForm() {
                 control={form.control}
                 name="language"
                 render={({ field }) => {
-                  const engineLangs = getLanguageOptionsForEngine(form.watch('engine') || 'qwen');
+                  const engineLangs = getLanguageOptionsForEngine(engine || 'qwen');
                   return (
                     <FormItem>
                       <FormLabel>Language</FormLabel>
@@ -198,6 +215,274 @@ export function GenerationForm() {
               />
             </div>
 
+            {/* Advanced section */}
+            <div className="border rounded-md">
+              <button
+                type="button"
+                onClick={() => setAdvancedOpen((o) => !o)}
+                className="flex w-full items-center justify-between px-4 py-3 text-sm font-medium text-left hover:bg-muted/50 transition-colors rounded-md"
+              >
+                <span>Advanced</span>
+                {advancedOpen ? (
+                  <ChevronUp className="h-4 w-4 text-muted-foreground" />
+                ) : (
+                  <ChevronDown className="h-4 w-4 text-muted-foreground" />
+                )}
+              </button>
+
+              {advancedOpen && (
+                <div className="px-4 pb-4 space-y-5 border-t">
+                  {/* Sampling Parameters */}
+                  <div className="space-y-4 pt-4">
+                    <p className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">
+                      Sampling
+                    </p>
+
+                    <FormField
+                      control={form.control}
+                      name="temperature"
+                      render={({ field }) => (
+                        <FormItem>
+                          <div className="flex items-center justify-between">
+                            <FormLabel className="text-sm">Temperature</FormLabel>
+                            <span className="text-sm text-muted-foreground tabular-nums">
+                              {field.value ?? '—'}
+                            </span>
+                          </div>
+                          <FormControl>
+                            <Slider
+                              min={0}
+                              max={2}
+                              step={0.1}
+                              value={field.value !== undefined ? [field.value] : []}
+                              onValueChange={([v]) => field.onChange(v)}
+                            />
+                          </FormControl>
+                          <FormDescription>0.0 – 2.0 · default ~0.9</FormDescription>
+                        </FormItem>
+                      )}
+                    />
+
+                    <FormField
+                      control={form.control}
+                      name="top_p"
+                      render={({ field }) => (
+                        <FormItem>
+                          <div className="flex items-center justify-between">
+                            <FormLabel className="text-sm">Top-P</FormLabel>
+                            <span className="text-sm text-muted-foreground tabular-nums">
+                              {field.value ?? '—'}
+                            </span>
+                          </div>
+                          <FormControl>
+                            <Slider
+                              min={0}
+                              max={1}
+                              step={0.05}
+                              value={field.value !== undefined ? [field.value] : []}
+                              onValueChange={([v]) => field.onChange(v)}
+                            />
+                          </FormControl>
+                          <FormDescription>0.0 – 1.0</FormDescription>
+                        </FormItem>
+                      )}
+                    />
+
+                    <FormField
+                      control={form.control}
+                      name="repetition_penalty"
+                      render={({ field }) => (
+                        <FormItem>
+                          <div className="flex items-center justify-between">
+                            <FormLabel className="text-sm">Repetition Penalty</FormLabel>
+                            <span className="text-sm text-muted-foreground tabular-nums">
+                              {field.value ?? '—'}
+                            </span>
+                          </div>
+                          <FormControl>
+                            <Slider
+                              min={0.5}
+                              max={3}
+                              step={0.05}
+                              value={field.value !== undefined ? [field.value] : []}
+                              onValueChange={([v]) => field.onChange(v)}
+                            />
+                          </FormControl>
+                          <FormDescription>0.5 – 3.0</FormDescription>
+                        </FormItem>
+                      )}
+                    />
+
+                    <FormField
+                      control={form.control}
+                      name="top_k"
+                      render={({ field }) => (
+                        <FormItem>
+                          <FormLabel className="text-sm">Top-K</FormLabel>
+                          <FormControl>
+                            <Input
+                              type="number"
+                              min={0}
+                              max={5000}
+                              step={1}
+                              placeholder="Default"
+                              value={field.value ?? ''}
+                              onChange={(e) =>
+                                field.onChange(
+                                  e.target.value ? parseInt(e.target.value, 10) : undefined,
+                                )
+                              }
+                            />
+                          </FormControl>
+                          <FormDescription>0 – 5000</FormDescription>
+                        </FormItem>
+                      )}
+                    />
+
+                    {showSpeed && (
+                      <FormField
+                        control={form.control}
+                        name="speed"
+                        render={({ field }) => (
+                          <FormItem>
+                            <div className="flex items-center justify-between">
+                              <FormLabel className="text-sm">Speed</FormLabel>
+                              <span className="text-sm text-muted-foreground tabular-nums">
+                                {field.value !== undefined ? `${field.value}×` : '—'}
+                              </span>
+                            </div>
+                            <FormControl>
+                              <Slider
+                                min={0.5}
+                                max={2}
+                                step={0.1}
+                                value={field.value !== undefined ? [field.value] : []}
+                                onValueChange={([v]) => field.onChange(v)}
+                              />
+                            </FormControl>
+                            <FormDescription>0.5× – 2.0×</FormDescription>
+                          </FormItem>
+                        )}
+                      />
+                    )}
+                  </div>
+
+                  {/* Humanization */}
+                  <div className="space-y-4">
+                    <p className="text-xs font-semibold uppercase tracking-wide text-muted-foreground">
+                      Humanization
+                    </p>
+
+                    <FormField
+                      control={form.control}
+                      name="humanize_text"
+                      render={({ field }) => (
+                        <FormItem>
+                          <div className="flex items-center gap-2">
+                            <FormControl>
+                              <Checkbox
+                                id="humanize_text"
+                                checked={!!field.value}
+                                onCheckedChange={field.onChange}
+                              />
+                            </FormControl>
+                            <FormLabel htmlFor="humanize_text" className="text-sm font-normal cursor-pointer">
+                              Humanize text
+                            </FormLabel>
+                          </div>
+                          <FormDescription>
+                            Pre-process text with LLM to add natural speech patterns
+                          </FormDescription>
+                        </FormItem>
+                      )}
+                    />
+
+                    {humanizeText && (
+                      <FormField
+                        control={form.control}
+                        name="humanize_intensity"
+                        render={({ field }) => (
+                          <FormItem className="pl-6">
+                            <FormLabel className="text-sm">Intensity</FormLabel>
+                            <div className="flex gap-3">
+                              {(['light', 'medium', 'heavy'] as const).map((level) => (
+                                <label
+                                  key={level}
+                                  className="flex items-center gap-1.5 cursor-pointer text-sm"
+                                >
+                                  <input
+                                    type="radio"
+                                    name="humanize_intensity"
+                                    value={level}
+                                    checked={field.value === level}
+                                    onChange={() => field.onChange(level)}
+                                    className="accent-primary"
+                                  />
+                                  <span className="capitalize">{level}</span>
+                                </label>
+                              ))}
+                            </div>
+                            <FormMessage />
+                          </FormItem>
+                        )}
+                      />
+                    )}
+
+                    <FormField
+                      control={form.control}
+                      name="inject_breaths"
+                      render={({ field }) => (
+                        <FormItem>
+                          <div className="flex items-center gap-2">
+                            <FormControl>
+                              <Checkbox
+                                id="inject_breaths"
+                                checked={!!field.value}
+                                onCheckedChange={field.onChange}
+                              />
+                            </FormControl>
+                            <FormLabel htmlFor="inject_breaths" className="text-sm font-normal cursor-pointer">
+                              Inject breaths
+                            </FormLabel>
+                          </div>
+                          <FormDescription>
+                            Insert natural breath sounds between sentences
+                          </FormDescription>
+                        </FormItem>
+                      )}
+                    />
+
+                    <FormField
+                      control={form.control}
+                      name="jitter_ms"
+                      render={({ field }) => (
+                        <FormItem>
+                          <div className="flex items-center justify-between">
+                            <FormLabel className="text-sm">Timing jitter</FormLabel>
+                            <span className="text-sm text-muted-foreground tabular-nums">
+                              {field.value !== undefined ? `${field.value} ms` : '—'}
+                            </span>
+                          </div>
+                          <FormControl>
+                            <Slider
+                              min={0}
+                              max={50}
+                              step={1}
+                              value={field.value !== undefined ? [field.value] : []}
+                              onValueChange={([v]) => field.onChange(v)}
+                            />
+                          </FormControl>
+                          <FormDescription>
+                            Random timing offset per chunk (0 – 50 ms)
+                          </FormDescription>
+                        </FormItem>
+                      )}
+                    />
+                  </div>
+                </div>
+              )}
+            </div>
+
             <Button type="submit" className="w-full" disabled={isPending || !selectedProfileId}>
               {isPending ? (
                 <>
diff --git a/app/src/components/History/HistoryTable.tsx b/app/src/components/History/HistoryTable.tsx
index 914c7fcb..88151859 100644
--- a/app/src/components/History/HistoryTable.tsx
+++ b/app/src/components/History/HistoryTable.tsx
@@ -1,16 +1,17 @@
 import { useQueryClient } from '@tanstack/react-query';
 import { AnimatePresence, motion } from 'framer-motion';
 import {
-  AlignCenter,
   AudioLines,
-  AudioWaveform,
   Download,
   FileArchive,
   Loader2,
   MoreHorizontal,
   Play,
   RotateCcw,
+  Settings2,
   Star,
+  ThumbsDown,
+  ThumbsUp,
   Trash2,
   Wand2,
 } from 'lucide-react';
@@ -32,6 +33,7 @@ import {
   DropdownMenuItem,
   DropdownMenuTrigger,
 } from '@/components/ui/dropdown-menu';
+import { Popover, PopoverContent, PopoverTrigger } from '@/components/ui/popover';
 import {
   Select,
   SelectContent,
@@ -56,6 +58,77 @@ import { formatDate, formatDuration, formatEngineName } from '@/lib/utils/format
 import { useGenerationStore } from '@/stores/generationStore';
 import { usePlayerStore } from '@/stores/playerStore';
 
+// ─── Params Badge ────────────────────────────────────────────────────────────
+
+function ParamsBadge({ gen }: { gen: HistoryResponse }) {
+  const activeVersion = gen.versions?.find((v) => v.is_default) ?? gen.versions?.[0];
+  const effectsChain = activeVersion?.effects_chain; // effects of the default version, else of the first one
+  // Truthy when any sampling param is set (non-null) or the active version has effects
+  const hasNonDefaultParams =
+    gen.temperature != null ||
+    gen.top_k != null ||
+    gen.top_p != null ||
+    gen.repetition_penalty != null ||
+    gen.speed != null ||
+    (effectsChain && effectsChain.length > 0);
+
+  if (!hasNonDefaultParams) return null; // nothing recorded for this generation: render no badge
+
+  return (
+    <Popover>
+      <PopoverTrigger asChild>
+        <Button
+          variant="ghost"
+          size="icon"
+          className="h-6 w-6 text-muted-foreground/50 hover:bg-muted-foreground/20 hover:text-muted-foreground"
+          aria-label="View generation params"
+        >
+          <Settings2 className="h-2 w-2" />
+        </Button>
+      </PopoverTrigger>
+      <PopoverContent side="left" align="start" className="w-48 p-2 text-xs space-y-1">
+        <p className="font-medium text-muted-foreground mb-1.5">Generation params</p>
+        {gen.temperature != null && (
+          <div className="flex justify-between">
+            <span className="text-muted-foreground">temp</span>
+            <span className="tabular-nums">{gen.temperature.toFixed(2)}</span>
+          </div>
+        )}
+        {gen.speed != null && (
+          <div className="flex justify-between">
+            <span className="text-muted-foreground">speed</span>
+            <span className="tabular-nums">{gen.speed.toFixed(2)}</span>
+          </div>
+        )}
+        {gen.top_k != null && (
+          <div className="flex justify-between">
+            <span className="text-muted-foreground">top_k</span>
+            <span className="tabular-nums">{gen.top_k}</span>
+          </div>
+        )}
+        {gen.top_p != null && (
+          <div className="flex justify-between">
+            <span className="text-muted-foreground">top_p</span>
+            <span className="tabular-nums">{gen.top_p.toFixed(2)}</span>
+          </div>
+        )}
+        {gen.repetition_penalty != null && (
+          <div className="flex justify-between">
+            <span className="text-muted-foreground">rep_penalty</span>
+            <span className="tabular-nums">{gen.repetition_penalty.toFixed(2)}</span>
+          </div>
+        )}
+        {effectsChain && effectsChain.length > 0 && (
+          <div className="flex justify-between">
+            <span className="text-muted-foreground">effects</span>
+            <span className="truncate ml-2">{effectsChain.map((e) => e.type).join(', ')}</span>
+          </div>
+        )}
+      </PopoverContent>
+    </Popover>
+  );
+}
+
 // ─── Audio Bars ─────────────────────────────────────────────────────────────
 
 function AudioBars({ mode }: { mode: 'idle' | 'generating' | 'playing' }) {
@@ -128,6 +201,7 @@ export function HistoryTable() {
   const exportGenerationAudio = useExportGenerationAudio();
   const importGeneration = useImportGeneration();
   const addPendingGeneration = useGenerationStore((state) => state.addPendingGeneration);
+  const setReuseParams = useGenerationStore((state) => state.setReuseParams);
   const setAudioWithAutoPlay = usePlayerStore((state) => state.setAudioWithAutoPlay);
   const restartCurrentAudio = usePlayerStore((state) => state.restartCurrentAudio);
   const currentAudioId = usePlayerStore((state) => state.audioId);
@@ -307,6 +381,23 @@ export function HistoryTable() {
     }
   };
 
+  const handleRate = async (generationId: string, rating: number, currentRating: number | null | undefined) => {
+    // Un-rating is not supported yet: the API has no clear operation and the
+    // backend does not accept a rating of 0. Clicking the already-active thumb
+    // is therefore a no-op rather than a toggle.
+    if (currentRating === rating) return;
+    try {
+      await apiClient.rateGeneration(generationId, rating);
+      queryClient.invalidateQueries({ queryKey: ['history'] }); // invalidate cached history after the rating call succeeds
+    } catch (error) {
+      toast({
+        title: 'Failed to rate generation',
+        description: error instanceof Error ? error.message : 'Unknown error',
+        variant: 'destructive',
+      });
+    }
+  };
+
   const handleApplyEffects = (generationId: string) => {
     const gen = allHistory.find((g) => g.id === generationId);
     const versions = gen?.versions ?? [];
@@ -357,6 +448,25 @@ export function HistoryTable() {
     }
   };
 
+  const handleReuseParams = (gen: HistoryResponse) => {
+    // Effects chain comes from the default version, else the first version, else null
+    const activeVersion = gen.versions?.find((v) => v.is_default) ?? gen.versions?.[0];
+    const effectsChain = activeVersion?.effects_chain ?? null;
+    setReuseParams({
+      text: gen.text,
+      language: gen.language,
+      engine: gen.engine ?? undefined, // null/undefined engine is passed on as undefined
+      model_size: gen.model_size ?? undefined,
+      temperature: gen.temperature,
+      top_k: gen.top_k,
+      top_p: gen.top_p,
+      repetition_penalty: gen.repetition_penalty,
+      speed: gen.speed,
+      effects_chain: effectsChain,
+    });
+    toast({ title: 'Params applied', description: 'Generation settings loaded into the form.' });
+  };
+
   const handleSwitchVersion = async (generationId: string, versionId: string) => {
     try {
       await apiClient.setDefaultVersion(generationId, versionId);
@@ -538,6 +648,17 @@ export function HistoryTable() {
                       onMouseDown={(e) => e.stopPropagation()}
                       onClick={(e) => e.stopPropagation()}
                     >
+                      <ParamsBadge gen={gen} />
+                      <Button
+                        variant="ghost"
+                        size="icon"
+                        className="h-6 w-6 text-muted-foreground/50 hover:bg-muted-foreground/20 hover:text-muted-foreground"
+                        aria-label="Reuse these params"
+                        title="Reuse text + params"
+                        onClick={() => handleReuseParams(gen)}
+                      >
+                        <RotateCcw className="h-2 w-2" />
+                      </Button>
                       <Button
                         variant="ghost"
                         size="icon"
@@ -553,6 +674,39 @@ export function HistoryTable() {
                           fill={gen.is_favorited ? 'currentColor' : 'none'}
                         />
                       </Button>
+                      {/* Rating: thumbs up (5) / thumbs down (1) */}
+                      <Button
+                        variant="ghost"
+                        size="icon"
+                        className={cn(
+                          'h-6 w-6 text-muted-foreground/50 hover:bg-muted-foreground/20 hover:text-muted-foreground',
+                          gen.rating === 5 && 'text-green-500 hover:text-green-500',
+                        )}
+                        aria-label="Thumbs up"
+                        disabled={isGenerating}
+                        onClick={() => handleRate(gen.id, 5, gen.rating)}
+                      >
+                        <ThumbsUp
+                          className="h-2 w-2"
+                          fill={gen.rating === 5 ? 'currentColor' : 'none'}
+                        />
+                      </Button>
+                      <Button
+                        variant="ghost"
+                        size="icon"
+                        className={cn(
+                          'h-6 w-6 text-muted-foreground/50 hover:bg-muted-foreground/20 hover:text-muted-foreground',
+                          gen.rating === 1 && 'text-destructive hover:text-destructive',
+                        )}
+                        aria-label="Thumbs down"
+                        disabled={isGenerating}
+                        onClick={() => handleRate(gen.id, 1, gen.rating)}
+                      >
+                        <ThumbsDown
+                          className="h-2 w-2"
+                          fill={gen.rating === 1 ? 'currentColor' : 'none'}
+                        />
+                      </Button>
                       {hasVersions && (
                         <Button
                           variant="ghost"
diff --git a/app/src/components/VoiceProfiles/AudioSampleRecording.tsx b/app/src/components/VoiceProfiles/AudioSampleRecording.tsx
index acebbbd4..209d5a0f 100644
--- a/app/src/components/VoiceProfiles/AudioSampleRecording.tsx
+++ b/app/src/components/VoiceProfiles/AudioSampleRecording.tsx
@@ -1,5 +1,5 @@
-import { Mic, Pause, Play, Square } from 'lucide-react';
-import { memo, useEffect, useState } from 'react';
+import { ChevronRight, Mic, Pause, Play, Square } from 'lucide-react';
+import { memo, useEffect, useRef, useState } from 'react';
 import { Visualizer } from 'react-sound-visualizer';
 import { Button } from '@/components/ui/button';
 import { FormControl, FormItem, FormMessage } from '@/components/ui/form';
@@ -33,13 +33,24 @@ interface AudioSampleRecordingProps {
   onStart: () => void;
   onStop: () => void;
   onCancel: () => void;
-  onTranscribe: () => void;
   onPlayPause: () => void;
   isPlaying: boolean;
-  isTranscribing?: boolean;
   showWaveform?: boolean;
 }
 
+export const SCRIPT_LINES = [ // guided reading script: one delivery cue plus one sentence per line
+  { cue: 'Neutral, natural', text: 'Hola, este es un ejemplo de voz para captura.' },
+  { cue: 'Curious, rising intonation', text: '¿Puedes ver cómo el murciélago vuela mientras como kiwi y cardillo?' },
+  { cue: 'Slight surprise, higher pitch', text: '¡Qué extraño y fascinante suena todo esto!' },
+  { cue: 'Lower, slow, controlled', text: 'Ahora hablo más despacio, con un tono más bajo y relajado.' },
+  { cue: 'Rising energy and speed', text: 'Y ahora cambio el ritmo, hablo más rápido y con mayor claridad.' },
+  { cue: 'Soft, almost whisper', text: 'Esta es una prueba en voz baja, tranquila y controlada.' },
+  { cue: 'Firm, projected', text: 'Y esta es mi voz con más fuerza y proyección.' },
+  { cue: 'Relaxed, natural close', text: 'Finalmente, cierro esta grabación de forma clara y natural.' },
+] as const;
+// Seconds of recording time allotted to each script line: the 40 s shown in the UI split evenly
+const SECS_PER_LINE = 40 / SCRIPT_LINES.length; // 5s per line
+
 export function AudioSampleRecording({
   file,
   isRecording,
@@ -47,13 +58,13 @@ export function AudioSampleRecording({
   onStart,
   onStop,
   onCancel,
-  onTranscribe,
   onPlayPause,
   isPlaying,
-  isTranscribing = false,
   showWaveform = true,
 }: AudioSampleRecordingProps) {
   const [audioStream, setAudioStream] = useState<MediaStream | null>(null);
+  const [currentLineIndex, setCurrentLineIndex] = useState(0);
+  const currentLineRef = useRef<HTMLDivElement>(null);
 
   // Request microphone access when component mounts
   useEffect(() => {
@@ -81,15 +92,68 @@ export function AudioSampleRecording({
     };
   }, [showWaveform]);
 
+  // Reset line index when recording starts
+  useEffect(() => {
+    if (isRecording) {
+      setCurrentLineIndex(0);
+    }
+  }, [isRecording]);
+
+  // Auto-advance with elapsed time, forward only: a duration tick must not undo a manual "Next"
+  useEffect(() => {
+    if (!isRecording) return;
+    const autoIndex = Math.min(
+      Math.floor(duration / SECS_PER_LINE),
+      SCRIPT_LINES.length - 1,
+    );
+    setCurrentLineIndex((prev) => Math.max(prev, autoIndex));
+  }, [isRecording, duration]);
+
+  // Scroll current line into view when it changes
+  useEffect(() => {
+    if (isRecording && currentLineRef.current) {
+      currentLineRef.current.scrollIntoView({ behavior: 'smooth', block: 'center' });
+    }
+  }, [currentLineIndex, isRecording]);
+
+  const handleAdvanceLine = () => {
+    setCurrentLineIndex((prev) => Math.min(prev + 1, SCRIPT_LINES.length - 1)); // manual skip, clamped to the last line
+  };
+
   return (
     <FormItem>
       <FormControl>
         <div className="space-y-4">
+          {/* PRE-RECORDING: full guide preview */}
           {!isRecording && !file && (
-            <div className="relative flex flex-col items-center justify-center gap-4 p-4 border-2 border-dashed rounded-lg min-h-[180px] overflow-hidden">
+            <div className="relative flex flex-col items-center gap-4 p-4 border-2 border-dashed rounded-lg overflow-hidden">
               {showWaveform && audioStream && (
                 <MemoizedWaveform audioStream={audioStream} />
               )}
+
+              <div className="relative z-10 w-full max-w-md space-y-3">
+                <p className="text-sm font-medium text-center">Recording Guide</p>
+                <div className="text-xs space-y-2 bg-muted/50 rounded-lg p-3 max-h-[200px] overflow-y-auto">
+                  <p className="text-muted-foreground italic">Read naturally with emotional intention:</p>
+
+                  <div className="space-y-1.5">
+                    {SCRIPT_LINES.map((line, i) => (
+                      <p key={i}>
+                        <span className="text-muted-foreground">[{line.cue}]</span>
+                        <br />
+                        {line.text}
+                      </p>
+                    ))}
+                  </div>
+
+                  <div className="pt-1 border-t border-border/50 space-y-0.5 text-muted-foreground">
+                    <p>⚠ Don't read like a tongue twister — say it with real intention</p>
+                    <p>⚠ Vary volume, rhythm and emotion between lines</p>
+                    <p>⚠ Brief pauses between sections</p>
+                  </div>
+                </div>
+              </div>
+
               <Button
                 type="button"
                 onClick={onStart}
@@ -100,38 +164,126 @@ export function AudioSampleRecording({
                 Start Recording
               </Button>
               <p className="relative z-10 text-sm text-muted-foreground text-center">
-                Click to start recording. Maximum duration: 30 seconds.
+                Click to start recording. Maximum duration: 40 seconds.
               </p>
             </div>
           )}
 
+          {/* RECORDING: interactive guide */}
           {isRecording && (
-            <div className="relative flex flex-col items-center justify-center gap-4 p-4 border-2 border-accent rounded-lg bg-accent/5 min-h-[180px] overflow-hidden">
+            <div className="relative flex flex-col items-center gap-4 p-4 border-2 border-accent rounded-lg bg-accent/5 overflow-hidden">
               {showWaveform && audioStream && (
                 <MemoizedWaveform audioStream={audioStream} />
               )}
-              <div className="relative z-10 flex items-center gap-4">
+
+              {/* Timer row */}
+              <div className="relative z-10 flex items-center justify-between w-full max-w-md">
                 <div className="flex items-center gap-2">
                   <div className="h-3 w-3 rounded-full bg-accent animate-pulse" />
                   <span className="text-lg font-mono font-semibold">
                     {formatAudioDuration(duration)}
                   </span>
                 </div>
+                <span className="text-sm text-muted-foreground font-mono">
+                  {formatAudioDuration(40 - duration)} left
+                </span>
+              </div>
+
+              {/* Progress dots */}
+              <div className="relative z-10 flex gap-1.5 items-center">
+                {SCRIPT_LINES.map((_, i) => (
+                  <div
+                    key={i}
+                    className={
+                      i < currentLineIndex
+                        ? 'h-1.5 w-1.5 rounded-full bg-accent/40'
+                        : i === currentLineIndex
+                          ? 'h-2.5 w-2.5 rounded-full bg-accent ring-2 ring-accent/30'
+                          : 'h-1.5 w-1.5 rounded-full bg-muted-foreground/20'
+                    }
+                  />
+                ))}
+                <span className="ml-1 text-xs text-muted-foreground">
+                  {currentLineIndex + 1}/{SCRIPT_LINES.length}
+                </span>
+              </div>
+
+              {/* Scrollable script */}
+              <div className="relative z-10 w-full max-w-md max-h-[260px] overflow-y-auto space-y-2 py-1 px-1">
+                {SCRIPT_LINES.map((line, i) => {
+                  const isCurrent = i === currentLineIndex;
+                  const isPast = i < currentLineIndex;
+
+                  return (
+                    <div
+                      key={i}
+                      ref={isCurrent ? currentLineRef : null}
+                      className={[
+                        'rounded-lg px-3 py-2 transition-all duration-300',
+                        isCurrent
+                          ? 'bg-accent/15 border border-accent/40 shadow-sm'
+                          : isPast
+                            ? 'opacity-30'
+                            : 'opacity-50',
+                      ].join(' ')}
+                    >
+                      <p
+                        className={[
+                          'text-xs mb-0.5',
+                          isCurrent ? 'text-accent font-medium' : 'text-muted-foreground',
+                        ].join(' ')}
+                      >
+                        [{line.cue}]
+                      </p>
+                      <p
+                        className={[
+                          'transition-all duration-300',
+                          isCurrent
+                            ? 'text-base font-medium leading-snug'
+                            : isPast
+                              ? 'text-sm line-through text-muted-foreground'
+                              : 'text-sm text-muted-foreground',
+                        ].join(' ')}
+                      >
+                        {line.text}
+                      </p>
+                    </div>
+                  );
+                })}
+              </div>
+
+              {/* Tips */}
+              <div className="relative z-10 w-full max-w-md text-xs text-muted-foreground space-y-0.5 border-t border-border/30 pt-2">
+                <p>⚠ Say it with real intention, not like a tongue twister</p>
+                <p>⚠ Vary volume, rhythm and emotion between lines</p>
+              </div>
+
+              {/* Controls */}
+              <div className="relative z-10 flex items-center gap-3">
+                <Button
+                  type="button"
+                  variant="outline"
+                  size="sm"
+                  onClick={handleAdvanceLine}
+                  disabled={currentLineIndex >= SCRIPT_LINES.length - 1}
+                  className="flex items-center gap-1"
+                >
+                  Next
+                  <ChevronRight className="h-4 w-4" />
+                </Button>
+                <Button
+                  type="button"
+                  onClick={onStop}
+                  className="flex items-center gap-2 bg-accent text-accent-foreground hover:bg-accent/90"
+                >
+                  <Square className="h-4 w-4" />
+                  Stop Recording
+                </Button>
               </div>
-              <Button
-                type="button"
-                onClick={onStop}
-                className="relative z-10 flex items-center gap-2 bg-accent text-accent-foreground hover:bg-accent/90"
-              >
-                <Square className="h-4 w-4" />
-                Stop Recording
-              </Button>
-              <p className="relative z-10 text-sm text-muted-foreground text-center">
-                {formatAudioDuration(30 - duration)} remaining
-              </p>
             </div>
           )}
 
+          {/* POST-RECORDING: completion state — unchanged */}
           {file && !isRecording && (
             <div className="flex flex-col items-center justify-center gap-4 p-4 border-2 border-primary rounded-lg bg-primary/5 min-h-[180px]">
               <div className="flex items-center gap-2">
@@ -139,6 +291,7 @@ export function AudioSampleRecording({
                 <span className="font-medium">Recording complete</span>
               </div>
               <p className="text-sm text-muted-foreground text-center">File: {file.name}</p>
+              <p className="text-xs text-muted-foreground">Transcript auto-filled from guide</p>
               <div className="flex gap-2">
                 <Button
                   type="button"
@@ -149,16 +302,6 @@ export function AudioSampleRecording({
                 >
                   {isPlaying ? <Pause className="h-4 w-4" /> : <Play className="h-4 w-4" />}
                 </Button>
-                <Button
-                  type="button"
-                  variant="outline"
-                  onClick={onTranscribe}
-                  disabled={isTranscribing}
-                  className="flex items-center gap-2"
-                >
-                  <Mic className="h-4 w-4" />
-                  {isTranscribing ? 'Transcribing...' : 'Transcribe'}
-                </Button>
                 <Button
                   type="button"
                   variant="outline"
diff --git a/app/src/components/VoiceProfiles/ProfileForm.tsx b/app/src/components/VoiceProfiles/ProfileForm.tsx
index 50b8cb57..f9cbb95b 100644
--- a/app/src/components/VoiceProfiles/ProfileForm.tsx
+++ b/app/src/components/VoiceProfiles/ProfileForm.tsx
@@ -54,12 +54,12 @@ import { convertToWav, formatAudioDuration, getAudioDuration } from '@/lib/utils
 import { usePlatform } from '@/platform/PlatformContext';
 import { useServerStore } from '@/stores/serverStore';
 import { type ProfileFormDraft, useUIStore } from '@/stores/uiStore';
-import { AudioSampleRecording } from './AudioSampleRecording';
+import { AudioSampleRecording, SCRIPT_LINES } from './AudioSampleRecording';
 import { AudioSampleSystem } from './AudioSampleSystem';
 import { AudioSampleUpload } from './AudioSampleUpload';
 import { SampleList } from './SampleList';
 
-const MAX_AUDIO_DURATION_SECONDS = 30;
+const MAX_AUDIO_DURATION_SECONDS = 40;
 const PRESET_ONLY_ENGINES = new Set(['kokoro', 'qwen_custom_voice']);
 const DEFAULT_ENGINE_OPTIONS = [
   { value: 'qwen', label: 'Qwen3-TTS' },
@@ -215,7 +215,7 @@ export function ProfileForm() {
     stopRecording,
     cancelRecording,
   } = useAudioRecording({
-    maxDurationSeconds: 29,
+    maxDurationSeconds: 39,
     onRecordingComplete: (blob, recordedDuration) => {
       const file = new File([blob], `recording-${Date.now()}.webm`, {
         type: blob.type || 'audio/webm',
@@ -225,6 +225,9 @@ export function ProfileForm() {
         file.recordedDuration = recordedDuration;
       }
       form.setValue('sampleFile', file, { shouldValidate: true });
+      // Auto-fill the transcript from the known script
+      const text = SCRIPT_LINES.map((line) => line.text).join(' ');
+      form.setValue('referenceText', text, { shouldValidate: true });
       toast({
         title: 'Recording complete',
         description: 'Audio has been recorded successfully.',
@@ -970,10 +973,8 @@ export function ProfileForm() {
                                     onStart={startRecording}
                                     onStop={stopRecording}
                                     onCancel={handleCancelRecording}
-                                    onTranscribe={handleTranscribe}
                                     onPlayPause={handlePlayPause}
                                     isPlaying={isPlaying}
-                                    isTranscribing={transcribe.isPending}
                                   />
                                 )}
                               />
diff --git a/app/src/components/VoiceProfiles/SampleUpload.tsx b/app/src/components/VoiceProfiles/SampleUpload.tsx
index 3c53b7d6..d6019fbd 100644
--- a/app/src/components/VoiceProfiles/SampleUpload.tsx
+++ b/app/src/components/VoiceProfiles/SampleUpload.tsx
@@ -27,8 +27,9 @@ import { useAudioRecording } from '@/lib/hooks/useAudioRecording';
 import { useAddSample, useProfile } from '@/lib/hooks/useProfiles';
 import { useSystemAudioCapture } from '@/lib/hooks/useSystemAudioCapture';
 import { useTranscription } from '@/lib/hooks/useTranscription';
+import type { LanguageCode } from '@/lib/constants/languages';
 import { usePlatform } from '@/platform/PlatformContext';
-import { AudioSampleRecording } from './AudioSampleRecording';
+import { AudioSampleRecording, SCRIPT_LINES } from './AudioSampleRecording';
 import { AudioSampleSystem } from './AudioSampleSystem';
 import { AudioSampleUpload } from './AudioSampleUpload';
 
@@ -74,7 +75,7 @@ export function SampleUpload({ profileId, open, onOpenChange }: SampleUploadProp
     stopRecording,
     cancelRecording,
   } = useAudioRecording({
-    maxDurationSeconds: 29,
+    maxDurationSeconds: 39,
     onRecordingComplete: (blob, recordedDuration) => {
       // Convert blob to File object
       const file = new File([blob], `recording-${Date.now()}.webm`, {
@@ -85,6 +86,8 @@ export function SampleUpload({ profileId, open, onOpenChange }: SampleUploadProp
         file.recordedDuration = recordedDuration;
       }
       form.setValue('file', file, { shouldValidate: true });
+      // Auto-fill the transcript from the known script (shared helper defined below).
+      handleAutoFillTranscript();
       toast({
         title: 'Recording complete',
         description: 'Audio has been recorded successfully.',
@@ -153,7 +157,7 @@ export function SampleUpload({ profileId, open, onOpenChange }: SampleUploadProp
     }
 
     try {
-      const language = profile?.language as 'en' | 'zh' | undefined;
+      const language = profile?.language as LanguageCode | undefined;
       const result = await transcribe.mutateAsync({ file, language });
 
       form.setValue('referenceText', result.text, { shouldValidate: true });
@@ -166,6 +170,12 @@ export function SampleUpload({ profileId, open, onOpenChange }: SampleUploadProp
     }
   }
 
+  /** Set the `referenceText` field to the guided script: every `SCRIPT_LINES` text joined by single spaces. */
+  function handleAutoFillTranscript() {
+    const scriptText = SCRIPT_LINES.map(({ text }) => text).join(' ');
+    form.setValue('referenceText', scriptText, { shouldValidate: true });
+  }
+
   async function onSubmit(data: SampleFormValues) {
     try {
       await addSample.mutateAsync({
@@ -281,10 +290,8 @@ export function SampleUpload({ profileId, open, onOpenChange }: SampleUploadProp
                       onStart={startRecording}
                       onStop={stopRecording}
                       onCancel={handleCancelRecording}
-                      onTranscribe={handleTranscribe}
                       onPlayPause={handlePlayPause}
                       isPlaying={isPlaying}
-                      isTranscribing={transcribe.isPending}
                     />
                   )}
                 />
diff --git a/app/src/lib/api/client.ts b/app/src/lib/api/client.ts
index 98a375e3..31d689e4 100644
--- a/app/src/lib/api/client.ts
+++ b/app/src/lib/api/client.ts
@@ -8,6 +8,7 @@ import type {
   EffectConfig,
   EffectPresetCreate,
   EffectPresetResponse,
+  GenerationRatingRequest,
   GenerationRequest,
   GenerationResponse,
   GenerationVersionResponse,
@@ -30,6 +31,7 @@ import type {
   StoryItemTrim,
   StoryItemVersionUpdate,
   StoryResponse,
+  SuggestedParams,
   TranscriptionResponse,
   VoiceProfileCreate,
   VoiceProfileResponse,
@@ -246,6 +248,27 @@ class ApiClient {
     });
   }
 
+  /**
+   * Send a rating for one generation with `PATCH /generations/{id}/rating`.
+   * The request body is a JSON-encoded `GenerationRatingRequest`.
+   */
+  async rateGeneration(generationId: string, rating: number): Promise<{ id: string; rating: number }> {
+    const payload: GenerationRatingRequest = { rating };
+    return this.request<{ id: string; rating: number }>(`/generations/${generationId}/rating`, {
+      method: 'PATCH',
+      body: JSON.stringify(payload),
+    });
+  }
+
+  /**
+   * Request `/profiles/{id}/suggested-params` for a profile.
+   * The result is typed as `SuggestedParams` or `null`.
+   */
+  async getSuggestedParams(profileId: string): Promise<SuggestedParams | null> {
+    const endpoint = `/profiles/${profileId}/suggested-params`;
+    return this.request<SuggestedParams | null>(endpoint);
+  }
+
   // History
   async listHistory(query?: HistoryQuery): Promise<HistoryListResponse> {
     const params = new URLSearchParams();
diff --git a/app/src/lib/api/types.ts b/app/src/lib/api/types.ts
index 86e3012f..deaea7ef 100644
--- a/app/src/lib/api/types.ts
+++ b/app/src/lib/api/types.ts
@@ -75,6 +75,15 @@ export interface GenerationRequest {
   crossfade_ms?: number;
   normalize?: boolean;
   effects_chain?: EffectConfig[];
+  temperature?: number;
+  top_k?: number;
+  top_p?: number;
+  repetition_penalty?: number;
+  speed?: number;
+  inject_breaths?: boolean;
+  jitter_ms?: number;
+  humanize_text?: boolean;
+  humanize_intensity?: 'light' | 'medium' | 'heavy';
 }
 
 export interface GenerationVersionResponse {
@@ -102,11 +111,29 @@ export interface GenerationResponse {
   status: 'loading_model' | 'generating' | 'completed' | 'failed';
   error?: string;
   is_favorited?: boolean;
+  rating?: number | null;
+  temperature?: number | null;
+  top_k?: number | null;
+  top_p?: number | null;
+  repetition_penalty?: number | null;
+  speed?: number | null;
   created_at: string;
   versions?: GenerationVersionResponse[];
   active_version_id?: string;
 }
 
+export interface GenerationRatingRequest {
+  rating: number; // 1-5
+}
+
+export interface SuggestedParams {
+  temperature?: number | null;
+  top_k?: number | null;
+  top_p?: number | null;
+  repetition_penalty?: number | null;
+  speed?: number | null;
+}
+
 export interface HistoryQuery {
   profile_id?: string;
   search?: string;
@@ -125,7 +152,7 @@ export interface HistoryListResponse {
   total: number;
 }
 
-export type WhisperModelSize = 'base' | 'small' | 'medium' | 'large' | 'turbo';
+export type WhisperModelSize = 'base' | 'small' | 'medium' | 'large' | 'large-v3' | 'large-v3-mlx' | 'turbo';
 
 export interface TranscriptionRequest {
   language?: LanguageCode;
diff --git a/app/src/lib/hooks/useAudioRecording.ts b/app/src/lib/hooks/useAudioRecording.ts
index 152f90c1..55cea090 100644
--- a/app/src/lib/hooks/useAudioRecording.ts
+++ b/app/src/lib/hooks/useAudioRecording.ts
@@ -8,7 +8,7 @@ interface UseAudioRecordingOptions {
 }
 
 export function useAudioRecording({
-  maxDurationSeconds = 29,
+  maxDurationSeconds = 39,
   onRecordingComplete,
 }: UseAudioRecordingOptions = {}) {
   const platform = usePlatform();
diff --git a/app/src/lib/hooks/useGenerationForm.ts b/app/src/lib/hooks/useGenerationForm.ts
index 0acdabbf..890a54f0 100644
--- a/app/src/lib/hooks/useGenerationForm.ts
+++ b/app/src/lib/hooks/useGenerationForm.ts
@@ -28,6 +28,15 @@ const generationSchema = z.object({
       'kokoro',
     ])
     .optional(),
+  temperature: z.number().min(0).max(2).optional(),
+  top_k: z.number().int().min(0).max(5000).optional(),
+  top_p: z.number().min(0).max(1).optional(),
+  repetition_penalty: z.number().min(0.5).max(3).optional(),
+  speed: z.number().min(0.5).max(2).optional(),
+  inject_breaths: z.boolean().optional(),
+  jitter_ms: z.number().int().min(0).max(50).optional(),
+  humanize_text: z.boolean().optional(),
+  humanize_intensity: z.enum(['light', 'medium', 'heavy']).optional(),
 });
 
 export type GenerationFormValues = z.infer<typeof generationSchema>;
@@ -149,6 +158,15 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
         crossfade_ms: crossfadeMs,
         normalize: normalizeAudio,
         effects_chain: effectsChain?.length ? effectsChain : undefined,
+        temperature: data.temperature,
+        top_k: data.top_k,
+        top_p: data.top_p,
+        repetition_penalty: data.repetition_penalty,
+        speed: data.speed,
+        inject_breaths: data.inject_breaths,
+        jitter_ms: data.jitter_ms,
+        humanize_text: data.humanize_text,
+        humanize_intensity: data.humanize_intensity,
       });
 
       // Track this generation for SSE status updates
@@ -162,6 +180,15 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
         modelSize: data.modelSize,
         instruct: '',
         engine: data.engine,
+        temperature: data.temperature,
+        top_k: data.top_k,
+        top_p: data.top_p,
+        repetition_penalty: data.repetition_penalty,
+        speed: data.speed,
+        inject_breaths: data.inject_breaths,
+        jitter_ms: data.jitter_ms,
+        humanize_text: data.humanize_text,
+        humanize_intensity: data.humanize_intensity,
       });
       options.onSuccess?.(result.id);
     } catch (error) {
diff --git a/app/src/stores/generationStore.ts b/app/src/stores/generationStore.ts
index edf715d1..a60a4cd0 100644
--- a/app/src/stores/generationStore.ts
+++ b/app/src/stores/generationStore.ts
@@ -1,4 +1,18 @@
 import { create } from 'zustand';
+import type { EffectConfig } from '@/lib/api/types';
+
+export interface ReuseParams {
+  text: string;
+  language?: string;
+  engine?: string;
+  model_size?: string;
+  temperature?: number | null;
+  top_k?: number | null;
+  top_p?: number | null;
+  repetition_penalty?: number | null;
+  speed?: number | null;
+  effects_chain?: EffectConfig[] | null;
+}
 
 interface GenerationState {
   /** IDs of generations currently in progress */
@@ -13,6 +27,9 @@ interface GenerationState {
   removePendingStoryAdd: (generationId: string) => string | undefined;
   setActiveGenerationId: (id: string | null) => void;
   activeGenerationId: string | null;
+  /** Params to reuse from history — set by HistoryTable, consumed by FloatingGenerateBox */
+  reuseParams: ReuseParams | null;
+  setReuseParams: (params: ReuseParams | null) => void;
 }
 
 export const useGenerationStore = create<GenerationState>((set, get) => ({
@@ -20,6 +37,8 @@ export const useGenerationStore = create<GenerationState>((set, get) => ({
   isGenerating: false,
   activeGenerationId: null,
   pendingStoryAdds: new Map(),
+  reuseParams: null,
+  setReuseParams: (params) => set({ reuseParams: params }),
 
   addPendingGeneration: (id) =>
     set((state) => {
diff --git a/backend/app.py b/backend/app.py
index 1293460a..307cda10 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -172,6 +172,28 @@ def _get_gpu_status() -> str:
     return "None (CPU only)"
 
 
+# Strong references to fire-and-forget background tasks. The event loop keeps
+# only weak references to tasks, so a task nobody else references can be
+# garbage-collected before it finishes.
+_background_tasks: set = set()
+
+
+async def _idle_unload_loop():
+    """Periodically unload models that have been idle too long.
+
+    Loops forever: sleeps 60 seconds, then calls ``check_idle_models`` from
+    the ``backends`` package. Failures (including a failed import) are logged
+    as warnings and do not stop the loop.
+    """
+    while True:
+        await asyncio.sleep(60)
+        try:
+            from .backends import check_idle_models
+            check_idle_models()
+        except Exception as e:
+            logging.getLogger(__name__).warning("Idle unload check failed: %s", e)
+
+
 def _register_lifecycle(application: FastAPI) -> None:
     """Attach startup and shutdown event handlers."""
 
@@ -197,6 +219,10 @@ async def startup_event():
         logger.info("Data directory: %s", config.get_data_dir())
 
         init_queue()
+        # Hold a reference so the idle-unload task is not garbage-collected.
+        idle_task = asyncio.create_task(_idle_unload_loop())
+        _background_tasks.add(idle_task)
+        idle_task.add_done_callback(_background_tasks.discard)
 
         # Mark stale "generating" records as failed -- leftovers from a killed process
         from sqlalchemy import text as sa_text
diff --git a/backend/backends/__init__.py b/backend/backends/__init__.py
index db19b140..7eec50eb 100644
--- a/backend/backends/__init__.py
+++ b/backend/backends/__init__.py
@@ -5,12 +5,16 @@
 and a model config registry that eliminates per-engine dispatch maps.
 """
 
+import logging
 import threading
+import time as _time
 from dataclasses import dataclass, field
 from typing import Protocol, Optional, Tuple, List
 from typing_extensions import runtime_checkable
 import numpy as np
 
+logger = logging.getLogger(__name__)
+
 from ..utils.platform_detect import get_backend_type
 
 LANGUAGE_CODE_TO_NAME = {
@@ -31,6 +35,8 @@
     "small": "openai/whisper-small",
     "medium": "openai/whisper-medium",
     "large": "openai/whisper-large-v3",
+    "large-v3": "openai/whisper-large-v3",
+    "large-v3-mlx": "mlx-community/whisper-large-v3-mlx",
     "turbo": "openai/whisper-large-v3-turbo",
 }
 
@@ -95,6 +101,7 @@ async def generate(
         language: str = "en",
         seed: Optional[int] = None,
         instruct: Optional[str] = None,
+        sampling_params: Optional[dict] = None,
     ) -> Tuple[np.ndarray, int]:
         """
         Generate audio from text.
@@ -159,6 +166,10 @@ def is_loaded(self) -> bool:
 _tts_backends_lock = threading.Lock()
 _stt_backend: Optional[STTBackend] = None
 
+_model_last_used: dict[str, float] = {}
+_stt_last_used: float = 0.0
+IDLE_TIMEOUT_SECONDS = 300  # 5 minutes
+
 # Supported TTS engines — keyed by engine name, value is the backend class import path.
 # The factory function uses this for the if/elif chain; the model configs live on the backend classes.
 TTS_ENGINES = {
@@ -176,8 +187,8 @@ def _get_qwen_model_configs() -> list[ModelConfig]:
     """Return Qwen model configs with backend-aware HF repo IDs."""
     backend_type = get_backend_type()
     if backend_type == "mlx":
-        repo_1_7b = "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16"
-        repo_0_6b = "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16"  # 0.6B not available in MLX, falls back
+        repo_1_7b = "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-8bit"
+        repo_0_6b = "mlx-community/Qwen3-TTS-12Hz-0.6B-Base-bf16"
     else:
         repo_1_7b = "Qwen/Qwen3-TTS-12Hz-1.7B-Base"
         repo_0_6b = "Qwen/Qwen3-TTS-12Hz-0.6B-Base"
@@ -348,6 +359,22 @@ def _get_whisper_configs() -> list[ModelConfig]:
             hf_repo_id="openai/whisper-large-v3",
             model_size="large",
         ),
+        ModelConfig(
+            model_name="whisper-large-v3",
+            display_name="Whisper Large v3 (Best Spanish)",
+            engine="whisper",
+            hf_repo_id="openai/whisper-large-v3",
+            model_size="large-v3",
+            size_mb=3100,
+        ),
+        ModelConfig(
+            model_name="whisper-large-v3-mlx",
+            display_name="Whisper Large v3 MLX (Best Spanish, Apple Silicon)",
+            engine="whisper",
+            hf_repo_id="mlx-community/whisper-large-v3-mlx",
+            model_size="large-v3-mlx",
+            size_mb=3000,
+        ),
         ModelConfig(
             model_name="whisper-turbo",
             display_name="Whisper Turbo",
@@ -606,6 +633,81 @@ def get_stt_backend() -> STTBackend:
     return _stt_backend
 
 
+def _has_loaded_model(backend) -> bool:
+    """Return True if ``backend`` is not None and has a non-None ``model`` attribute."""
+    return backend is not None and getattr(backend, "model", None) is not None
+
+
+def touch_tts_model(engine: str) -> None:
+    """Record that the TTS model for ``engine`` was just used."""
+    _model_last_used[engine] = _time.time()
+
+
+def touch_stt_model() -> None:
+    """Record that the STT model was just used."""
+    global _stt_last_used
+    _stt_last_used = _time.time()
+
+
+def check_idle_models() -> None:
+    """Unload models whose last recorded use is older than ``IDLE_TIMEOUT_SECONDS``.
+
+    Called by the background task. A last-used timestamp of 0 means "no use
+    recorded" and is skipped. After an unload attempt the timestamp is reset
+    to 0, so a stale value cannot make a model that gets loaded again later
+    look idle before the next ``touch_*`` call refreshes it. Unload errors
+    are logged as warnings and not re-raised.
+    """
+    global _stt_last_used
+    now = _time.time()
+
+    for engine_name, last_used in list(_model_last_used.items()):
+        if last_used <= 0 or now - last_used <= IDLE_TIMEOUT_SECONDS:
+            continue
+        backend = _tts_backends.get(engine_name)
+        if not _has_loaded_model(backend):
+            continue
+        logger.info("Auto-unloading idle TTS engine: %s (idle %.0fs)", engine_name, now - last_used)
+        try:
+            backend.unload_model()
+        except Exception as e:
+            logger.warning("Failed to auto-unload TTS %s: %s", engine_name, e)
+        _model_last_used[engine_name] = 0
+
+    stt_idle_for = now - _stt_last_used
+    if _stt_last_used > 0 and stt_idle_for > IDLE_TIMEOUT_SECONDS and _has_loaded_model(_stt_backend):
+        logger.info("Auto-unloading idle Whisper model (idle %.0fs)", stt_idle_for)
+        try:
+            _stt_backend.unload_model()
+        except Exception as e:
+            logger.warning("Failed to auto-unload Whisper: %s", e)
+        # Mirror the TTS branch: clear the stale timestamp after the attempt.
+        _stt_last_used = 0.0
+
+
+def ensure_tts_memory() -> None:
+    """Unload the STT (Whisper) model, if one is loaded, to free memory for TTS."""
+    try:
+        if _has_loaded_model(_stt_backend):
+            logger.info("Auto-unloading Whisper to free memory for TTS")
+            _stt_backend.unload_model()
+    except Exception as e:
+        logger.warning("Failed to auto-unload Whisper: %s", e)
+
+
+def ensure_exclusive_tts_engine(engine: str) -> None:
+    """Unload every loaded TTS backend except ``engine``, then run a GC pass."""
+    import gc
+    for name, backend in list(_tts_backends.items()):
+        if name != engine and _has_loaded_model(backend):
+            logger.info("Unloading %s to free memory for %s", name, engine)
+            try:
+                backend.unload_model()
+            except Exception as e:
+                logger.warning("Failed to unload TTS %s: %s", name, e)
+    gc.collect()
+
+
 def reset_backends():
     """Reset backend instances (useful for testing)."""
     global _tts_backend, _tts_backends, _stt_backend
diff --git a/backend/backends/chatterbox_turbo_backend.py b/backend/backends/chatterbox_turbo_backend.py
index 6f7d6b94..0f4ccfc3 100644
--- a/backend/backends/chatterbox_turbo_backend.py
+++ b/backend/backends/chatterbox_turbo_backend.py
@@ -153,6 +153,7 @@ async def generate(
         language: str = "en",
         seed: Optional[int] = None,
         instruct: Optional[str] = None,
+        sampling_params: Optional[dict] = None,
     ) -> Tuple[np.ndarray, int]:
         """
         Generate audio using Chatterbox Turbo TTS.
@@ -184,13 +185,16 @@ def _generate_sync():
 
             logger.info("[Chatterbox Turbo] Generating (English)")
 
+            # Drop overrides that are explicitly None so the defaults below
+            # apply; dict.get() only falls back when the key is absent.
+            sp = {k: v for k, v in (sampling_params or {}).items() if v is not None}
             wav = self.model.generate(
                 text,
                 audio_prompt_path=ref_audio,
-                temperature=0.8,
-                top_k=1000,
-                top_p=0.95,
-                repetition_penalty=1.2,
+                temperature=sp.get("temperature", 0.8),
+                top_k=sp.get("top_k", 1000),
+                top_p=sp.get("top_p", 0.95),
+                repetition_penalty=sp.get("repetition_penalty", 1.2),
             )
 
             # Convert tensor -> numpy
diff --git a/backend/backends/mlx_backend.py b/backend/backends/mlx_backend.py
index c6157a40..de65b887 100644
--- a/backend/backends/mlx_backend.py
+++ b/backend/backends/mlx_backend.py
@@ -47,9 +47,9 @@ def _get_model_path(self, model_size: str) -> str:
         """
         # MLX model mapping
         mlx_model_map = {
-            "1.7B": "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16",
+            "1.7B": "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-8bit",
             # 0.6B not yet converted to MLX format
-            "0.6B": "mlx-community/Qwen3-TTS-12Hz-1.7B-Base-bf16",  # Fallback to 1.7B
+            "0.6B": "mlx-community/Qwen3-TTS-12Hz-0.6B-Base-bf16",
         }
 
         if model_size not in mlx_model_map:
@@ -114,6 +114,13 @@ def unload_model(self):
             del self.model
             self.model = None
             self._current_model_size = None
+            import gc
+            gc.collect()
+            try:
+                import mlx.core as mx
+                mx.clear_cache()
+            except Exception:
+                pass
             logger.info("MLX TTS model unloaded")
 
     async def create_voice_prompt(
@@ -153,11 +160,20 @@ async def create_voice_prompt(
                         # Cached file no longer exists, invalidate cache
                         logger.warning("Cached audio file not found: %s, regenerating prompt", cached_audio_path)
 
+        # Compute reference audio duration
+        try:
+            import soundfile as sf
+            info = sf.info(str(audio_path))
+            ref_audio_duration = info.duration
+        except Exception:
+            ref_audio_duration = 0.0
+
         # MLX voice prompt format - store audio path and text
         # The model will process this during generation
         voice_prompt_items = {
             "ref_audio": str(audio_path),
             "ref_text": reference_text,
+            "ref_audio_duration": ref_audio_duration,
         }
 
         # Cache if enabled
@@ -177,6 +193,7 @@ async def generate(
         language: str = "en",
         seed: Optional[int] = None,
         instruct: Optional[str] = None,
+        sampling_params: Optional[dict] = None,
     ) -> Tuple[np.ndarray, int]:
         """
         Generate audio from text using voice prompt.
@@ -187,6 +204,8 @@ async def generate(
             language: Language code (en or zh) - may not be fully supported by MLX
             seed: Random seed for reproducibility
             instruct: Natural language instruction (may not be supported by MLX)
+            sampling_params: Optional dict with temperature, top_k, top_p,
+                repetition_penalty, speed overrides.
 
         Returns:
             Tuple of (audio_array, sample_rate)
@@ -213,6 +232,12 @@ def _generate_sync():
             ref_audio = voice_prompt.get("ref_audio") or voice_prompt.get("ref_audio_path")
             ref_text = voice_prompt.get("ref_text", "")
 
+            logger.info("Voice cloning: ref_audio=%s, ref_text_len=%d, model_type=%s",
+                        bool(ref_audio), len(ref_text) if ref_text else 0,
+                        type(self.model).__name__)
+            if hasattr(self.model, 'speech_tokenizer') and self.model.speech_tokenizer:
+                logger.info("Speech tokenizer has_encoder: %s", self.model.speech_tokenizer.has_encoder)
+
             # Validate that the audio file exists
             if ref_audio and not Path(ref_audio).exists():
                 logger.warning("Audio file not found: %s", ref_audio)
@@ -220,6 +245,20 @@ def _generate_sync():
                 logger.warning("Regenerating without voice prompt.")
                 ref_audio = None
 
+            # Build sampling kwargs from optional overrides
+            sp = sampling_params or {}
+            sampling_kwargs = {}
+            if sp.get("temperature") is not None:
+                sampling_kwargs["temperature"] = sp["temperature"]
+            if sp.get("top_k") is not None:
+                sampling_kwargs["top_k"] = sp["top_k"]
+            if sp.get("top_p") is not None:
+                sampling_kwargs["top_p"] = sp["top_p"]
+            if sp.get("repetition_penalty") is not None:
+                sampling_kwargs["repetition_penalty"] = sp["repetition_penalty"]
+            if sp.get("speed") is not None:
+                sampling_kwargs["speed"] = sp["speed"]
+
             # Check if model supports voice cloning via generate method
             # MLX API may support ref_audio parameter directly
             try:
@@ -231,23 +270,23 @@ def _generate_sync():
                     sig = inspect.signature(self.model.generate)
                     if "ref_audio" in sig.parameters:
                         # Generate with voice cloning
-                        for result in self.model.generate(text, ref_audio=ref_audio, ref_text=ref_text, lang_code=lang):
+                        for result in self.model.generate(text, ref_audio=ref_audio, ref_text=ref_text, lang_code=lang, **sampling_kwargs):
                             audio_chunks.append(np.array(result.audio))
                             sample_rate = result.sample_rate
                     else:
                         # Fallback: generate without voice cloning
-                        for result in self.model.generate(text, lang_code=lang):
+                        for result in self.model.generate(text, lang_code=lang, **sampling_kwargs):
                             audio_chunks.append(np.array(result.audio))
                             sample_rate = result.sample_rate
                 else:
                     # No voice prompt, generate normally
-                    for result in self.model.generate(text, lang_code=lang):
+                    for result in self.model.generate(text, lang_code=lang, **sampling_kwargs):
                         audio_chunks.append(np.array(result.audio))
                         sample_rate = result.sample_rate
             except Exception as e:
                 # If voice cloning fails, try without it
                 logger.warning("Voice cloning failed, generating without voice prompt: %s", e)
-                for result in self.model.generate(text, lang_code=lang):
+                for result in self.model.generate(text, lang_code=lang, **sampling_kwargs):
                     audio_chunks.append(np.array(result.audio))
                     sample_rate = result.sample_rate
 
@@ -258,6 +297,16 @@ def _generate_sync():
                 # Fallback: empty audio
                 audio = np.array([], dtype=np.float32)
 
+            logger.info("Generated audio: duration=%.2fs, samples=%d, sample_rate=%d",
+                        len(audio) / sample_rate, len(audio), sample_rate)
+
+            # Clear MLX inference buffers
+            try:
+                import mlx.core as mx
+                mx.clear_cache()
+            except Exception:
+                pass
+
             return audio, sample_rate
 
         # Run blocking inference in thread pool
@@ -269,7 +318,7 @@ def _generate_sync():
 class MLXSTTBackend:
     """MLX-based STT backend using mlx-audio Whisper."""
 
-    def __init__(self, model_size: str = "base"):
+    def __init__(self, model_size: str = "large-v3-mlx"):
         self.model = None
         self.model_size = model_size
 
@@ -300,6 +349,14 @@ async def load_model_async(self, model_size: Optional[str] = None):
     # Alias for compatibility
     load_model = load_model_async
 
+    # Mapping from MLX-community repos that lack preprocessor_config.json
+    # to the best cached OpenAI repo that has it.
+    # openai/whisper-large-v3-turbo shares the same processor config as large-v3
+    # and is already in the local HF cache, so no download is needed.
+    _PROCESSOR_FALLBACK_MAP = {
+        "mlx-community/whisper-large-v3-mlx": "openai/whisper-large-v3-turbo",
+    }
+
     def _load_model_sync(self, model_size: str):
         """Synchronous model loading."""
         progress_model_name = f"whisper-{model_size}"
@@ -314,6 +371,44 @@ def _load_model_sync(self, model_size: str):
             with force_offline_if_cached(is_cached, progress_model_name):
                 self.model = load(model_name)
 
+        # Option A fallback: if the MLX repo doesn't ship preprocessor_config.json,
+        # WhisperProcessor.from_pretrained silently sets _processor=None, which then
+        # crashes in get_tokenizer() at transcription time.  Load the processor from
+        # the matching OpenAI base repo so the weights stay MLX-optimised but the
+        # feature extractor / tokenizer come from a repo that has all the files.
+        if getattr(self.model, "_processor", None) is None:
+            fallback_repo = self._PROCESSOR_FALLBACK_MAP.get(model_name)
+            if fallback_repo is None:
+                # Generic fallback: try openai/whisper-<size> where size strips any
+                # trailing "-mlx" suffix.
+                base_size = model_size.replace("-mlx", "")
+                fallback_repo = f"openai/whisper-{base_size}"
+            try:
+                from transformers import WhisperProcessor
+                logger.info(
+                    "WhisperProcessor missing for %s — loading from fallback repo %s",
+                    model_name, fallback_repo,
+                )
+                self.model._processor = WhisperProcessor.from_pretrained(
+                    fallback_repo, local_files_only=True
+                )
+                logger.info("WhisperProcessor fallback loaded successfully from %s", fallback_repo)
+            except Exception as exc:
+                # local_files_only failed — try with network access allowed
+                logger.warning(
+                    "WhisperProcessor not in local cache for %s (%s), attempting download...",
+                    fallback_repo, exc,
+                )
+                try:
+                    from transformers import WhisperProcessor
+                    self.model._processor = WhisperProcessor.from_pretrained(fallback_repo)
+                    logger.info("WhisperProcessor fallback downloaded from %s", fallback_repo)
+                except Exception as exc2:
+                    logger.error(
+                        "Could not load WhisperProcessor fallback from %s: %s",
+                        fallback_repo, exc2,
+                    )
+
         self.model_size = model_size
         logger.info("MLX Whisper model %s loaded successfully", model_size)
 
@@ -322,6 +417,13 @@ def unload_model(self):
         if self.model is not None:
             del self.model
             self.model = None
+            import gc
+            gc.collect()
+            try:
+                import mlx.core as mx
+                mx.clear_cache()
+            except Exception:
+                pass
             logger.info("MLX Whisper model unloaded")
 
     async def transcribe(
diff --git a/backend/backends/pytorch_backend.py b/backend/backends/pytorch_backend.py
index c505d98d..cff57c99 100644
--- a/backend/backends/pytorch_backend.py
+++ b/backend/backends/pytorch_backend.py
@@ -236,7 +236,7 @@ def _generate_sync():
 class PyTorchSTTBackend:
     """PyTorch-based STT backend using Whisper."""
 
-    def __init__(self, model_size: str = "base"):
+    def __init__(self, model_size: str = "large-v3"):
         self.model = None
         self.processor = None
         self.model_size = model_size
diff --git a/backend/database/migrations.py b/backend/database/migrations.py
index 2bdd9282..9f682e50 100644
--- a/backend/database/migrations.py
+++ b/backend/database/migrations.py
@@ -164,6 +164,20 @@ def _migrate_generations(engine, inspector, tables: set[str]) -> None:
         _add_column(engine, "generations", "model_size VARCHAR", "model_size")
     if "is_favorited" not in columns:
         _add_column(engine, "generations", "is_favorited BOOLEAN DEFAULT 0", "is_favorited")
+    # Rating + sampling params (for suggested-params feature)
+    columns = _get_columns(inspector, "generations")
+    if "rating" not in columns:
+        _add_column(engine, "generations", "rating INTEGER", "rating")
+    if "temperature" not in columns:
+        _add_column(engine, "generations", "temperature FLOAT", "temperature")
+    if "top_k" not in columns:
+        _add_column(engine, "generations", "top_k INTEGER", "top_k")
+    if "top_p" not in columns:
+        _add_column(engine, "generations", "top_p FLOAT", "top_p")
+    if "repetition_penalty" not in columns:
+        _add_column(engine, "generations", "repetition_penalty FLOAT", "repetition_penalty")
+    if "speed" not in columns:
+        _add_column(engine, "generations", "speed FLOAT", "speed")
 
 
 def _migrate_effect_presets(engine, inspector, tables: set[str]) -> None:
diff --git a/backend/database/models.py b/backend/database/models.py
index ca03d47e..7cd6afe9 100644
--- a/backend/database/models.py
+++ b/backend/database/models.py
@@ -67,6 +67,13 @@ class Generation(Base):
     status = Column(String, default="completed")
     error = Column(Text, nullable=True)
     is_favorited = Column(Boolean, default=False)
+    rating = Column(Integer, nullable=True)  # 1-5, nullable = unrated
+    # Sampling params stored so we can compute suggested-params from highly-rated gens
+    temperature = Column(Float, nullable=True)
+    top_k = Column(Integer, nullable=True)
+    top_p = Column(Float, nullable=True)
+    repetition_penalty = Column(Float, nullable=True)
+    speed = Column(Float, nullable=True)
     created_at = Column(DateTime, default=datetime.utcnow)
 
 
diff --git a/backend/models.py b/backend/models.py
index f2f43d4b..482ab429 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -86,9 +86,18 @@ class GenerationRequest(BaseModel):
         default=50, ge=0, le=500, description="Crossfade duration in ms between chunks (0 for hard cut)"
     )
     normalize: bool = Field(default=True, description="Normalize output audio volume")
+    inject_breaths: bool = Field(default=False, description="Inject synthetic breaths at pauses")
+    jitter_ms: int = Field(default=0, ge=0, le=50, description="Micro-timing jitter in ms")
     effects_chain: Optional[List["EffectConfig"]] = Field(
         None, description="Effects chain to apply after generation (overrides profile default)"
     )
+    temperature: Optional[float] = Field(None, ge=0.0, le=2.0, description="Sampling temperature")
+    top_k: Optional[int] = Field(None, ge=0, le=5000, description="Top-k sampling")
+    top_p: Optional[float] = Field(None, ge=0.0, le=1.0, description="Nucleus sampling threshold")
+    repetition_penalty: Optional[float] = Field(None, ge=0.5, le=3.0, description="Repetition penalty")
+    speed: Optional[float] = Field(None, ge=0.5, le=2.0, description="Speed factor")
+    humanize_text: bool = Field(default=False, description="Preprocess text with LLM to add natural disfluencies")
+    humanize_intensity: Optional[str] = Field(None, pattern="^(light|medium|heavy)$", description="Disfluency intensity")
 
 
 class GenerationResponse(BaseModel):
@@ -107,6 +116,12 @@ class GenerationResponse(BaseModel):
     status: str = "completed"
     error: Optional[str] = None
     is_favorited: bool = False
+    rating: Optional[int] = None
+    temperature: Optional[float] = None
+    top_k: Optional[int] = None
+    top_p: Optional[float] = None
+    repetition_penalty: Optional[float] = None
+    speed: Optional[float] = None
     created_at: datetime
     versions: Optional[List["GenerationVersionResponse"]] = None
     active_version_id: Optional[str] = None
@@ -141,6 +156,12 @@ class HistoryResponse(BaseModel):
     status: str = "completed"
     error: Optional[str] = None
     is_favorited: bool = False
+    rating: Optional[int] = None
+    temperature: Optional[float] = None
+    top_k: Optional[int] = None
+    top_p: Optional[float] = None
+    repetition_penalty: Optional[float] = None
+    speed: Optional[float] = None
     created_at: datetime
     versions: Optional[List["GenerationVersionResponse"]] = None
     active_version_id: Optional[str] = None
@@ -149,6 +170,22 @@ class Config:
         from_attributes = True
 
 
+class GenerationRatingRequest(BaseModel):
+    """Request body for rating a generation (integer rating, 1 to 5 inclusive)."""
+
+    rating: int = Field(..., ge=1, le=5)  # required; values outside 1-5 fail validation
+
+
+class SuggestedParams(BaseModel):
+    """Suggested generation params derived from highly-rated generations; every field defaults to None."""
+
+    temperature: Optional[float] = None
+    top_k: Optional[float] = None  # float, not int: the suggested-params route fills this with a mean
+    top_p: Optional[float] = None
+    repetition_penalty: Optional[float] = None
+    speed: Optional[float] = None
+
+
 class HistoryListResponse(BaseModel):
     """Response model for history list."""
 
@@ -160,7 +197,7 @@ class TranscriptionRequest(BaseModel):
     """Request model for audio transcription."""
 
     language: Optional[str] = Field(None, pattern="^(en|zh|ja|ko|de|fr|ru|pt|es|it)$")
-    model: Optional[str] = Field(None, pattern="^(base|small|medium|large|turbo)$")
+    model: Optional[str] = Field(None, pattern="^(base|small|medium|large|large-v3|large-v3-mlx|turbo)$")
 
 
 class TranscriptionResponse(BaseModel):
diff --git a/backend/routes/generations.py b/backend/routes/generations.py
index 2af3832e..16e64a64 100644
--- a/backend/routes/generations.py
+++ b/backend/routes/generations.py
@@ -24,6 +24,26 @@ def _resolve_generation_engine(data: models.GenerationRequest, profile) -> str:
     return data.engine or getattr(profile, "default_engine", None) or getattr(profile, "preset_engine", None) or "qwen"
 
 
+def _build_sampling_params(data: models.GenerationRequest) -> dict | None:
+    """Collect the sampling overrides explicitly set on a GenerationRequest.
+
+    A field is kept whenever it is not None, so falsy-but-valid overrides
+    such as ``top_k=0`` survive. The result is None (not an empty dict) when
+    nothing was overridden, letting callers drop the kwarg altogether and
+    leave the backend defaults untouched.
+    """
+    override_fields = ("temperature", "top_k", "top_p", "repetition_penalty", "speed")
+    overrides = {}
+    for field in override_fields:
+        value = getattr(data, field)
+        if value is None:
+            continue
+        overrides[field] = value
+    if not overrides:
+        return None
+    return overrides
+
+
 @router.post("/generate", response_model=models.GenerationResponse)
 async def generate_speech(
     data: models.GenerationRequest,
@@ -60,6 +80,11 @@ async def generate_speech(
         status="generating",
         engine=engine,
         model_size=model_size if engine_has_model_sizes(engine) else None,
+        temperature=data.temperature,
+        top_k=data.top_k,
+        top_p=data.top_p,
+        repetition_penalty=data.repetition_penalty,
+        speed=data.speed,
     )
 
     task_manager.start_generation(
@@ -81,6 +106,8 @@ async def generate_speech(
             except Exception:
                 pass
 
+    sampling_params = _build_sampling_params(data)
+
     enqueue_generation(
         run_generation(
             generation_id=generation_id,
@@ -96,6 +123,11 @@ async def generate_speech(
             mode="generate",
             max_chunk_chars=data.max_chunk_chars,
             crossfade_ms=data.crossfade_ms,
+            sampling_params=sampling_params,
+            inject_breaths=data.inject_breaths,
+            jitter_ms=data.jitter_ms,
+            humanize_text=data.humanize_text,
+            humanize_intensity=data.humanize_intensity,
         )
     )
 
@@ -274,8 +306,14 @@ async def stream_speech(
         max_chunk_chars=data.max_chunk_chars,
         crossfade_ms=data.crossfade_ms,
         trim_fn=trim_fn,
+        sampling_params=_build_sampling_params(data),
+        jitter_ms=data.jitter_ms,
     )
 
+    if data.inject_breaths:
+        from ..utils.breath_injection import inject_breaths as _inject_breaths
+        audio = _inject_breaths(audio, sample_rate)
+
     effects_chain_config = None
     if data.effects_chain is not None:
         effects_chain_config = [e.model_dump() for e in data.effects_chain]
@@ -312,3 +350,18 @@ async def _wav_stream():
         media_type="audio/wav",
         headers={"Content-Disposition": 'attachment; filename="speech.wav"'},
     )
+
+
+@router.patch("/generations/{generation_id}/rating")
+async def rate_generation(
+    generation_id: str,
+    data: models.GenerationRatingRequest,
+    db: Session = Depends(get_db),
+):
+    """Store a rating on a generation, replacing any previous one; 404 if the id is unknown."""
+    record = db.query(DBGeneration).filter(DBGeneration.id == generation_id).first()
+    if record:
+        record.rating = data.rating
+        db.commit()
+        return {"id": generation_id, "rating": record.rating}
+    raise HTTPException(status_code=404, detail="Generation not found")
diff --git a/backend/routes/profiles.py b/backend/routes/profiles.py
index 7bc075c5..e61345d9 100644
--- a/backend/routes/profiles.py
+++ b/backend/routes/profiles.py
@@ -4,6 +4,7 @@
 import json as _json
 import logging
 import tempfile
+from datetime import datetime
 from pathlib import Path
 
 from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
@@ -13,6 +14,7 @@
 from .. import config, models
 from ..app import safe_content_disposition
 from ..database import VoiceProfile as DBVoiceProfile, get_db
+from ..database import Generation as DBGeneration
 from ..services import channels, export_import, profiles
 from ..services.profiles import _profile_to_response
 
@@ -306,6 +308,59 @@ async def export_profile(
         raise HTTPException(status_code=500, detail=str(e))
 
 
+@router.get("/profiles/{profile_id}/suggested-params", response_model=models.SuggestedParams | None)
+async def get_suggested_params(
+    profile_id: str,
+    db: Session = Depends(get_db),
+):
+    """Suggest sampling params for a profile from its highly-rated generations.
+
+    A generation counts as a sample when it belongs to the profile, has
+    status "completed", is rated 4 or higher, and stored at least one sampling
+    param. Each param is averaged independently over the samples that set it.
+
+    Args:
+        profile_id: ID of the voice profile to compute suggestions for.
+        db: Database session injected by FastAPI.
+
+    Returns:
+        A SuggestedParams with the per-param means (None for a param that no
+        sample set), or None when fewer than 3 samples exist. top_k is
+        returned as a float mean, not rounded.
+
+    Raises:
+        HTTPException: 404 when the profile does not exist.
+    """
+    param_names = ("temperature", "top_k", "top_p", "repetition_penalty", "speed")
+
+    profile = db.query(DBVoiceProfile).filter_by(id=profile_id).first()
+    if not profile:
+        raise HTTPException(status_code=404, detail="Profile not found")
+
+    highly_rated = (
+        db.query(DBGeneration)
+        .filter(
+            DBGeneration.profile_id == profile_id,
+            DBGeneration.rating >= 4,
+            DBGeneration.status == "completed",
+        )
+        .all()
+    )
+
+    # Rows with every sampling param NULL (e.g. generations that ran on backend
+    # defaults) carry no information, so they must not count toward the minimum.
+    samples = [g for g in highly_rated if any(getattr(g, n) is not None for n in param_names)]
+    if len(samples) < 3:
+        return None
+
+    def _avg(attr: str) -> float | None:
+        """Mean of ``attr`` over the samples that have it set, else None."""
+        vals = [getattr(g, attr) for g in samples if getattr(g, attr) is not None]
+        return sum(vals) / len(vals) if vals else None
+
+    return models.SuggestedParams(**{name: _avg(name) for name in param_names})
+
+
 @router.get("/profiles/{profile_id}/channels")
 async def get_profile_channels(
     profile_id: str,
diff --git a/backend/routes/transcription.py b/backend/routes/transcription.py
index dc949132..7d07b0bd 100644
--- a/backend/routes/transcription.py
+++ b/backend/routes/transcription.py
@@ -23,7 +23,22 @@ async def transcribe_audio(
     model: str | None = Form(None),
 ):
     """Transcribe audio file to text."""
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+    _mime_to_ext = {
+        "audio/webm": ".webm",
+        "audio/ogg": ".ogg",
+        "audio/mpeg": ".mp3",
+        "audio/mp4": ".mp4",
+        "audio/flac": ".flac",
+        "audio/wav": ".wav",
+        "audio/x-wav": ".wav",
+    }
+    _content_type = (file.content_type or "").split(";")[0].strip().lower()
+    _suffix = _mime_to_ext.get(_content_type)
+    if not _suffix and file.filename:
+        _suffix = Path(file.filename).suffix or ".wav"
+    if not _suffix:
+        _suffix = ".wav"
+    with tempfile.NamedTemporaryFile(suffix=_suffix, delete=False) as tmp:
         while chunk := await file.read(UPLOAD_CHUNK_SIZE):
             tmp.write(chunk)
         tmp_path = tmp.name
@@ -79,6 +94,7 @@ async def download_whisper_background():
     except HTTPException:
         raise
     except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
+        detail = str(e) or "Unsupported audio format — ensure the file is WAV, MP3, OGG, or FLAC"
+        raise HTTPException(status_code=500, detail=detail)
     finally:
         Path(tmp_path).unlink(missing_ok=True)
diff --git a/backend/services/generation.py b/backend/services/generation.py
index a70e633e..bfb521ef 100644
--- a/backend/services/generation.py
+++ b/backend/services/generation.py
@@ -41,6 +41,11 @@ async def run_generation(
     max_chunk_chars: Optional[int] = None,
     crossfade_ms: Optional[int] = None,
     version_id: Optional[str] = None,
+    sampling_params: Optional[dict] = None,
+    inject_breaths: bool = False,
+    jitter_ms: int = 0,
+    humanize_text: bool = False,
+    humanize_intensity: Optional[str] = None,
 ) -> None:
     """Execute TTS inference and persist the result.
 
@@ -55,11 +60,21 @@ async def run_generation(
     bg_db = next(get_db())
 
     try:
+        # LLM text preprocessing (before loading TTS model to leave RAM for Ollama)
+        if humanize_text:
+            from ..utils.text_preprocess import inject_disfluencies
+            intensity = humanize_intensity or "light"
+            text = await inject_disfluencies(text, language, intensity)
+
         tts_model = get_tts_backend_for_engine(engine)
 
         if not tts_model.is_loaded():
             await history.update_generation_status(generation_id, "loading_model", bg_db)
 
+        # Free Whisper memory before loading TTS
+        from ..backends import ensure_tts_memory
+        ensure_tts_memory()
+
         await load_engine_model(engine, model_size)
 
         voice_prompt = await profiles.create_voice_prompt_for_profile(
@@ -72,18 +87,58 @@ async def run_generation(
         await history.update_generation_status(generation_id, "generating", bg_db)
         trim_fn = trim_tts_output if engine_needs_trim(engine) else None
 
-        gen_kwargs: dict = dict(
-            language=language,
-            seed=seed if mode != "regenerate" else None,
-            instruct=instruct,
-            trim_fn=trim_fn,
-        )
-        if max_chunk_chars is not None:
-            gen_kwargs["max_chunk_chars"] = max_chunk_chars
-        if crossfade_ms is not None:
-            gen_kwargs["crossfade_ms"] = crossfade_ms
+        effective_seed = seed if mode != "regenerate" else None
+        _max_chunk_chars = max_chunk_chars if max_chunk_chars is not None else 800
+        _crossfade_ms = crossfade_ms if crossfade_ms is not None else 50
+
+        from ..utils.tag_router import has_paralinguistic_tags
+        if has_paralinguistic_tags(text) and engine in ("qwen",):
+            from ..utils.hybrid_generate import generate_hybrid
+            audio, sample_rate = await generate_hybrid(
+                text,
+                voice_prompt,
+                language=language,
+                seed=effective_seed,
+                instruct=instruct,
+                sampling_params=sampling_params,
+                max_chunk_chars=_max_chunk_chars,
+                crossfade_ms=_crossfade_ms,
+                jitter_ms=jitter_ms,
+                primary_engine=engine,
+                primary_model_size=model_size,
+            )
+        else:
+            gen_kwargs: dict = dict(
+                language=language,
+                seed=effective_seed,
+                instruct=instruct,
+                trim_fn=trim_fn,
+                max_chunk_chars=_max_chunk_chars,
+                crossfade_ms=_crossfade_ms,
+            )
+            if sampling_params:
+                gen_kwargs["sampling_params"] = sampling_params
+            if jitter_ms > 0:
+                gen_kwargs["jitter_ms"] = jitter_ms
+
+            audio, sample_rate = await generate_chunked(tts_model, text, voice_prompt, **gen_kwargs)
+
+        from ..backends import touch_tts_model
+        touch_tts_model(engine)
+
+        # Trim voice cloning warm-up for Qwen engines
+        if engine in ("qwen",) and voice_prompt.get("ref_audio"):
+            from ..utils.audio import trim_leading_warmup
+            audio = trim_leading_warmup(
+                audio, sample_rate,
+                ref_audio_duration=voice_prompt.get("ref_audio_duration", 0.0),
+                ref_text=voice_prompt.get("ref_text", ""),
+            )
 
-        audio, sample_rate = await generate_chunked(tts_model, text, voice_prompt, **gen_kwargs)
+        # Breath injection (if requested)
+        if inject_breaths:
+            from ..utils.breath_injection import inject_breaths as _inject_breaths
+            audio = _inject_breaths(audio, sample_rate)
 
         # --- Normalize (generate and regenerate always; retry skips) -----
         if normalize or mode == "regenerate":
diff --git a/backend/services/history.py b/backend/services/history.py
index 473c4b37..982543a1 100644
--- a/backend/services/history.py
+++ b/backend/services/history.py
@@ -65,6 +65,11 @@ async def create_generation(
     status: str = "completed",
     engine: Optional[str] = "qwen",
     model_size: Optional[str] = None,
+    temperature: Optional[float] = None,
+    top_k: Optional[int] = None,
+    top_p: Optional[float] = None,
+    repetition_penalty: Optional[float] = None,
+    speed: Optional[float] = None,
 ) -> GenerationResponse:
     """
     Create a new generation history entry.
@@ -98,6 +103,11 @@ async def create_generation(
         engine=engine,
         model_size=model_size,
         status=status,
+        temperature=temperature,
+        top_k=top_k,
+        top_p=top_p,
+        repetition_penalty=repetition_penalty,
+        speed=speed,
         created_at=datetime.utcnow(),
     )
 
@@ -218,6 +228,12 @@ async def list_generations(
             status=generation.status or "completed",
             error=generation.error,
             is_favorited=bool(generation.is_favorited),
+            rating=generation.rating,
+            temperature=generation.temperature,
+            top_k=generation.top_k,
+            top_p=generation.top_p,
+            repetition_penalty=generation.repetition_penalty,
+            speed=generation.speed,
             created_at=generation.created_at,
             versions=versions,
             active_version_id=active_version_id,
diff --git a/backend/utils/audio.py b/backend/utils/audio.py
index 2016992c..4a474141 100644
--- a/backend/utils/audio.py
+++ b/backend/utils/audio.py
@@ -199,10 +199,66 @@ def trim_tts_output(
     return trimmed
 
 
+def trim_leading_warmup(
+    audio: np.ndarray,
+    sample_rate: int,
+    ref_audio_duration: float,
+    ref_text: str,
+) -> np.ndarray:
+    """Trim leading warm-up audio from voice cloning output.
+
+    The Qwen3-TTS ICL mode performs proportional trimming of the reference
+    audio, but it's slightly imprecise and leaves ~0.3-0.5s of residual
+    artifacts at the start. Two cases are handled:
+
+    1. Output lasts at least 1.5x the expected warm-up (reference duration
+       plus ref_text at ~2.5 words/sec): the library trim is treated as
+       failed and the full expected warm-up is cut.
+    2. Otherwise a fixed 0.3s is cut, unless the output is 0.6s or shorter.
+
+    A 50ms cosine fade-in is applied to the result. The input array is never
+    modified: early exits return it untouched, all other paths return a copy.
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+
+    if ref_audio_duration <= 0:
+        return audio
+
+    # Estimate ref_text spoken duration (~2.5 words/sec)
+    words = len(ref_text.split()) if ref_text else 0
+    expected_warmup = ref_audio_duration + words / 2.5
+    audio_duration = len(audio) / sample_rate
+
+    if audio_duration >= expected_warmup * 1.5:
+        # Library trim didn't work — cut the full warmup
+        trim_seconds = expected_warmup
+        logger.info("Trimming %.2fs full warmup from audio (total %.2fs)",
+                    trim_seconds, audio_duration)
+    else:
+        # Library trim worked but may have left residual artifacts
+        trim_seconds = 0.3
+        if audio_duration <= trim_seconds * 2:
+            return audio  # too short to trim safely
+        logger.info("Trimming %.2fs residual artifact from audio (total %.2fs)",
+                    trim_seconds, audio_duration)
+
+    trim_samples = int(trim_seconds * sample_rate)
+    # Copy: a plain slice is a view, and the fade-in below must not write into the caller's buffer.
+    trimmed = audio[trim_samples:].copy() if 0 < trim_samples < len(audio) else audio.copy()
+
+    # 50ms cosine fade-in to eliminate any residual click artifacts
+    fade_samples = min(int(0.05 * sample_rate), len(trimmed) // 4)
+    if fade_samples > 0:
+        fade = np.cos(np.linspace(np.pi, 2 * np.pi, fade_samples)) * 0.5 + 0.5
+        trimmed[:fade_samples] = trimmed[:fade_samples] * fade
+    return trimmed
+
+
 def validate_reference_audio(
     audio_path: str,
     min_duration: float = 2.0,
-    max_duration: float = 30.0,
+    max_duration: float = 40.0,
     min_rms: float = 0.01,
 ) -> Tuple[bool, Optional[str]]:
     """
@@ -226,7 +282,7 @@ def validate_reference_audio(
 def validate_and_load_reference_audio(
     audio_path: str,
     min_duration: float = 2.0,
-    max_duration: float = 30.0,
+    max_duration: float = 40.0,
     min_rms: float = 0.01,
 ) -> Tuple[bool, Optional[str], Optional[np.ndarray], Optional[int]]:
     """
diff --git a/backend/utils/breath_injection.py b/backend/utils/breath_injection.py
new file mode 100644
index 00000000..be53d2ab
--- /dev/null
+++ b/backend/utils/breath_injection.py
@@ -0,0 +1,98 @@
+"""Synthetic breath injection for humanizing TTS output."""
+
+import numpy as np
+from scipy.signal import butter, sosfilt
+
+
+def _generate_breath(duration_ms: int, sample_rate: int, volume: float) -> np.ndarray:
+    """Synthesize one breath: white noise band-passed to 200Hz-2kHz under a Gaussian envelope.
+
+    Returns int(duration_ms * sample_rate / 1000) samples scaled by ``volume``; noise uses NumPy's global RNG.
+    """
+    length = int(duration_ms * sample_rate / 1000)
+    white = np.random.randn(length).astype(np.float32)
+    band_pass = butter(4, [200, 2000], btype='bandpass', fs=sample_rate, output='sos')
+    shaped = sosfilt(band_pass, white).astype(np.float32)
+    # exp(-t^2 / 2) over t in [-3, 3]: swells from near-silence to a peak and back
+    bell = np.exp(-0.5 * np.linspace(-3, 3, length) ** 2).astype(np.float32)
+    return shaped * bell * volume
+
+
+def _find_silence_gaps(
+    audio: np.ndarray,
+    sample_rate: int,
+    min_silence_ms: int = 200,
+    silence_threshold_db: float = -40.0,
+    frame_ms: int = 20,
+) -> list[tuple[int, int]]:
+    """Find silence gaps in audio, returning list of (start_sample, end_sample).
+
+    Frames of frame_ms (the last may be shorter) count as silent when their RMS
+    is below silence_threshold_db (dB relative to amplitude 1.0). Runs of silent
+    frames lasting at least min_silence_ms are returned, including a run that
+    reaches the end of the audio (reported with end_sample == len(audio)).
+    """
+    frame_size = max(1, int(frame_ms * sample_rate / 1000))  # never 0: range() needs a step
+    threshold = 10 ** (silence_threshold_db / 20)
+    gaps: list[tuple[int, int]] = []
+    gap_start = None
+    for i in range(0, len(audio), frame_size):  # includes the final (possibly partial) frame
+        silent = np.sqrt(np.mean(audio[i:i + frame_size] ** 2)) < threshold
+        if silent and gap_start is None:
+            gap_start = i
+        elif not silent and gap_start is not None:
+            if (i - gap_start) * 1000 / sample_rate >= min_silence_ms:
+                gaps.append((gap_start, i))
+            gap_start = None
+    # Flush a gap still open at the end: trailing silence has no loud frame to close it.
+    if gap_start is not None and (len(audio) - gap_start) * 1000 / sample_rate >= min_silence_ms:
+        gaps.append((gap_start, len(audio)))
+    return gaps
+
+
+def inject_breaths(
+    audio: np.ndarray,
+    sample_rate: int,
+    min_silence_ms: int = 200,
+    breath_duration_ms: int = 150,
+    breath_volume: float = 0.03,
+    silence_threshold_db: float = -40.0,
+) -> np.ndarray:
+    """
+    Inject synthetic breath sounds at silence gaps in audio.
+
+    Detects silence gaps of at least min_silence_ms and overlays a subtle
+    synthetic breath in the middle of each internal gap. Gaps touching the
+    very start or end of the audio are skipped to avoid leading/trailing
+    artifacts. Breath duration varies by up to +/-30 ms (floor of 50 ms) and
+    volume by up to +/-30% per gap, so the output is not deterministic.
+
+    Returns the input array itself when no internal gap exists, otherwise a
+    modified copy; the input is never mutated.
+    """
+    gaps = _find_silence_gaps(audio, sample_rate, min_silence_ms, silence_threshold_db)
+
+    # Select internal gaps by position, not by list index: audio that starts
+    # or ends with speech has no edge gap, and dropping gaps[0] / gaps[-1]
+    # would then discard real pauses.
+    internal_gaps = [(s, e) for s, e in gaps if s > 0 and e < len(audio)]
+    if not internal_gaps:
+        return audio
+
+    result = audio.copy()
+    rng = np.random.default_rng()
+
+    for gap_start, gap_end in internal_gaps:
+        dur = breath_duration_ms + rng.integers(-30, 31)
+        vol = breath_volume * (1.0 + rng.uniform(-0.3, 0.3))
+        breath = _generate_breath(max(dur, 50), sample_rate, vol)
+
+        # Center breath in the gap
+        breath_start = (gap_start + gap_end) // 2 - len(breath) // 2
+        breath_end = breath_start + len(breath)
+        if breath_start < 0 or breath_end > len(result):
+            continue
+        # Overlay (add, don't replace)
+        result[breath_start:breath_end] += breath
+
+    return result
diff --git a/backend/utils/chunked_tts.py b/backend/utils/chunked_tts.py
index 1f43379e..0e991f52 100644
--- a/backend/utils/chunked_tts.py
+++ b/backend/utils/chunked_tts.py
@@ -11,7 +11,7 @@
 
 import logging
 import re
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
 import numpy as np
 
@@ -173,10 +173,18 @@ def concatenate_audio_chunks(
     chunks: List[np.ndarray],
     sample_rate: int,
     crossfade_ms: int = 50,
+    jitter_ms: int = 0,
 ) -> np.ndarray:
     """Concatenate audio arrays with a short crossfade to eliminate clicks.
 
     Each chunk is expected to be a 1-D float32 ndarray at *sample_rate* Hz.
+
+    Parameters
+    ----------
+    jitter_ms : int
+        If > 0, apply a random micro-timing offset (±jitter_ms) at each
+        chunk boundary.  Positive offsets insert silence; negative offsets
+        increase the crossfade overlap (bounded by available audio).
     """
     if not chunks:
         return np.array([], dtype=np.float32)
@@ -189,14 +197,37 @@ def concatenate_audio_chunks(
     for chunk in chunks[1:]:
         if len(chunk) == 0:
             continue
-        overlap = min(crossfade_samples, len(result), len(chunk))
+
+        effective_crossfade = crossfade_samples
+        extra_silence = 0
+
+        if jitter_ms > 0:
+            jitter_samples = np.random.randint(-jitter_ms, jitter_ms + 1) * sample_rate // 1000
+            if jitter_samples > 0:
+                # Insert silence before this chunk
+                extra_silence = jitter_samples
+            else:
+                # Increase overlap (bounded by available audio and chunk length)
+                additional_overlap = -jitter_samples
+                effective_crossfade = min(
+                    crossfade_samples + additional_overlap,
+                    len(result),
+                    len(chunk),
+                )
+
+        overlap = min(effective_crossfade, len(result), len(chunk))
         if overlap > 0:
             fade_out = np.linspace(1.0, 0.0, overlap, dtype=np.float32)
             fade_in = np.linspace(0.0, 1.0, overlap, dtype=np.float32)
             result[-overlap:] = result[-overlap:] * fade_out + chunk[:overlap] * fade_in
-            result = np.concatenate([result, chunk[overlap:]])
+            tail = chunk[overlap:]
+        else:
+            tail = chunk
+
+        if extra_silence > 0:
+            result = np.concatenate([result, np.zeros(extra_silence, dtype=np.float32), tail])
         else:
-            result = np.concatenate([result, chunk])
+            result = np.concatenate([result, tail])
 
     return result
 
@@ -211,6 +242,8 @@ async def generate_chunked(
     max_chunk_chars: int = DEFAULT_MAX_CHUNK_CHARS,
     crossfade_ms: int = 50,
     trim_fn=None,
+    sampling_params: Optional[dict] = None,
+    jitter_ms: int = 0,
 ) -> Tuple[np.ndarray, int]:
     """Generate audio with automatic chunking for long text.
 
@@ -254,6 +287,7 @@ async def generate_chunked(
             language,
             seed,
             instruct,
+            sampling_params,
         )
         if trim_fn is not None:
             audio = trim_fn(audio, sample_rate)
@@ -287,6 +321,7 @@ async def generate_chunked(
             language,
             chunk_seed,
             instruct,
+            sampling_params,
         )
         if trim_fn is not None:
             chunk_audio = trim_fn(chunk_audio, chunk_sr)
@@ -295,5 +330,5 @@ async def generate_chunked(
         if sample_rate is None:
             sample_rate = chunk_sr
 
-    audio = concatenate_audio_chunks(audio_chunks, sample_rate, crossfade_ms=crossfade_ms)
+    audio = concatenate_audio_chunks(audio_chunks, sample_rate, crossfade_ms=crossfade_ms, jitter_ms=jitter_ms)
     return audio, sample_rate
diff --git a/backend/utils/effects.py b/backend/utils/effects.py
index afeefdde..8252f15b 100644
--- a/backend/utils/effects.py
+++ b/backend/utils/effects.py
@@ -32,6 +32,10 @@
     LowpassFilter,
     Delay,
     PitchShift,
+    Distortion,
+    Clipping,
+    NoiseGate,
+    Limiter,
 )
 
 
@@ -144,6 +148,42 @@
             "semitones": {"default": 0.0, "min": -12.0, "max": 12.0, "step": 0.5, "description": "Semitones to shift"},
         },
     },
+    "distortion": {
+        "cls": Distortion,
+        "label": "Distortion",
+        "description": "Soft saturation and harmonic warmth",
+        "params": {
+            "drive_db": {"default": 10.0, "min": 0.0, "max": 40.0, "step": 0.5, "description": "Drive amount in dB"},
+        },
+    },
+    "clipping": {
+        "cls": Clipping,
+        "label": "Clipping",
+        "description": "Hard clipping for aggressive saturation",
+        "params": {
+            "threshold_db": {"default": -6.0, "min": -40.0, "max": 0.0, "step": 0.5, "description": "Clipping threshold in dB"},
+        },
+    },
+    "noise_gate": {
+        "cls": NoiseGate,
+        "label": "Noise Gate",
+        "description": "Remove low-level noise and silence",
+        "params": {
+            "threshold_db": {"default": -40.0, "min": -80.0, "max": 0.0, "step": 1.0, "description": "Threshold (dB)"},
+            "ratio": {"default": 10.0, "min": 1.0, "max": 100.0, "step": 1.0, "description": "Gate ratio"},
+            "attack_ms": {"default": 1.0, "min": 0.1, "max": 100.0, "step": 0.1, "description": "Attack time (ms)"},
+            "release_ms": {"default": 100.0, "min": 10.0, "max": 1000.0, "step": 10.0, "description": "Release time (ms)"},
+        },
+    },
+    "limiter": {
+        "cls": Limiter,
+        "label": "Limiter",
+        "description": "Peak limiting to prevent clipping",
+        "params": {
+            "threshold_db": {"default": -1.0, "min": -40.0, "max": 0.0, "step": 0.5, "description": "Threshold (dB)"},
+            "release_ms": {"default": 100.0, "min": 10.0, "max": 1000.0, "step": 10.0, "description": "Release time (ms)"},
+        },
+    },
 }
 
 
@@ -252,6 +292,171 @@
             },
         ],
     },
+    "warm_voice": {
+        "name": "Warm Voice",
+        "sort_order": 5,
+        "description": "Warm analog-style voice with subtle saturation",
+        "effects_chain": [
+            {
+                "type": "distortion",
+                "enabled": True,
+                "params": {"drive_db": 6.0},
+            },
+            {
+                "type": "lowpass",
+                "enabled": True,
+                "params": {"cutoff_frequency_hz": 8000.0},
+            },
+            {
+                "type": "compressor",
+                "enabled": True,
+                "params": {
+                    "threshold_db": -15.0,
+                    "ratio": 2.5,
+                    "attack_ms": 10.0,
+                    "release_ms": 200.0,
+                },
+            },
+            {
+                "type": "gain",
+                "enabled": True,
+                "params": {"gain_db": 3.0},
+            },
+        ],
+    },
+    "tape": {
+        "name": "Tape",
+        "sort_order": 6,
+        "description": "Vintage tape recorder character",
+        "effects_chain": [
+            {
+                "type": "distortion",
+                "enabled": True,
+                "params": {"drive_db": 12.0},
+            },
+            {
+                "type": "highpass",
+                "enabled": True,
+                "params": {"cutoff_frequency_hz": 60.0},
+            },
+            {
+                "type": "lowpass",
+                "enabled": True,
+                "params": {"cutoff_frequency_hz": 12000.0},
+            },
+            {
+                "type": "chorus",
+                "enabled": True,
+                "params": {
+                    "rate_hz": 0.3,
+                    "depth": 0.1,
+                    "centre_delay_ms": 7.0,
+                    "feedback": 0.05,
+                    "mix": 0.15,
+                },
+            },
+            {
+                "type": "compressor",
+                "enabled": True,
+                "params": {
+                    "threshold_db": -18.0,
+                    "ratio": 3.0,
+                    "attack_ms": 5.0,
+                    "release_ms": 150.0,
+                },
+            },
+        ],
+    },
+    "natural": {
+        "name": "Natural",
+        "sort_order": 7,
+        "description": "Clean natural voice with noise reduction",
+        "effects_chain": [
+            {
+                "type": "noise_gate",
+                "enabled": True,
+                "params": {
+                    "threshold_db": -40.0,
+                    "ratio": 10.0,
+                    "attack_ms": 1.0,
+                    "release_ms": 100.0,
+                },
+            },
+            {
+                "type": "compressor",
+                "enabled": True,
+                "params": {
+                    "threshold_db": -20.0,
+                    "ratio": 2.0,
+                    "attack_ms": 15.0,
+                    "release_ms": 200.0,
+                },
+            },
+            {
+                "type": "limiter",
+                "enabled": True,
+                "params": {
+                    "threshold_db": -1.0,
+                    "release_ms": 100.0,
+                },
+            },
+        ],
+    },
+    "emotion_energetic": {
+        "name": "Emotion: Energetic",
+        "sort_order": 8,
+        "description": "Energetic and upbeat vocal style",
+        "effects_chain": [
+            {
+                "type": "pitch_shift",
+                "enabled": True,
+                "params": {"semitones": 1.0},
+            },
+            {
+                "type": "compressor",
+                "enabled": True,
+                "params": {
+                    "threshold_db": -15.0,
+                    "ratio": 3.0,
+                    "attack_ms": 2.0,
+                    "release_ms": 100.0,
+                },
+            },
+            {
+                "type": "gain",
+                "enabled": True,
+                "params": {"gain_db": 2.0},
+            },
+        ],
+    },
+    "emotion_calm": {
+        "name": "Emotion: Calm",
+        "sort_order": 9,
+        "description": "Calm and soothing vocal tone",
+        "effects_chain": [
+            {
+                "type": "pitch_shift",
+                "enabled": True,
+                "params": {"semitones": -0.5},
+            },
+            {
+                "type": "lowpass",
+                "enabled": True,
+                "params": {"cutoff_frequency_hz": 7000.0},
+            },
+            {
+                "type": "reverb",
+                "enabled": True,
+                "params": {
+                    "room_size": 0.2,
+                    "damping": 0.7,
+                    "wet_level": 0.1,
+                    "dry_level": 0.9,
+                    "width": 0.5,
+                },
+            },
+        ],
+    },
 }
 
 
diff --git a/backend/utils/hybrid_generate.py b/backend/utils/hybrid_generate.py
new file mode 100644
index 00000000..484ae7bd
--- /dev/null
+++ b/backend/utils/hybrid_generate.py
@@ -0,0 +1,167 @@
+"""Hybrid multi-engine generation: routes paralinguistic tags to Chatterbox Turbo."""
+
+import gc
+import logging
+from typing import Optional, Tuple
+
+import numpy as np
+
+from .tag_router import parse_tagged_text
+from .chunked_tts import generate_chunked, concatenate_audio_chunks
+
+logger = logging.getLogger(__name__)
+
+
+def _resample_linear(audio: np.ndarray, src_rate: int, dst_rate: int) -> np.ndarray:
+    """Bring 1-D audio from src_rate to dst_rate using linear interpolation."""
+    if src_rate == dst_rate or len(audio) == 0:
+        return audio
+    if audio.ndim != 1:
+        # np.interp only handles 1-D input; leave other layouts untouched.
+        logger.warning("Cannot resample %d-D audio (%d Hz -> %d Hz)", audio.ndim, src_rate, dst_rate)
+        return audio
+    target_len = max(1, int(round(len(audio) * dst_rate / src_rate)))
+    src_positions = np.arange(len(audio), dtype=np.float64)
+    dst_positions = np.linspace(0.0, len(audio) - 1, num=target_len)
+    return np.interp(dst_positions, src_positions, audio).astype(np.float32)
+
+
+async def generate_hybrid(
+    text: str,
+    voice_prompt: dict,
+    language: str = "en",
+    seed: Optional[int] = None,
+    instruct: Optional[str] = None,
+    sampling_params: Optional[dict] = None,
+    max_chunk_chars: int = 800,
+    crossfade_ms: int = 50,
+    jitter_ms: int = 0,
+    primary_engine: str = "qwen",
+    primary_model_size: str = "1.7B",
+) -> Tuple[np.ndarray, int]:
+    """
+    Generate audio with hybrid engine routing.
+
+    Text segments go to the primary engine (Qwen by default).
+    Paralinguistic tag segments go to Chatterbox Turbo.
+    Results are merged in their original order with crossfade.
+
+    Args:
+        text: Input text, optionally containing bracketed tags such as "[laugh]".
+        voice_prompt: Voice prompt passed unchanged to both engines.
+        language: Language for text segments; tag segments always use "en".
+        seed: Base seed; each segment is generated with ``seed + segment index``.
+        instruct: Instruction string forwarded to the primary engine only.
+        sampling_params: Sampling options forwarded to the primary engine only.
+        max_chunk_chars: Chunk size for the tag-free fast path.
+        crossfade_ms: Crossfade applied when joining chunks.
+        jitter_ms: Forwarded as ``jitter_ms`` to the chunking/concatenation helpers.
+        primary_engine: Engine name used for text segments.
+        primary_model_size: Model size loaded for the primary engine when tags are present.
+
+    Returns:
+        ``(audio, sample_rate)`` where the sample rate is the one reported by the
+        first segment generated; segments reported at another rate are resampled to it.
+    """
+    from ..backends import (
+        load_engine_model,
+        get_tts_backend_for_engine,
+    )
+
+    segments = parse_tagged_text(text)
+
+    # Separate segments by engine, remembering each one's original position
+    text_segments = [(i, seg) for i, seg in enumerate(segments) if seg.type == "text"]
+    tag_segments = [(i, seg) for i, seg in enumerate(segments) if seg.type == "tag"]
+
+    if not tag_segments:
+        # No tags — fast path, use primary engine only
+        backend = get_tts_backend_for_engine(primary_engine)
+        return await generate_chunked(
+            backend, text, voice_prompt, language, seed, instruct,
+            max_chunk_chars=max_chunk_chars, crossfade_ms=crossfade_ms,
+            sampling_params=sampling_params, jitter_ms=jitter_ms,
+        )
+
+    logger.info(
+        "Hybrid generation: %d text segments, %d tag segments",
+        len(text_segments),
+        len(tag_segments),
+    )
+
+    # Results keyed by original segment position
+    audio_results: dict[int, np.ndarray] = {}
+    # Set by the first generated segment. A fixed default would mislabel the output
+    # when the input holds only tags and Chatterbox reports a different rate.
+    sample_rate: Optional[int] = None
+
+    # Step 1: Generate all text segments with the primary engine
+    if text_segments:
+        await load_engine_model(primary_engine, primary_model_size)
+        backend = get_tts_backend_for_engine(primary_engine)
+
+        for idx, seg in text_segments:
+            chunk_seed = (seed + idx) if seed is not None else None
+            audio, sr = await backend.generate(
+                seg.content, voice_prompt, language, chunk_seed, instruct,
+                sampling_params,
+            )
+            audio = np.asarray(audio, dtype=np.float32)
+            logger.info("Generated text segment %d: %.2fs", idx, len(audio) / sr)
+            if sample_rate is None:
+                sample_rate = sr
+            audio_results[idx] = _resample_linear(audio, sr, sample_rate)
+
+    # Step 2: Unload primary engine, load Chatterbox Turbo for tag segments.
+    # tag_segments is never empty here: the tag-free case returned above.
+    try:
+        primary_backend = get_tts_backend_for_engine(primary_engine)
+        if hasattr(primary_backend, "unload_model"):
+            primary_backend.unload_model()
+            logger.info(
+                "Unloaded %s to free memory for Chatterbox Turbo", primary_engine
+            )
+    except Exception:
+        # Best-effort: a failed unload must not abort generation, but leave a trace.
+        logger.warning("Failed to unload %s", primary_engine, exc_info=True)
+
+    gc.collect()
+
+    await load_engine_model("chatterbox_turbo")
+    cb_backend = get_tts_backend_for_engine("chatterbox_turbo")
+
+    try:
+        for idx, seg in tag_segments:
+            chunk_seed = (seed + idx) if seed is not None else None
+            # NOTE(review): voice_prompt is the same object given to the primary
+            # engine — confirm Chatterbox Turbo accepts that prompt format.
+            audio, sr = await cb_backend.generate(
+                seg.content, voice_prompt, "en", chunk_seed,
+            )
+            audio = np.asarray(audio, dtype=np.float32)
+            logger.info(
+                "Generated tag segment %d ('%s'): %.2fs",
+                idx,
+                seg.content,
+                len(audio) / sr,
+            )
+            if sample_rate is None:
+                sample_rate = sr
+            audio_results[idx] = _resample_linear(audio, sr, sample_rate)
+    finally:
+        # Unload Chatterbox after use, including when a tag segment failed
+        try:
+            cb_backend.unload_model()
+            logger.info("Unloaded Chatterbox Turbo")
+        except Exception:
+            logger.warning("Failed to unload Chatterbox Turbo", exc_info=True)
+        gc.collect()
+
+    # Step 3: Reassemble in original order
+    ordered_chunks = [audio_results[i] for i in sorted(audio_results)]
+
+    if len(ordered_chunks) == 1:
+        return ordered_chunks[0], sample_rate
+
+    combined = concatenate_audio_chunks(ordered_chunks, sample_rate, crossfade_ms, jitter_ms)
+    return combined, sample_rate
diff --git a/backend/utils/tag_router.py b/backend/utils/tag_router.py
new file mode 100644
index 00000000..bb327afe
--- /dev/null
+++ b/backend/utils/tag_router.py
@@ -0,0 +1,63 @@
+"""Split TTS input into plain-text and paralinguistic-tag segments for hybrid routing."""
+
+import re
+from dataclasses import dataclass
+from typing import Literal
+
+# Vocal cues recognised when written in square brackets, e.g. "[laugh]".
+PARA_TAGS = frozenset({
+    "laugh", "cough", "chuckle", "sigh", "breath",
+    "sneeze", "gasp", "yawn", "cry", "groan",
+    "sniff", "shush", "whimper", "scream", "whisper",
+})
+
+# Matches any known tag wrapped in square brackets, ignoring case.
+TAG_RE = re.compile(
+    r'\[(' + '|'.join(sorted(PARA_TAGS)) + r')\]',
+    re.IGNORECASE,
+)
+
+
+@dataclass
+class TagSegment:
+    # "text" for plain prose, "tag" for a bracketed paralinguistic cue.
+    type: Literal["text", "tag"]
+    # Stripped prose, or the tag exactly as written in the input (brackets included).
+    content: str
+
+
+def parse_tagged_text(text: str) -> list[TagSegment]:
+    """
+    Break ``text`` into an ordered list of prose and tag segments.
+
+    Prose between tags is whitespace-stripped and dropped when empty; each tag
+    is kept verbatim, brackets and original casing included.
+
+    Example:
+        "Hello [laugh] that was funny" -> [
+            TagSegment(type="text", content="Hello"),
+            TagSegment(type="tag", content="[laugh]"),
+            TagSegment(type="text", content="that was funny"),
+        ]
+    """
+    pieces: list[TagSegment] = []
+    cursor = 0
+
+    def emit_prose(raw: str) -> None:
+        prose = raw.strip()
+        if prose:
+            pieces.append(TagSegment(type="text", content=prose))
+
+    for hit in TAG_RE.finditer(text):
+        emit_prose(text[cursor:hit.start()])
+        pieces.append(TagSegment(type="tag", content=hit.group(0)))
+        cursor = hit.end()
+
+    # Whatever follows the final tag (or the whole input when there are none).
+    emit_prose(text[cursor:])
+    return pieces
+
+
+def has_paralinguistic_tags(text: str) -> bool:
+    """Return True when at least one known bracketed tag occurs in ``text``."""
+    return TAG_RE.search(text) is not None
diff --git a/backend/utils/text_preprocess.py b/backend/utils/text_preprocess.py
new file mode 100644
index 00000000..956b12d4
--- /dev/null
+++ b/backend/utils/text_preprocess.py
@@ -0,0 +1,150 @@
+"""LLM-based text preprocessing for humanizing TTS input."""
+
+import logging
+from typing import Literal, Optional
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# Prompt material per language: "system" is the base instruction, and
+# "intensity_hint" maps an intensity level to one extra guidance sentence.
+DISFLUENCY_PROMPTS = {
+    "es": {
+        "system": (
+            "Eres un preprocesador de texto para síntesis de voz. "
+            "Agrega disfluencias naturales del habla para que el texto suene más humano al ser leído en voz alta. "
+            "Inserta hesitaciones (eh, mm, am), muletillas (bueno, pues, o sea, digo), "
+            "micro-pausas (representadas como ...), y auto-correcciones. "
+            "Mantén el significado idéntico. NO agregues tags entre corchetes. "
+            "Devuelve SOLO el texto modificado, nada más."
+        ),
+        "intensity_hint": {
+            "light": "Agrega muy pocas disfluencias, solo 1-2 en todo el texto.",
+            "medium": "Agrega disfluencias moderadas, como una conversación casual.",
+            "heavy": "Agrega muchas disfluencias, como alguien pensando en voz alta.",
+        },
+    },
+    "en": {
+        "system": (
+            "You are a text preprocessor for text-to-speech. "
+            "Add natural speech disfluencies to make the text sound more human when spoken aloud. "
+            "Insert hesitations (um, uh, hmm), filler words (like, you know, well, I mean), "
+            "micro-pauses (represented as ...), and self-corrections. "
+            "Keep the meaning identical. Do NOT add bracketed tags. "
+            "Return ONLY the modified text, nothing else."
+        ),
+        "intensity_hint": {
+            "light": "Add very few disfluencies, only 1-2 in the entire text.",
+            "medium": "Add moderate disfluencies, like casual conversation.",
+            "heavy": "Add many disfluencies, like someone thinking out loud.",
+        },
+    },
+}
+
+# Prompt language used when the requested one has no entry in DISFLUENCY_PROMPTS
+DEFAULT_LANG = "en"
+
+
+def _resolve_prompt_language(language: str) -> str:
+    """Map a language code such as "ES" or "es-MX" onto a DISFLUENCY_PROMPTS key."""
+    normalized = (language or "").strip().lower()
+    if normalized in DISFLUENCY_PROMPTS:
+        return normalized
+    # Regional variants ("es-mx", "en_us") reuse the prompts of their base language.
+    primary = normalized.replace("_", "-").split("-", 1)[0]
+    return primary if primary in DISFLUENCY_PROMPTS else DEFAULT_LANG
+
+
+async def check_ollama_available(ollama_url: str = "http://localhost:11434") -> bool:
+    """
+    Check if Ollama is reachable.
+
+    Requests ``{ollama_url}/api/tags`` with a 3 second timeout and returns True only
+    for an HTTP 200 reply; any exception is reported as False.
+    """
+    try:
+        async with httpx.AsyncClient(timeout=3.0) as client:
+            resp = await client.get(f"{ollama_url.rstrip('/')}/api/tags")
+            return resp.status_code == 200
+    except Exception:
+        # Deliberate best-effort probe: callers only need a yes/no answer.
+        return False
+
+
+async def inject_disfluencies(
+    text: str,
+    language: str = "es",
+    intensity: Literal["light", "medium", "heavy"] = "light",
+    ollama_model: str = "llama3.2:1b",
+    ollama_url: str = "http://localhost:11434",
+) -> str:
+    """
+    Inject natural disfluencies into text using a local LLM via Ollama.
+
+    Args:
+        text: Text to humanize.
+        language: Language code selecting the prompt set. Case and region suffixes
+            are ignored ("ES", "es-MX" -> "es"); unknown languages use DEFAULT_LANG.
+        intensity: Amount of disfluency to request; unknown values use the "light" hint.
+        ollama_model: Model name sent to Ollama.
+        ollama_url: Base URL of the Ollama server; a trailing slash is tolerated.
+
+    Returns:
+        The rewritten text, or the original text unchanged when the input is blank,
+        Ollama is unreachable, the request fails, or the reply is empty or its
+        length is implausible (over 4x or under 0.3x the input length).
+    """
+    # Blank input has nothing to humanize; skip both HTTP round trips.
+    if not text.strip():
+        return text
+
+    base_url = ollama_url.rstrip("/")
+    if not await check_ollama_available(base_url):
+        logger.warning("Ollama not available at %s — skipping text preprocessing", ollama_url)
+        return text
+
+    # Get language-specific prompts
+    prompts = DISFLUENCY_PROMPTS[_resolve_prompt_language(language)]
+    system_prompt = prompts["system"]
+    intensity_hint = prompts["intensity_hint"].get(intensity, prompts["intensity_hint"]["light"])
+
+    full_prompt = f"{system_prompt}\n\nIntensity: {intensity_hint}\n\nText to process:\n{text}"
+
+    try:
+        async with httpx.AsyncClient(timeout=30.0) as client:
+            resp = await client.post(
+                f"{base_url}/api/generate",
+                json={
+                    "model": ollama_model,
+                    "prompt": full_prompt,
+                    "stream": False,
+                    "options": {
+                        "temperature": 0.7,
+                        "num_predict": len(text) * 3,  # Allow up to 3x expansion
+                    },
+                },
+            )
+            resp.raise_for_status()
+            result = resp.json().get("response", "").strip()
+
+        # Validate result is reasonable
+        if not result:
+            logger.warning("Empty response from Ollama — using original text")
+            return text
+
+        # If result is way too long or too short, it's probably garbage
+        if len(result) > len(text) * 4 or len(result) < len(text) * 0.3:
+            logger.warning(
+                "Ollama result length suspicious (original=%d, result=%d) — using original text",
+                len(text),
+                len(result),
+            )
+            return text
+
+        logger.info("Text preprocessed: %d -> %d chars (intensity=%s)", len(text), len(result), intensity)
+        return result
+
+    except Exception as e:
+        logger.warning("Ollama preprocessing failed: %s — using original text", e)
+        return text

From 5c259947b07c9c0870bbbe194a963499b2729ec6 Mon Sep 17 00:00:00 2001
From: Roberto Echeagaray <rogbertoecheagaray@gmail.com>
Date: Wed, 15 Apr 2026 02:35:29 -0600
Subject: [PATCH 2/2] feat: persist humanization params in history, improve
 rating system

- Added humanize_text, humanize_intensity, jitter_ms fields to generation history (DB migration, models, API types, UI badge)
- Rating system: weighted average with exponential decay, no minimum threshold, displays "Based on N ratings"

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../Generation/FloatingGenerateBox.tsx        |  6 ++-
 app/src/components/History/HistoryTable.tsx   | 14 +++++++
 app/src/lib/api/types.ts                      |  4 ++
 backend/database/migrations.py                |  6 +++
 backend/database/models.py                    |  4 ++
 backend/models.py                             |  7 ++++
 backend/routes/generations.py                 |  3 ++
 backend/routes/profiles.py                    | 40 ++++++++++++-------
 backend/services/history.py                   |  9 +++++
 9 files changed, 77 insertions(+), 16 deletions(-)

diff --git a/app/src/components/Generation/FloatingGenerateBox.tsx b/app/src/components/Generation/FloatingGenerateBox.tsx
index 5ab0feb4..276f40a0 100644
--- a/app/src/components/Generation/FloatingGenerateBox.tsx
+++ b/app/src/components/Generation/FloatingGenerateBox.tsx
@@ -699,7 +699,11 @@ export function FloatingGenerateBox({
                 {suggestedParams && (
                   <div className="flex items-center gap-2 mb-2 px-1 py-1 rounded-xl bg-green-500/10 border border-green-500/20">
                     <CheckCircle className="h-3 w-3 text-green-500 shrink-0 ml-1" />
-                    <span className="text-xs text-green-500 flex-1">Proven params for this voice</span>
+                    <span className="text-xs text-green-500 flex-1">
+                      {suggestedParams?.n_samples
+                        ? `Based on ${suggestedParams.n_samples} rating${suggestedParams.n_samples === 1 ? '' : 's'}`
+                        : 'Proven params for this voice'}
+                    </span>
                     <button
                       type="button"
                       className="text-xs text-green-500 font-medium hover:text-green-400 transition-colors px-1.5 py-0.5 rounded-lg hover:bg-green-500/10"
diff --git a/app/src/components/History/HistoryTable.tsx b/app/src/components/History/HistoryTable.tsx
index 88151859..486ae752 100644
--- a/app/src/components/History/HistoryTable.tsx
+++ b/app/src/components/History/HistoryTable.tsx
@@ -70,6 +70,8 @@ function ParamsBadge({ gen }: { gen: HistoryResponse }) {
     gen.top_p != null ||
     gen.repetition_penalty != null ||
     gen.speed != null ||
+    gen.humanize_text === true ||
+    (gen.jitter_ms != null && gen.jitter_ms > 0) ||
     (effectsChain && effectsChain.length > 0);
 
   if (!hasNonDefaultParams) return null;
@@ -118,6 +120,18 @@ function ParamsBadge({ gen }: { gen: HistoryResponse }) {
             <span className="tabular-nums">{gen.repetition_penalty.toFixed(2)}</span>
           </div>
         )}
+        {gen.humanize_text === true && (
+          <div className="flex justify-between">
+            <span className="text-muted-foreground">humanize</span>
+            <span className="tabular-nums">{gen.humanize_intensity ?? 'on'}</span>
+          </div>
+        )}
+        {gen.jitter_ms != null && gen.jitter_ms > 0 && (
+          <div className="flex justify-between">
+            <span className="text-muted-foreground">jitter</span>
+            <span className="tabular-nums">{gen.jitter_ms}ms</span>
+          </div>
+        )}
         {effectsChain && effectsChain.length > 0 && (
           <div className="flex justify-between">
             <span className="text-muted-foreground">effects</span>
diff --git a/app/src/lib/api/types.ts b/app/src/lib/api/types.ts
index deaea7ef..2ee447d2 100644
--- a/app/src/lib/api/types.ts
+++ b/app/src/lib/api/types.ts
@@ -117,6 +117,9 @@ export interface GenerationResponse {
   top_p?: number | null;
   repetition_penalty?: number | null;
   speed?: number | null;
+  humanize_text?: boolean;
+  humanize_intensity?: string | null;
+  jitter_ms?: number | null;
   created_at: string;
   versions?: GenerationVersionResponse[];
   active_version_id?: string;
@@ -132,6 +135,7 @@ export interface SuggestedParams {
   top_p?: number | null;
   repetition_penalty?: number | null;
   speed?: number | null;
+  n_samples?: number;
 }
 
 export interface HistoryQuery {
diff --git a/backend/database/migrations.py b/backend/database/migrations.py
index 9f682e50..d9088172 100644
--- a/backend/database/migrations.py
+++ b/backend/database/migrations.py
@@ -178,6 +178,12 @@ def _migrate_generations(engine, inspector, tables: set[str]) -> None:
         _add_column(engine, "generations", "repetition_penalty FLOAT", "repetition_penalty")
     if "speed" not in columns:
         _add_column(engine, "generations", "speed FLOAT", "speed")
+    if "humanize_text" not in columns:
+        _add_column(engine, "generations", "humanize_text BOOLEAN DEFAULT 0", "humanize_text")
+    if "humanize_intensity" not in columns:
+        _add_column(engine, "generations", "humanize_intensity VARCHAR", "humanize_intensity")
+    if "jitter_ms" not in columns:
+        _add_column(engine, "generations", "jitter_ms INTEGER DEFAULT 0", "jitter_ms")
 
 
 def _migrate_effect_presets(engine, inspector, tables: set[str]) -> None:
diff --git a/backend/database/models.py b/backend/database/models.py
index 7cd6afe9..a9e358f5 100644
--- a/backend/database/models.py
+++ b/backend/database/models.py
@@ -74,6 +74,10 @@ class Generation(Base):
     top_p = Column(Float, nullable=True)
     repetition_penalty = Column(Float, nullable=True)
     speed = Column(Float, nullable=True)
+    # Humanize / jitter settings
+    humanize_text = Column(Boolean, default=False)
+    humanize_intensity = Column(String, nullable=True)
+    jitter_ms = Column(Integer, default=0)
     created_at = Column(DateTime, default=datetime.utcnow)
 
 
diff --git a/backend/models.py b/backend/models.py
index 482ab429..fffb8ff6 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -122,6 +122,9 @@ class GenerationResponse(BaseModel):
     top_p: Optional[float] = None
     repetition_penalty: Optional[float] = None
     speed: Optional[float] = None
+    humanize_text: bool = False
+    humanize_intensity: Optional[str] = None
+    jitter_ms: int = 0
     created_at: datetime
     versions: Optional[List["GenerationVersionResponse"]] = None
     active_version_id: Optional[str] = None
@@ -162,6 +165,9 @@ class HistoryResponse(BaseModel):
     top_p: Optional[float] = None
     repetition_penalty: Optional[float] = None
     speed: Optional[float] = None
+    humanize_text: bool = False
+    humanize_intensity: Optional[str] = None
+    jitter_ms: int = 0
     created_at: datetime
     versions: Optional[List["GenerationVersionResponse"]] = None
     active_version_id: Optional[str] = None
@@ -184,6 +190,7 @@ class SuggestedParams(BaseModel):
     top_p: Optional[float] = None
     repetition_penalty: Optional[float] = None
     speed: Optional[float] = None
+    n_samples: int = 0
 
 
 class HistoryListResponse(BaseModel):
diff --git a/backend/routes/generations.py b/backend/routes/generations.py
index 16e64a64..bca0831c 100644
--- a/backend/routes/generations.py
+++ b/backend/routes/generations.py
@@ -85,6 +85,9 @@ async def generate_speech(
         top_p=data.top_p,
         repetition_penalty=data.repetition_penalty,
         speed=data.speed,
+        humanize_text=data.humanize_text,
+        humanize_intensity=data.humanize_intensity,
+        jitter_ms=data.jitter_ms,
     )
 
     task_manager.start_generation(
diff --git a/backend/routes/profiles.py b/backend/routes/profiles.py
index e61345d9..1b938917 100644
--- a/backend/routes/profiles.py
+++ b/backend/routes/profiles.py
@@ -313,17 +313,18 @@ async def get_suggested_params(
     profile_id: str,
     db: Session = Depends(get_db),
 ):
-    """Return averaged sampling params from generations with rating >= 4 for this profile.
+    """Return exponentially-weighted averaged sampling params from generations with rating >= 4.
 
-    Returns None if fewer than 3 highly-rated generations exist.
+    Weights more recent likes higher: w_i = exp(-0.1 * i) where i=0 is most recent.
+    Returns None only if zero liked generations exist.
     """
-    from sqlalchemy import func
+    import math
 
     profile = db.query(DBVoiceProfile).filter_by(id=profile_id).first()
     if not profile:
         raise HTTPException(status_code=404, detail="Profile not found")
 
-    # Query all highly-rated generations that have at least one non-null sampling param
+    # Order by most recent first so index 0 = newest like
     rated = (
         db.query(DBGeneration)
         .filter(
@@ -331,22 +332,30 @@ async def get_suggested_params(
             DBGeneration.rating >= 4,
             DBGeneration.status == "completed",
         )
+        .order_by(DBGeneration.created_at.desc())
         .all()
     )
 
-    if len(rated) < 3:
+    if not rated:
         return None
 
-    # Average across generations that have each param set
-    def _avg(attr: str) -> float | None:
-        vals = [getattr(g, attr) for g in rated if getattr(g, attr) is not None]
-        return sum(vals) / len(vals) if vals else None
-
-    temperature = _avg("temperature")
-    top_k_avg = _avg("top_k")
-    top_p = _avg("top_p")
-    repetition_penalty = _avg("repetition_penalty")
-    speed = _avg("speed")
+    # Weighted average using exponential decay; skip nulls per param
+    def _weighted_avg(attr: str) -> float | None:
+        pairs = [
+            (math.exp(-0.1 * i), getattr(g, attr))
+            for i, g in enumerate(rated)
+            if getattr(g, attr) is not None
+        ]
+        if not pairs:
+            return None
+        total_w = sum(w for w, _ in pairs)
+        return sum(w * v for w, v in pairs) / total_w
+
+    temperature = _weighted_avg("temperature")
+    top_k_avg = _weighted_avg("top_k")
+    top_p = _weighted_avg("top_p")
+    repetition_penalty = _weighted_avg("repetition_penalty")
+    speed = _weighted_avg("speed")
 
     # If none of the params are available across all rated gens, no suggestion
     if all(v is None for v in (temperature, top_k_avg, top_p, repetition_penalty, speed)):
@@ -358,6 +367,7 @@ def _avg(attr: str) -> float | None:
         top_p=top_p,
         repetition_penalty=repetition_penalty,
         speed=speed,
+        n_samples=len(rated),
     )
 
 
diff --git a/backend/services/history.py b/backend/services/history.py
index 982543a1..9002177a 100644
--- a/backend/services/history.py
+++ b/backend/services/history.py
@@ -70,6 +70,9 @@ async def create_generation(
     top_p: Optional[float] = None,
     repetition_penalty: Optional[float] = None,
     speed: Optional[float] = None,
+    humanize_text: bool = False,
+    humanize_intensity: Optional[str] = None,
+    jitter_ms: int = 0,
 ) -> GenerationResponse:
     """
     Create a new generation history entry.
@@ -108,6 +111,9 @@ async def create_generation(
         top_p=top_p,
         repetition_penalty=repetition_penalty,
         speed=speed,
+        humanize_text=humanize_text,
+        humanize_intensity=humanize_intensity,
+        jitter_ms=jitter_ms,
         created_at=datetime.utcnow(),
     )
 
@@ -234,6 +240,9 @@ async def list_generations(
             top_p=generation.top_p,
             repetition_penalty=generation.repetition_penalty,
             speed=generation.speed,
+            humanize_text=bool(generation.humanize_text),
+            humanize_intensity=generation.humanize_intensity,
+            jitter_ms=generation.jitter_ms or 0,
             created_at=generation.created_at,
             versions=versions,
             active_version_id=active_version_id,