Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions app/src/components/ServerSettings/ConnectionForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { useForm } from 'react-hook-form';
import * as z from 'zod';
import { Button } from '@/components/ui/button';
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card';
import { Checkbox } from '@/components/ui/checkbox';
import {
Form,
FormControl,
Expand All @@ -14,10 +15,10 @@ import {
FormMessage,
} from '@/components/ui/form';
import { Input } from '@/components/ui/input';
import { Checkbox } from '@/components/ui/checkbox';
import { Slider } from '@/components/ui/slider';
import { useToast } from '@/components/ui/use-toast';
import { useServerStore } from '@/stores/serverStore';
import { usePlatform } from '@/platform/PlatformContext';
import { useServerStore } from '@/stores/serverStore';

const connectionSchema = z.object({
serverUrl: z.string().url('Please enter a valid URL'),
Expand All @@ -33,6 +34,8 @@ export function ConnectionForm() {
const setKeepServerRunningOnClose = useServerStore((state) => state.setKeepServerRunningOnClose);
const mode = useServerStore((state) => state.mode);
const setMode = useServerStore((state) => state.setMode);
const maxChunkChars = useServerStore((state) => state.maxChunkChars);
const setMaxChunkChars = useServerStore((state) => state.setMaxChunkChars);
const { toast } = useToast();

const form = useForm<ConnectionFormValues>({
Expand All @@ -59,11 +62,7 @@ export function ConnectionForm() {
}

return (
<Card
role="region"
aria-label="Server Connection"
tabIndex={0}
>
<Card role="region" aria-label="Server Connection" tabIndex={0}>
<CardHeader>
<CardTitle>Server Connection</CardTitle>
</CardHeader>
Expand Down Expand Up @@ -153,6 +152,32 @@ export function ConnectionForm() {
</div>
</div>
)}

<div className="mt-6 pt-6 border-t">
<div className="space-y-3">
<div className="flex items-center justify-between">
<label htmlFor="maxChunkChars" className="text-sm font-medium leading-none">
Auto-chunking limit
</label>
<span className="text-sm tabular-nums text-muted-foreground">
{maxChunkChars} chars
</span>
</div>
<Slider
id="maxChunkChars"
value={[maxChunkChars]}
onValueChange={([value]) => setMaxChunkChars(value)}
min={100}
max={2000}
step={50}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Expand slider max to 5000 to match the feature contract.

Lines 170–172 currently cap the slider at 2000, but the feature/back-end contract supports up to 5000. This unnecessarily restricts valid user configuration.

Proposed change
             <Slider
               id="maxChunkChars"
               value={[maxChunkChars]}
               onValueChange={([value]) => setMaxChunkChars(value)}
               min={100}
-              max={2000}
+              max={5000}
               step={50}
               aria-label="Auto-chunking character limit"
             />
             <p className="text-sm text-muted-foreground">
               Long text is split into chunks at sentence boundaries before generating. Lower values
-              can improve quality for long outputs. Default is 800.
+              can improve quality for long outputs. Range is 100–5000. Default is 800.
             </p>

Also applies to: 175-178

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@app/src/components/ServerSettings/ConnectionForm.tsx` around lines 170 - 172,
ConnectionForm currently sets the slider max to 2000 which conflicts with the
feature/back-end contract; change the slider max prop from 2000 to 5000 for both
slider occurrences in ConnectionForm (the two blocks around the shown
min/max/step props) and update any related constants or validation logic in the
ConnectionForm component that enforce a 2000 upper bound so they match the new
5000 limit.

aria-label="Auto-chunking character limit"
/>
<p className="text-sm text-muted-foreground">
Long text is split into chunks at sentence boundaries before generating. Lower values
can improve quality for long outputs. Default is 800.
</p>
</div>
</div>
</CardContent>
</Card>
);
Expand Down
1 change: 1 addition & 0 deletions app/src/lib/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export interface GenerationRequest {
model_size?: '1.7B' | '0.6B';
engine?: 'qwen' | 'luxtts' | 'chatterbox' | 'chatterbox_turbo';
instruct?: string;
max_chunk_chars?: number;
}

export interface GenerationResponse {
Expand Down
5 changes: 4 additions & 1 deletion app/src/lib/hooks/useGenerationForm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@ import { useGeneration } from '@/lib/hooks/useGeneration';
import { useModelDownloadToast } from '@/lib/hooks/useModelDownloadToast';
import { useGenerationStore } from '@/stores/generationStore';
import { usePlayerStore } from '@/stores/playerStore';
import { useServerStore } from '@/stores/serverStore';

const generationSchema = z.object({
text: z.string().min(1, 'Text is required').max(5000),
text: z.string().min(1, 'Text is required').max(50000),
language: z.enum(LANGUAGE_CODES as [LanguageCode, ...LanguageCode[]]),
seed: z.number().int().optional(),
modelSize: z.enum(['1.7B', '0.6B']).optional(),
Expand All @@ -31,6 +32,7 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
const generation = useGeneration();
const setAudioWithAutoPlay = usePlayerStore((state) => state.setAudioWithAutoPlay);
const setIsGenerating = useGenerationStore((state) => state.setIsGenerating);
const maxChunkChars = useServerStore((state) => state.maxChunkChars);
const [downloadingModelName, setDownloadingModelName] = useState<string | null>(null);
const [downloadingDisplayName, setDownloadingDisplayName] = useState<string | null>(null);

Expand Down Expand Up @@ -110,6 +112,7 @@ export function useGenerationForm(options: UseGenerationFormOptions = {}) {
model_size: isQwen ? data.modelSize : undefined,
engine,
instruct: isQwen ? data.instruct || undefined : undefined,
max_chunk_chars: maxChunkChars,
});

toast({
Expand Down
6 changes: 6 additions & 0 deletions app/src/stores/serverStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ interface ServerStore {

keepServerRunningOnClose: boolean;
setKeepServerRunningOnClose: (keepRunning: boolean) => void;

maxChunkChars: number;
setMaxChunkChars: (value: number) => void;
}

export const useServerStore = create<ServerStore>()(
Expand All @@ -29,6 +32,9 @@ export const useServerStore = create<ServerStore>()(

keepServerRunningOnClose: false,
setKeepServerRunningOnClose: (keepRunning) => set({ keepServerRunningOnClose: keepRunning }),

maxChunkChars: 800,
setMaxChunkChars: (value) => set({ maxChunkChars: value }),
}),
{
name: 'voicebox-server',
Expand Down
47 changes: 29 additions & 18 deletions backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,18 +824,24 @@ async def download_chatterbox_turbo_background():
engine=engine,
)

audio, sample_rate = await tts_model.generate(
data.text,
voice_prompt,
data.language,
data.seed,
data.instruct,
)
from .utils.chunked_tts import generate_chunked

# Trim trailing silence/hallucination for Chatterbox output
# Resolve per-chunk trim function for engines that need it
trim_fn = None
if engine in ("chatterbox", "chatterbox_turbo"):
from .utils.audio import trim_tts_output
audio = trim_tts_output(audio, sample_rate)
trim_fn = trim_tts_output

audio, sample_rate = await generate_chunked(
tts_model,
data.text,
voice_prompt,
language=data.language,
seed=data.seed,
instruct=data.instruct,
max_chunk_chars=data.max_chunk_chars,
trim_fn=trim_fn,
)

# Calculate duration
duration = len(audio) / sample_rate
Expand Down Expand Up @@ -949,18 +955,23 @@ async def stream_speech(
data.profile_id, db, engine=engine,
)

audio, sample_rate = await tts_model.generate(
data.text,
voice_prompt,
data.language,
data.seed,
data.instruct,
)
from .utils.chunked_tts import generate_chunked

# Trim trailing silence/hallucination for Chatterbox output
trim_fn = None
if engine in ("chatterbox", "chatterbox_turbo"):
from .utils.audio import trim_tts_output
audio = trim_tts_output(audio, sample_rate)
trim_fn = trim_tts_output

audio, sample_rate = await generate_chunked(
tts_model,
data.text,
voice_prompt,
language=data.language,
seed=data.seed,
instruct=data.instruct,
max_chunk_chars=data.max_chunk_chars,
trim_fn=trim_fn,
)

wav_bytes = tts.audio_to_wav_bytes(audio, sample_rate)

Expand Down
3 changes: 2 additions & 1 deletion backend/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,13 @@ class Config:
class GenerationRequest(BaseModel):
"""Request model for voice generation."""
profile_id: str
text: str = Field(..., min_length=1, max_length=5000)
text: str = Field(..., min_length=1, max_length=50000)
language: str = Field(default="en", pattern="^(zh|en|ja|ko|de|fr|ru|pt|es|it|he)$")
seed: Optional[int] = Field(None, ge=0)
model_size: Optional[str] = Field(default="1.7B", pattern="^(1\\.7B|0\\.6B)$")
instruct: Optional[str] = Field(None, max_length=500)
engine: Optional[str] = Field(default="qwen", pattern="^(qwen|luxtts|chatterbox|chatterbox_turbo)$")
max_chunk_chars: int = Field(default=800, ge=100, le=5000, description="Max characters per chunk for long text splitting")


class GenerationResponse(BaseModel):
Expand Down
Loading