Skip to content

Commit 0dfbf99

Browse files
committed
Add long text splitting and update to v3.2.0
Introduces long text splitting functionality to TTSFM, allowing automatic chunking and speech generation for texts exceeding the character limit. Updates CLI, Python API, and web app to use the new methods, expands supported voices, and improves documentation and UI. Bumps version to 3.2.0 and refines styles and feature descriptions.
1 parent 3e5f10b commit 0dfbf99

File tree

12 files changed: +363 additions, -177 deletions

example_long_text.py

Lines changed: 76 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,76 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Example demonstrating long text splitting functionality in TTSFM.
4+
5+
This example shows how to use the new long text methods to automatically
6+
split and generate speech from text longer than the 4096 character limit.
7+
"""
8+
9+
from ttsfm import TTSClient, Voice, AudioFormat
10+
11+
def main():
12+
# Create a long text example (over 4096 characters)
13+
long_text = """
14+
This is a demonstration of the TTSFM long text functionality.
15+
When you have text that exceeds the 4096 character limit, TTSFM can
16+
automatically split it into smaller chunks and generate speech for each chunk.
17+
18+
The text splitting is intelligent - it preserves word boundaries by default,
19+
so you won't get words cut off in the middle. This ensures natural-sounding
20+
speech across all generated audio files.
21+
22+
You can use this functionality in several ways:
23+
24+
1. Using the TTSClient.generate_speech_long_text() method
25+
2. Using the TTSClient.generate_speech_batch() method (alias)
26+
3. Using the convenience function generate_speech_long_text()
27+
4. Using the CLI with the --split-long-text flag
28+
29+
The method returns a list of TTSResponse objects, one for each chunk.
30+
You can then save each response to a separate file, or combine them
31+
if needed for your use case.
32+
33+
This feature is particularly useful for:
34+
- Converting long articles or documents to speech
35+
- Processing book chapters or large text files
36+
- Generating audio for educational content
37+
- Creating podcasts or audiobooks from text
38+
39+
The splitting algorithm is designed to be smart about where it breaks
40+
the text, preferring to split at sentence boundaries when possible,
41+
and always preserving word boundaries unless explicitly disabled.
42+
""" * 10 # Repeat to make it definitely over 4096 characters
43+
44+
print(f"Text length: {len(long_text)} characters")
45+
print("Generating speech from long text...\n")
46+
47+
# Create client
48+
client = TTSClient()
49+
50+
try:
51+
# Method 1: Using generate_speech_long_text
52+
print("Method 1: Using generate_speech_long_text()")
53+
responses = client.generate_speech_long_text(
54+
text=long_text,
55+
voice=Voice.ALLOY,
56+
response_format=AudioFormat.MP3,
57+
max_length=2000, # Smaller chunks for demo
58+
preserve_words=True
59+
)
60+
61+
print(f"Generated {len(responses)} audio chunks")
62+
63+
# Save each chunk
64+
for i, response in enumerate(responses, 1):
65+
filename = f"long_text_part_{i:03d}.mp3"
66+
response.save_to_file(filename)
67+
print(f"Saved: {filename}")
68+
69+
print(f"\nTotal audio files generated: {len(responses)}")
70+
print("You can play these files in sequence to hear the complete text.")
71+
72+
except Exception as e:
73+
print(f"Error: {e}")
74+
75+
if __name__ == "__main__":
76+
main()

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "ttsfm"
7-
version = "3.1.0"
7+
version = "3.2.0"
88
description = "Text-to-Speech API Client with OpenAI compatibility"
99
readme = "README.md"
1010
license = "MIT"

ttsfm-web/app.py

Lines changed: 21 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -310,50 +310,50 @@ def generate_speech_batch():
310310
except ValueError as e:
311311
return jsonify({"error": f"Invalid voice or format: {e}"}), 400
312312

313-
# Split text into chunks
314-
chunks = split_text_by_length(text, max_length, preserve_words)
315-
316-
if not chunks:
313+
# Use the new long text method
314+
try:
315+
responses = tts_client.generate_speech_long_text(
316+
text=text,
317+
voice=voice_enum,
318+
response_format=format_enum,
319+
instructions=instructions,
320+
max_length=max_length,
321+
preserve_words=preserve_words
322+
)
323+
except Exception as e:
324+
logger.error(f"Long text generation failed: {e}")
325+
return jsonify({"error": f"Long text generation failed: {str(e)}"}), 500
326+
327+
if not responses:
317328
return jsonify({"error": "No valid text chunks found"}), 400
318329

319-
logger.info(f"Processing {len(chunks)} chunks for batch generation")
330+
logger.info(f"Generated {len(responses)} chunks for batch generation")
320331

321-
# Generate speech for each chunk
332+
# Process responses
322333
results = []
323-
for i, chunk in enumerate(chunks):
334+
for i, response in enumerate(responses):
324335
try:
325-
response = tts_client.generate_speech(
326-
text=chunk,
327-
voice=voice_enum,
328-
response_format=format_enum,
329-
instructions=instructions,
330-
max_length=max_length,
331-
validate_length=False # Already split
332-
)
333-
334336
# Convert to base64 for JSON response
335337
import base64
336338
audio_b64 = base64.b64encode(response.audio_data).decode('utf-8')
337339

338340
results.append({
339341
"chunk_index": i + 1,
340-
"chunk_text": chunk[:100] + "..." if len(chunk) > 100 else chunk,
341342
"audio_data": audio_b64,
342343
"content_type": response.content_type,
343344
"size": response.size,
344345
"format": response.format.value
345346
})
346347

347348
except Exception as e:
348-
logger.error(f"Failed to generate chunk {i+1}: {e}")
349+
logger.error(f"Failed to process chunk {i+1}: {e}")
349350
results.append({
350351
"chunk_index": i + 1,
351-
"chunk_text": chunk[:100] + "..." if len(chunk) > 100 else chunk,
352352
"error": str(e)
353353
})
354354

355355
return jsonify({
356-
"total_chunks": len(chunks),
356+
"total_chunks": len(responses),
357357
"successful_chunks": len([r for r in results if "audio_data" in r]),
358358
"results": results
359359
})
@@ -376,7 +376,7 @@ def get_status():
376376
return jsonify({
377377
"status": "online",
378378
"tts_service": "openai.fm (free)",
379-
"package_version": "3.0.0",
379+
"package_version": "3.2.0",
380380
"timestamp": datetime.now().isoformat()
381381
})
382382

ttsfm-web/static/css/style.css

Lines changed: 42 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -2,25 +2,25 @@
22

33
:root {
44
/* Clean Color Palette */
5-
--primary-color: #2563eb;
6-
--primary-dark: #1d4ed8;
7-
--primary-light: #3b82f6;
8-
--secondary-color: #64748b;
9-
--secondary-dark: #475569;
10-
--accent-color: #10b981;
11-
--accent-dark: #059669;
5+
--primary-color: #4f46e5;
6+
--primary-dark: #3730a3;
7+
--primary-light: #6366f1;
8+
--secondary-color: #6b7280;
9+
--secondary-dark: #4b5563;
10+
--accent-color: #059669;
11+
--accent-dark: #047857;
1212

1313
/* Status Colors */
14-
--success-color: #10b981;
15-
--warning-color: #f59e0b;
16-
--danger-color: #ef4444;
17-
--info-color: #3b82f6;
14+
--success-color: #059669;
15+
--warning-color: #d97706;
16+
--danger-color: #dc2626;
17+
--info-color: #2563eb;
1818

1919
/* Clean Neutral Colors */
2020
--light-color: #ffffff;
21-
--light-gray: #f8fafc;
22-
--medium-gray: #64748b;
23-
--dark-color: #1e293b;
21+
--light-gray: #f9fafb;
22+
--medium-gray: #6b7280;
23+
--dark-color: #111827;
2424
--text-color: #374151;
2525
--text-muted: #6b7280;
2626

@@ -75,33 +75,38 @@ h1, h2, h3, h4, h5, h6 {
7575
/* Simplified Button Styles */
7676
.btn {
7777
font-weight: 600;
78-
border-radius: var(--border-radius-sm);
79-
transition: all 0.2s ease;
78+
border-radius: 12px;
79+
transition: all 0.3s ease;
8080
letter-spacing: 0.025em;
81+
border: none;
82+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
8183
}
8284

8385
.btn-primary {
84-
background-color: var(--primary-color);
85-
border-color: var(--primary-color);
86+
background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-light) 100%);
8687
color: white;
8788
}
8889

8990
.btn-primary:hover {
90-
background-color: var(--primary-dark);
91-
border-color: var(--primary-dark);
91+
background: linear-gradient(135deg, var(--primary-dark) 0%, var(--primary-color) 100%);
9292
color: white;
93+
transform: translateY(-1px);
94+
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
9395
}
9496

9597
.btn-outline-primary {
9698
border: 2px solid var(--primary-color);
9799
color: var(--primary-color);
98100
background: transparent;
101+
box-shadow: none;
99102
}
100103

101104
.btn-outline-primary:hover {
102105
background: var(--primary-color);
103106
border-color: var(--primary-color);
104107
color: white;
108+
transform: translateY(-1px);
109+
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
105110
}
106111

107112
.btn-lg {
@@ -119,15 +124,16 @@ h1, h2, h3, h4, h5, h6 {
119124
/* Clean Card Styles */
120125
.card {
121126
border: 1px solid #e5e7eb;
122-
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
123-
transition: all 0.2s ease;
124-
border-radius: 12px;
127+
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
128+
transition: all 0.3s ease;
129+
border-radius: 16px;
125130
background: white;
126131
}
127132

128133
.card:hover {
129-
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.07);
130-
border-color: #d1d5db;
134+
box-shadow: 0 10px 25px rgba(0, 0, 0, 0.1);
135+
border-color: var(--primary-light);
136+
transform: translateY(-2px);
131137
}
132138

133139
.card-body {
@@ -136,10 +142,10 @@ h1, h2, h3, h4, h5, h6 {
136142

137143
/* Clean Hero Section */
138144
.hero-section {
139-
background: linear-gradient(135deg, #f8fafc 0%, #ffffff 100%);
145+
background: linear-gradient(135deg, #f9fafb 0%, #ffffff 100%);
140146
color: var(--text-color);
141-
padding: 6rem 0;
142-
min-height: 80vh;
147+
padding: 5rem 0;
148+
min-height: 75vh;
143149
display: flex;
144150
align-items: center;
145151
border-bottom: 1px solid #e5e7eb;
@@ -186,24 +192,27 @@ code {
186192

187193
/* Enhanced Form Styles */
188194
.form-control, .form-select {
189-
border-radius: var(--border-radius-sm);
190-
border: 2px solid #e2e8f0;
195+
border-radius: 12px;
196+
border: 2px solid #e5e7eb;
191197
transition: var(--transition);
192-
padding: 0.875rem 1rem;
198+
padding: 1rem 1.25rem;
193199
font-size: 1rem;
194200
background-color: #ffffff;
195201
color: var(--text-color);
202+
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
196203
}
197204

198205
.form-control:focus, .form-select:focus {
199206
border-color: var(--primary-color);
200-
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
207+
box-shadow: 0 0 0 4px rgba(79, 70, 229, 0.1);
201208
outline: none;
202209
background-color: #ffffff;
210+
transform: translateY(-1px);
203211
}
204212

205213
.form-control:hover, .form-select:hover {
206-
border-color: #cbd5e1;
214+
border-color: var(--primary-light);
215+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
207216
}
208217

209218
.form-label {

0 commit comments

Comments
 (0)