ttsforge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ttsforge/__init__.py +114 -0
- ttsforge/_version.py +34 -0
- ttsforge/audio_merge.py +180 -0
- ttsforge/audio_player.py +473 -0
- ttsforge/chapter_selection.py +75 -0
- ttsforge/cli/__init__.py +73 -0
- ttsforge/cli/commands_conversion.py +1927 -0
- ttsforge/cli/commands_phonemes.py +1033 -0
- ttsforge/cli/commands_utility.py +1389 -0
- ttsforge/cli/helpers.py +76 -0
- ttsforge/constants.py +164 -0
- ttsforge/conversion.py +1090 -0
- ttsforge/input_reader.py +408 -0
- ttsforge/kokoro_lang.py +12 -0
- ttsforge/kokoro_runner.py +125 -0
- ttsforge/name_extractor.py +305 -0
- ttsforge/phoneme_conversion.py +978 -0
- ttsforge/phonemes.py +486 -0
- ttsforge/ssmd_generator.py +422 -0
- ttsforge/utils.py +785 -0
- ttsforge/vocab/__init__.py +139 -0
- ttsforge-0.1.0.dist-info/METADATA +659 -0
- ttsforge-0.1.0.dist-info/RECORD +27 -0
- ttsforge-0.1.0.dist-info/WHEEL +5 -0
- ttsforge-0.1.0.dist-info/entry_points.txt +2 -0
- ttsforge-0.1.0.dist-info/licenses/LICENSE +21 -0
- ttsforge-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,659 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ttsforge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Generate audiobooks from EPUB files using Kokoro ONNX TTS.
|
|
5
|
+
Author-email: Holger Nahrstaedt <nahrstaedt@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2025 Holger Nahrstaedt
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/holgern/ttsforge
|
|
29
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
30
|
+
Classifier: Intended Audience :: Developers
|
|
31
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
32
|
+
Classifier: Operating System :: OS Independent
|
|
33
|
+
Classifier: Programming Language :: Python :: 3
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
38
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
|
39
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
40
|
+
Requires-Python: >=3.10
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
License-File: LICENSE
|
|
43
|
+
Requires-Dist: click>=8.0.0
|
|
44
|
+
Requires-Dist: rich>=13.0.0
|
|
45
|
+
Requires-Dist: epub2text
|
|
46
|
+
Requires-Dist: numpy
|
|
47
|
+
Requires-Dist: soundfile>=0.12.0
|
|
48
|
+
Requires-Dist: chardet
|
|
49
|
+
Requires-Dist: charset-normalizer
|
|
50
|
+
Requires-Dist: platformdirs>=3.0.0
|
|
51
|
+
Requires-Dist: pykokoro>=0.6.4
|
|
52
|
+
Provides-Extra: audio
|
|
53
|
+
Requires-Dist: sounddevice>=0.4.6; extra == "audio"
|
|
54
|
+
Provides-Extra: dev
|
|
55
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
56
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
57
|
+
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
58
|
+
Provides-Extra: static-ffmpeg
|
|
59
|
+
Requires-Dist: static-ffmpeg>=3.0; extra == "static-ffmpeg"
|
|
60
|
+
Provides-Extra: gpu
|
|
61
|
+
Requires-Dist: onnxruntime-gpu>=1.16.0; extra == "gpu"
|
|
62
|
+
Dynamic: license-file
|
|
63
|
+
|
|
64
|
+
[ttsforge on PyPI](https://pypi.org/project/ttsforge/)
|
|
65
|
+

|
|
66
|
+

|
|
67
|
+
[Coverage on Codecov](https://codecov.io/gh/holgern/ttsforge)
|
|
68
|
+
|
|
69
|
+
# ttsforge
|
|
70
|
+
|
|
71
|
+
Convert EPUB files to audiobooks using Kokoro ONNX TTS.
|
|
72
|
+
|
|
73
|
+
ttsforge is a command-line tool that transforms EPUB ebooks into high-quality audiobooks
|
|
74
|
+
with support for 54 neural voices across 9 languages.
|
|
75
|
+
|
|
76
|
+
## Features
|
|
77
|
+
|
|
78
|
+
- **EPUB to Audiobook**: Convert EPUB files to M4B, MP3, WAV, FLAC, or OPUS
|
|
79
|
+
- **54 Neural Voices**: High-quality TTS in 9 languages
|
|
80
|
+
- **SSMD Editing**: Edit intermediate SSMD files to fine-tune pronunciation and pacing
|
|
81
|
+
- **Custom Phoneme Dictionary**: Control pronunciation of names and technical terms
|
|
82
|
+
- **Auto Name Extraction**: Automatically extract names from books for phoneme
|
|
83
|
+
customization
|
|
84
|
+
- **Mixed-Language Support**: Auto-detect and handle multiple languages in text
|
|
85
|
+
- **Resumable Conversions**: Interrupt and resume long audiobook conversions
|
|
86
|
+
- **Phoneme Pre-tokenization**: Pre-process text for faster batch conversions
|
|
87
|
+
- **Configurable Filenames**: Template-based output naming with book metadata
|
|
88
|
+
- **Voice Blending**: Mix multiple voices for custom narration
|
|
89
|
+
- **GPU Acceleration**: Optional CUDA support for faster processing
|
|
90
|
+
- **Chapter Support**: M4B files include chapter markers from EPUB
|
|
91
|
+
- **Streaming Read**: Listen to EPUB/text directly with the `read` command
|
|
92
|
+
|
|
93
|
+
## Installation
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
pip install ttsforge
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Optional extras:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
# Audio playback (required for --play and read)
|
|
103
|
+
pip install "ttsforge[audio]"
|
|
104
|
+
|
|
105
|
+
# Bundled ffmpeg (if you cannot install system ffmpeg)
|
|
106
|
+
pip install "ttsforge[static-ffmpeg]"
|
|
107
|
+
|
|
108
|
+
# GPU acceleration (CUDA)
|
|
109
|
+
pip install "ttsforge[gpu]"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Dependencies
|
|
113
|
+
|
|
114
|
+
- **ffmpeg**: Required for MP3/FLAC/OPUS/M4B output and chapter merging
|
|
115
|
+
- **espeak-ng**: Required for phonemization
|
|
116
|
+
- **sounddevice** (optional): Needed only for audio playback (`--play`, `read`)
|
|
117
|
+
|
|
118
|
+
**Ubuntu/Debian:**
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
sudo apt-get install ffmpeg espeak-ng
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**macOS:**
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
brew install ffmpeg espeak-ng
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Quick Start
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
# Convert an EPUB to audiobook (M4B with chapters)
|
|
134
|
+
ttsforge convert book.epub
|
|
135
|
+
|
|
136
|
+
# Use a specific voice
|
|
137
|
+
ttsforge convert book.epub -v am_adam
|
|
138
|
+
|
|
139
|
+
# Convert specific chapters
|
|
140
|
+
ttsforge convert book.epub --chapters 1-5
|
|
141
|
+
|
|
142
|
+
# List available voices
|
|
143
|
+
ttsforge voices
|
|
144
|
+
|
|
145
|
+
# Generate a voice demo
|
|
146
|
+
ttsforge demo
|
|
147
|
+
|
|
148
|
+
# Read an EPUB aloud (streaming playback)
|
|
149
|
+
ttsforge read book.epub
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Usage
|
|
153
|
+
|
|
154
|
+
### Basic Conversion
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
ttsforge convert book.epub
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Creates `book.m4b` with default settings (voice: `af_heart`, format: M4B).
|
|
161
|
+
|
|
162
|
+
### Voice Selection
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
# List all voices
|
|
166
|
+
ttsforge voices
|
|
167
|
+
|
|
168
|
+
# List voices for a language
|
|
169
|
+
ttsforge voices -l b # British English
|
|
170
|
+
|
|
171
|
+
# Convert with specific voice
|
|
172
|
+
ttsforge convert book.epub -v bf_emma
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Output Formats
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
ttsforge convert book.epub -f mp3 # MP3
|
|
179
|
+
ttsforge convert book.epub -f wav # WAV (uncompressed)
|
|
180
|
+
ttsforge convert book.epub -f flac # FLAC (lossless)
|
|
181
|
+
ttsforge convert book.epub -f opus # OPUS
|
|
182
|
+
ttsforge convert book.epub -f m4b # M4B audiobook (default)
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Chapter Selection
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
# Preview chapters
|
|
189
|
+
ttsforge list book.epub
|
|
190
|
+
|
|
191
|
+
# Convert range
|
|
192
|
+
ttsforge convert book.epub --chapters 1-5
|
|
193
|
+
|
|
194
|
+
# Convert specific chapters
|
|
195
|
+
ttsforge convert book.epub --chapters 1,3,5,7
|
|
196
|
+
|
|
197
|
+
# Mixed selection
|
|
198
|
+
ttsforge convert book.epub --chapters 1-3,5,10-15
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### Speed Control
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
ttsforge convert book.epub -s 1.2 # 20% faster
|
|
205
|
+
ttsforge convert book.epub -s 0.9 # 10% slower
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### Resumable Conversions
|
|
209
|
+
|
|
210
|
+
Conversions are resumable by default. If interrupted, re-run the same command:
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
ttsforge convert book.epub # Resumes from last chapter
|
|
214
|
+
ttsforge convert book.epub --fresh # Start over
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Phoneme Workflow
|
|
218
|
+
|
|
219
|
+
For large books or batch processing, pre-tokenize to phonemes:
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
# Export to phonemes (fast, CPU-only)
|
|
223
|
+
ttsforge phonemes export book.epub
|
|
224
|
+
|
|
225
|
+
# Convert phonemes to audio (can run on different machine)
|
|
226
|
+
ttsforge phonemes convert book.phonemes.json -v am_adam
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### Configuration
|
|
230
|
+
|
|
231
|
+
```bash
|
|
232
|
+
# View settings
|
|
233
|
+
ttsforge config --show
|
|
234
|
+
|
|
235
|
+
# Set defaults
|
|
236
|
+
ttsforge config --set default_voice am_adam
|
|
237
|
+
ttsforge config --set default_format mp3
|
|
238
|
+
ttsforge config --set use_gpu true
|
|
239
|
+
|
|
240
|
+
# Reset to defaults
|
|
241
|
+
ttsforge config --reset
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### Filename Templates
|
|
245
|
+
|
|
246
|
+
Customize output filenames with metadata:
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
ttsforge config --set output_filename_template "{author} - {book_title}"
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
Available variables: `{book_title}`, `{author}`, `{chapter_title}`, `{chapter_num}`,
|
|
253
|
+
`{input_stem}`, `{chapters_range}`
|
|
254
|
+
|
|
255
|
+
## Voices
|
|
256
|
+
|
|
257
|
+
ttsforge includes 54 voices across 9 languages:
|
|
258
|
+
|
|
259
|
+
| Language | Code | Voices | Default |
|
|
260
|
+
| -------------------- | ---- | ------ | ------------- |
|
|
261
|
+
| American English | `a` | 20 | `af_heart` |
|
|
262
|
+
| British English | `b` | 8 | `bf_emma` |
|
|
263
|
+
| Spanish | `e` | 3 | `ef_dora` |
|
|
264
|
+
| French | `f` | 1 | `ff_siwis` |
|
|
265
|
+
| Hindi | `h` | 4 | `hf_alpha` |
|
|
266
|
+
| Italian | `i` | 2 | `if_sara` |
|
|
267
|
+
| Japanese | `j` | 5 | `jf_alpha` |
|
|
268
|
+
| Brazilian Portuguese | `p` | 3 | `pf_dora` |
|
|
269
|
+
| Mandarin Chinese | `z` | 8 | `zf_xiaoxiao` |
|
|
270
|
+
|
|
271
|
+
Voice naming: `{lang}{gender}_{name}` (e.g., `am_adam` = American Male "Adam")
|
|
272
|
+
|
|
273
|
+
### Voice Demo
|
|
274
|
+
|
|
275
|
+
```bash
|
|
276
|
+
# Demo all voices
|
|
277
|
+
ttsforge demo
|
|
278
|
+
|
|
279
|
+
# Demo specific language
|
|
280
|
+
ttsforge demo -l a
|
|
281
|
+
|
|
282
|
+
# Save individual voice files
|
|
283
|
+
ttsforge demo --separate -o ./voices/
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Voice Blending
|
|
287
|
+
|
|
288
|
+
Mix multiple voices for custom narration:
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
# Using --voice parameter (auto-detects blend format)
|
|
292
|
+
ttsforge convert book.epub --voice "af_nicole:50,am_michael:50"
|
|
293
|
+
|
|
294
|
+
# Using --voice-blend parameter (traditional method)
|
|
295
|
+
ttsforge convert book.epub --voice-blend "af_nicole:50,am_michael:50"
|
|
296
|
+
|
|
297
|
+
# Weighted blends (70% Nicole, 30% Michael)
|
|
298
|
+
ttsforge convert book.epub --voice "af_nicole:70,am_michael:30"
|
|
299
|
+
|
|
300
|
+
# Works with all commands
|
|
301
|
+
ttsforge sample "Hello world" --voice "af_sky:60,bf_emma:40" -p
|
|
302
|
+
ttsforge phonemes preview "Test blend" --voice "am_adam:50,am_michael:50" --play
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### Mixed-Language Support
|
|
306
|
+
|
|
307
|
+
For books with multiple languages (e.g., German text with English technical terms):
|
|
308
|
+
|
|
309
|
+
```bash
|
|
310
|
+
# Enable mixed-language auto-detection
|
|
311
|
+
ttsforge convert book.epub \
|
|
312
|
+
--use-mixed-language \
|
|
313
|
+
--mixed-language-primary de \
|
|
314
|
+
--mixed-language-allowed de,en-us
|
|
315
|
+
|
|
316
|
+
# Test with a sample
|
|
317
|
+
ttsforge sample \
|
|
318
|
+
"Das ist ein deutscher Satz. This is an English sentence." \
|
|
319
|
+
--use-mixed-language \
|
|
320
|
+
--mixed-language-primary de \
|
|
321
|
+
--mixed-language-allowed de,en-us
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
**Requirements**: Install `lingua-language-detector` for automatic language detection:
|
|
325
|
+
|
|
326
|
+
```bash
|
|
327
|
+
pip install lingua-language-detector
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
**Configuration options:**
|
|
331
|
+
|
|
332
|
+
- `--use-mixed-language` - Enable mixed-language mode
|
|
333
|
+
- `--mixed-language-primary LANG` - Primary language (e.g., `de`, `en-us`)
|
|
334
|
+
- `--mixed-language-allowed LANGS` - Comma-separated list of allowed languages
|
|
335
|
+
- `--mixed-language-confidence FLOAT` - Detection confidence threshold (0.0-1.0,
|
|
336
|
+
default: 0.7)
|
|
337
|
+
|
|
338
|
+
Supported languages: `en-us`, `en-gb`, `de`, `fr-fr`, `es`, `it`, `pt`, `pl`, `tr`,
|
|
339
|
+
`ru`, `ko`, `ja`, `zh`/`cmn`
|
|
340
|
+
|
|
341
|
+
### SSMD Editing
|
|
342
|
+
|
|
343
|
+
ttsforge uses SSMD (Speech Synthesis Markdown) as an intermediate format between your
|
|
344
|
+
EPUB and the final audio. This allows you to fine-tune pronunciation, pacing, and
|
|
345
|
+
emphasis before conversion.
|
|
346
|
+
|
|
347
|
+
#### How It Works
|
|
348
|
+
|
|
349
|
+
During conversion, ttsforge automatically generates `.ssmd` files for each chapter:
|
|
350
|
+
|
|
351
|
+
```
|
|
352
|
+
.{book_title}_chapters/
|
|
353
|
+
├── chapter_001_intro.ssmd # Editable text with speech markup
|
|
354
|
+
├── chapter_001_intro.wav
|
|
355
|
+
├── chapter_002_chapter1.ssmd
|
|
356
|
+
├── chapter_002_chapter1.wav
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
When you resume a conversion, ttsforge detects if you've edited any SSMD files and
|
|
360
|
+
automatically regenerates the audio.
|
|
361
|
+
|
|
362
|
+
#### Basic Workflow
|
|
363
|
+
|
|
364
|
+
```bash
|
|
365
|
+
# 1. Start conversion
|
|
366
|
+
ttsforge convert book.epub
|
|
367
|
+
|
|
368
|
+
# 2. Pause conversion (Ctrl+C)
|
|
369
|
+
|
|
370
|
+
# 3. Edit SSMD files to fix pronunciation or pacing
|
|
371
|
+
vim .book_chapters/chapter_001_intro.ssmd
|
|
372
|
+
|
|
373
|
+
# 4. Resume - automatically detects edits and regenerates audio
|
|
374
|
+
ttsforge convert book.epub
|
|
375
|
+
```
|
|
376
|
+
|
|
377
|
+
#### SSMD Syntax
|
|
378
|
+
|
|
379
|
+
SSMD files use a simple markdown-like syntax:
|
|
380
|
+
|
|
381
|
+
**Structural Breaks** (control pauses):
|
|
382
|
+
|
|
383
|
+
```
|
|
384
|
+
...p # Paragraph break (0.5-1.0s pause)
|
|
385
|
+
...s # Sentence break (0.1-0.3s pause)
|
|
386
|
+
...c # Clause break (shorter pause)
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
**Emphasis**:
|
|
390
|
+
|
|
391
|
+
```
|
|
392
|
+
*text* # Moderate emphasis
|
|
393
|
+
**text** # Strong emphasis
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
**Custom Phonemes**:
|
|
397
|
+
|
|
398
|
+
```
|
|
399
|
+
[Hermione](ph: /hɝmˈIni/) # Override pronunciation
|
|
400
|
+
[API](ph: /ˌeɪpiˈaɪ/) # Technical terms
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
**Language Switching** (planned):
|
|
404
|
+
|
|
405
|
+
```
|
|
406
|
+
[Bonjour](fr) # Mark text as French
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
#### Example SSMD File
|
|
410
|
+
|
|
411
|
+
```ssmd
|
|
412
|
+
Chapter One ...p
|
|
413
|
+
|
|
414
|
+
[Harry](ph: /hæɹi/) Potter was a *highly unusual* boy in many ways. ...s
|
|
415
|
+
For one thing, he **hated** the summer holidays more than any other
|
|
416
|
+
time of year. ...s For another, he really wanted to do his homework,
|
|
417
|
+
but was forced to do it in secret, in the dead of the night. ...p
|
|
418
|
+
|
|
419
|
+
And he also happened to be a wizard. ...p
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
#### When to Use SSMD Editing
|
|
423
|
+
|
|
424
|
+
- **Pronunciation issues**: Character names, technical terms, foreign words
|
|
425
|
+
- **Pacing problems**: Adjust paragraph and sentence breaks
|
|
426
|
+
- **Emphasis corrections**: Add or remove emphasis on specific words
|
|
427
|
+
- **Combine with phoneme dictionary**: The phoneme dictionary is applied automatically to SSMD files
|
|
428
|
+
|
|
429
|
+
For detailed SSMD syntax and examples, see [SSMD_QUICKSTART.md](SSMD_QUICKSTART.md).
|
|
430
|
+
|
|
431
|
+
### Custom Phoneme Dictionary
|
|
432
|
+
|
|
433
|
+
Control pronunciation of character names, technical terms, and foreign words with custom
|
|
434
|
+
phoneme dictionaries.
|
|
435
|
+
|
|
436
|
+
#### Quick Start
|
|
437
|
+
|
|
438
|
+
```bash
|
|
439
|
+
# 1. Extract names from your book (requires spacy)
|
|
440
|
+
ttsforge extract-names mybook.epub
|
|
441
|
+
|
|
442
|
+
# 2. Review the generated custom_phonemes.json file
|
|
443
|
+
ttsforge list-names custom_phonemes.json
|
|
444
|
+
|
|
445
|
+
# 3. Test pronunciation with sample
|
|
446
|
+
ttsforge sample "Hermione loves Kubernetes" --phoneme-dict custom_phonemes.json -p
|
|
447
|
+
|
|
448
|
+
# 4. Convert with custom pronunciations
|
|
449
|
+
ttsforge convert mybook.epub --phoneme-dict custom_phonemes.json
|
|
450
|
+
```
|
|
451
|
+
|
|
452
|
+
#### Requirements
|
|
453
|
+
|
|
454
|
+
For automatic name extraction (optional but recommended):
|
|
455
|
+
|
|
456
|
+
```bash
|
|
457
|
+
pip install spacy
|
|
458
|
+
python -m spacy download en_core_web_sm
|
|
459
|
+
```
|
|
460
|
+
|
|
461
|
+
#### Workflow
|
|
462
|
+
|
|
463
|
+
**1. Extract names from your book:**
|
|
464
|
+
|
|
465
|
+
```bash
|
|
466
|
+
# Extract frequent names (≥3 occurrences)
|
|
467
|
+
ttsforge extract-names mybook.epub
|
|
468
|
+
|
|
469
|
+
# Preview without saving
|
|
470
|
+
ttsforge extract-names mybook.epub --preview
|
|
471
|
+
|
|
472
|
+
# Only very frequent names (≥10 occurrences)
|
|
473
|
+
ttsforge extract-names mybook.epub --min-count 10 -o names.json
|
|
474
|
+
|
|
475
|
+
# Include all proper nouns, not just detected person names
|
|
476
|
+
ttsforge extract-names mybook.epub --include-all
|
|
477
|
+
```
|
|
478
|
+
|
|
479
|
+
This creates a `custom_phonemes.json` file with auto-generated phoneme suggestions.
|
|
480
|
+
|
|
481
|
+
**2. Review and edit the dictionary:**
|
|
482
|
+
|
|
483
|
+
```bash
|
|
484
|
+
# List all entries
|
|
485
|
+
ttsforge list-names custom_phonemes.json
|
|
486
|
+
|
|
487
|
+
# Sort alphabetically
|
|
488
|
+
ttsforge list-names custom_phonemes.json --sort-by alpha
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
Edit `custom_phonemes.json` to fix any incorrect phonemes. The file format is:
|
|
492
|
+
|
|
493
|
+
```json
|
|
494
|
+
{
|
|
495
|
+
"_metadata": {
|
|
496
|
+
"generated_from": "mybook.epub",
|
|
497
|
+
"language": "en-us"
|
|
498
|
+
},
|
|
499
|
+
"entries": {
|
|
500
|
+
"Hermione": {
|
|
501
|
+
"phoneme": "/hɝmˈIni/",
|
|
502
|
+
"occurrences": 847,
|
|
503
|
+
"verified": false
|
|
504
|
+
},
|
|
505
|
+
"Kubernetes": {
|
|
506
|
+
"phoneme": "/kubɚnˈɛtɪs/",
|
|
507
|
+
"occurrences": 12,
|
|
508
|
+
"verified": false
|
|
509
|
+
}
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
```
|
|
513
|
+
|
|
514
|
+
Or use the simple format:
|
|
515
|
+
|
|
516
|
+
```json
|
|
517
|
+
{
|
|
518
|
+
"Hermione": "/hɝmˈIni/",
|
|
519
|
+
"Kubernetes": "/kubɚnˈɛtɪs/"
|
|
520
|
+
}
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
**3. Test pronunciation:**
|
|
524
|
+
|
|
525
|
+
```bash
|
|
526
|
+
# Test specific names
|
|
527
|
+
ttsforge sample "Hermione and Harry" --phoneme-dict custom_phonemes.json -p
|
|
528
|
+
|
|
529
|
+
# Test and save to file
|
|
530
|
+
ttsforge sample "Hermione and Harry" --phoneme-dict custom_phonemes.json -o test.wav
|
|
531
|
+
```
|
|
532
|
+
|
|
533
|
+
**4. Convert your book:**
|
|
534
|
+
|
|
535
|
+
```bash
|
|
536
|
+
# Use the dictionary for conversion
|
|
537
|
+
ttsforge convert mybook.epub --phoneme-dict custom_phonemes.json
|
|
538
|
+
|
|
539
|
+
# Case-sensitive matching (default is case-insensitive)
|
|
540
|
+
ttsforge convert mybook.epub \
|
|
541
|
+
--phoneme-dict custom_phonemes.json \
|
|
542
|
+
--phoneme-dict-case-sensitive
|
|
543
|
+
```
|
|
544
|
+
|
|
545
|
+
#### Manual Dictionary Creation
|
|
546
|
+
|
|
547
|
+
You can create a dictionary manually without extraction:
|
|
548
|
+
|
|
549
|
+
```json
|
|
550
|
+
{
|
|
551
|
+
"Katniss": "/kætnɪs/",
|
|
552
|
+
"Peeta": "/pitə/",
|
|
553
|
+
"Panem": "/pænəm/"
|
|
554
|
+
}
|
|
555
|
+
```
|
|
556
|
+
|
|
557
|
+
#### Getting IPA Phonemes
|
|
558
|
+
|
|
559
|
+
To find the correct IPA phonemes for a word:
|
|
560
|
+
|
|
561
|
+
1. Use `ttsforge sample "word" -p` to hear the default pronunciation
|
|
562
|
+
2. Look up IPA pronunciation online (e.g., Wiktionary, IPA dictionaries)
|
|
563
|
+
3. Or use the auto-generated phonemes as a starting point
|
|
564
|
+
|
|
565
|
+
**Note:** Phoneme matching is case-insensitive by default and respects word boundaries
|
|
566
|
+
(e.g., "test" won't match "testing").
|
|
567
|
+
|
|
568
|
+
## Commands
|
|
569
|
+
|
|
570
|
+
| Command | Description |
|
|
571
|
+
| ------------------ | ------------------------------------ |
|
|
572
|
+
| `convert` | Convert EPUB to audiobook |
|
|
573
|
+
| `list` | List chapters in EPUB |
|
|
574
|
+
| `info` | Show EPUB metadata |
|
|
575
|
+
| `sample` | Generate sample audio |
|
|
576
|
+
| `read` | Stream playback from EPUB/text |
|
|
577
|
+
| `voices` | List available voices |
|
|
578
|
+
| `demo` | Generate voice demo |
|
|
579
|
+
| `extract-names` | Extract names for phoneme dictionary |
|
|
580
|
+
| `list-names` | List names in phoneme dictionary |
|
|
581
|
+
| `download` | Download ONNX models |
|
|
582
|
+
| `config` | Manage configuration |
|
|
583
|
+
| `phonemes export` | Export EPUB to phonemes |
|
|
584
|
+
| `phonemes convert` | Convert phonemes to audio |
|
|
585
|
+
| `phonemes info` | Show phoneme file info |
|
|
586
|
+
| `phonemes preview` | Preview text as phonemes |
|
|
587
|
+
|
|
588
|
+
## GPU Acceleration
|
|
589
|
+
|
|
590
|
+
For faster processing with CUDA:
|
|
591
|
+
|
|
592
|
+
```bash
|
|
593
|
+
pip install onnxruntime-gpu
|
|
594
|
+
ttsforge config --set use_gpu true
|
|
595
|
+
```
|
|
596
|
+
|
|
597
|
+
Or enable it for a single command:
|
|
598
|
+
|
|
599
|
+
```bash
|
|
600
|
+
ttsforge convert book.epub --gpu
|
|
601
|
+
```
|
|
602
|
+
|
|
603
|
+
## Configuration Options
|
|
604
|
+
|
|
605
|
+
| Option | Default | Description |
|
|
606
|
+
| --------------------------- | -------------- | ------------------------------------ |
|
|
607
|
+
| `default_voice` | `af_heart` | Default TTS voice |
|
|
608
|
+
| `default_language` | `a` | Default language code |
|
|
609
|
+
| `default_speed` | `1.0` | Speech speed (0.5-2.0) |
|
|
610
|
+
| `default_format` | `m4b` | Output format |
|
|
611
|
+
| `use_gpu` | `false` | Enable GPU acceleration |
|
|
612
|
+
| `model_quality` | `fp32` | Model quality/quantization |
|
|
613
|
+
| `model_variant` | `v1.0` | Model variant |
|
|
614
|
+
| `silence_between_chapters` | `2.0` | Chapter gap (seconds) |
|
|
615
|
+
| `pause_clause` | `0.5` | Clause pause (seconds) |
|
|
616
|
+
| `pause_sentence` | `0.7` | Sentence pause (seconds) |
|
|
617
|
+
| `pause_paragraph` | `0.9` | Paragraph pause (seconds) |
|
|
618
|
+
| `pause_variance` | `0.05` | Pause variance (seconds) |
|
|
619
|
+
| `pause_mode` | `auto` | Pause mode (`tts`, `manual`, `auto`) |
|
|
620
|
+
| `announce_chapters` | `true` | Speak chapter titles |
|
|
621
|
+
| `chapter_pause_after_title` | `2.0` | Pause after chapter title |
|
|
622
|
+
| `phonemization_lang` | `None` | Override phonemization language |
|
|
623
|
+
| `output_filename_template` | `{book_title}` | Output filename template |
|
|
624
|
+
| `default_content_mode` | `chapters` | `read` mode (`chapters`/`pages`) |
|
|
625
|
+
| `default_page_size` | `2000` | Page size for `read` pages mode |
|
|
626
|
+
| `use_mixed_language` | `false` | Enable mixed-language mode |
|
|
627
|
+
| `mixed_language_primary` | `None` | Primary language for mixed mode |
|
|
628
|
+
| `mixed_language_allowed` | `None` | Allowed languages (list) |
|
|
629
|
+
| `mixed_language_confidence` | `0.7` | Language detection threshold |
|
|
630
|
+
|
|
631
|
+
## Documentation
|
|
632
|
+
|
|
633
|
+
Full documentation: https://ttsforge.readthedocs.io/
|
|
634
|
+
|
|
635
|
+
Build the documentation locally:
|
|
636
|
+
|
|
637
|
+
```bash
|
|
638
|
+
cd docs
|
|
639
|
+
pip install sphinx sphinx-rtd-theme
|
|
640
|
+
make html
|
|
641
|
+
```
|
|
642
|
+
|
|
643
|
+
## Requirements
|
|
644
|
+
|
|
645
|
+
- Python 3.10+
|
|
646
|
+
- ffmpeg (for MP3/FLAC/OPUS/M4B output and chapter merging)
|
|
647
|
+
- espeak-ng (for phonemization)
|
|
648
|
+
- ~330MB disk space (ONNX models)
|
|
649
|
+
- sounddevice (optional, for audio playback)
|
|
650
|
+
|
|
651
|
+
## License
|
|
652
|
+
|
|
653
|
+
MIT License
|
|
654
|
+
|
|
655
|
+
## Credits
|
|
656
|
+
|
|
657
|
+
- [Kokoro](https://github.com/hexgrad/kokoro) - TTS model
|
|
658
|
+
- [espeak-ng](https://github.com/espeak-ng/espeak-ng) - Phonemization
|
|
659
|
+
- [ONNX Runtime](https://onnxruntime.ai/) - Model inference
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
ttsforge/__init__.py,sha256=Jg8_0vPttTVWrnt4HBqrTOKYfmcgpVpfddSVcU4HKXo,2432
|
|
2
|
+
ttsforge/_version.py,sha256=5jwwVncvCiTnhOedfkzzxmxsggwmTBORdFL_4wq0ZeY,704
|
|
3
|
+
ttsforge/audio_merge.py,sha256=Tt7o8GBNrkcfiSKycUpWvblj-y4zwlULoX-eCblqYpo,5666
|
|
4
|
+
ttsforge/audio_player.py,sha256=HYc4vv46yDXjVXaWRlj1tUtWLiwNTwbzT6oDfOUB5vA,14351
|
|
5
|
+
ttsforge/chapter_selection.py,sha256=a-XlEO4HMzeUBfhvnh6gQOQmDuM0wMpVCi0pw6oM2hQ,2579
|
|
6
|
+
ttsforge/constants.py,sha256=p_-LfE_u1oT--tzjI5PrHGXd5u9DHM7dcHsfcUgvzvY,5108
|
|
7
|
+
ttsforge/conversion.py,sha256=oZ1FyyyeChb6EoCrVJ-maogXqLq537iUBne5oa_epdw,41629
|
|
8
|
+
ttsforge/input_reader.py,sha256=b49SBT-mL4SnR74D8xwyWHC_smPhsJ5jpPAj4QQ5WKo,14068
|
|
9
|
+
ttsforge/kokoro_lang.py,sha256=8603b5whfk0KzGrNK7pqRjzoH1Ge9TKoX7AMzKsX0sk,376
|
|
10
|
+
ttsforge/kokoro_runner.py,sha256=ZGBx70_rHcfwKiUgywa_3-7d5u-wQ_0pPOukQRuACu0,4390
|
|
11
|
+
ttsforge/name_extractor.py,sha256=vBVp2OT8sdYdbczs0SQdEcZff2sN1sk5URi5gBwJrcE,9584
|
|
12
|
+
ttsforge/phoneme_conversion.py,sha256=so5Iex2lme9tHvZiCysLBAkBH1tp42O8pkibLSrHzh8,38280
|
|
13
|
+
ttsforge/phonemes.py,sha256=EUZ1Qr-0rPThRpSeuJQe5Z3J3nz7rX1Xs3Rjjw19qIQ,15517
|
|
14
|
+
ttsforge/ssmd_generator.py,sha256=Dmuvy6T8WVyzHvaNWsPDdyyTIZCpyX2pOsaVUPk8s08,13326
|
|
15
|
+
ttsforge/utils.py,sha256=3BiNFyScV3Dy_xhVm2EigpxUb4Z6YwIQPzzxwDzfCzI,24942
|
|
16
|
+
ttsforge/cli/__init__.py,sha256=CTqYeUAJaKV7YTYqcmr7-VxjwJfjLcnPYM2OKyws0Oc,2103
|
|
17
|
+
ttsforge/cli/commands_conversion.py,sha256=H1fX1M52RdSiSnGfIknr-dBGx-MeMaVf5J6rMDVrgWU,63457
|
|
18
|
+
ttsforge/cli/commands_phonemes.py,sha256=_0PQd9_hjOvzkzx2qM5BEG1fNyZYTjKc5nrZZ41HV4k,32373
|
|
19
|
+
ttsforge/cli/commands_utility.py,sha256=_8KMUjVYVqp63PH_gjOjS-fw6ZCujaMDXFxoKfUzlko,48013
|
|
20
|
+
ttsforge/cli/helpers.py,sha256=5Co2EvDhYspKhjW2-P3sNxj9MFFgWyTFeqOyJbPy2yA,2697
|
|
21
|
+
ttsforge/vocab/__init__.py,sha256=lMgS0dY9VbOYI20LnPjjqrWcjLIQ1FKkR4-xcXsvrqc,3641
|
|
22
|
+
ttsforge-0.1.0.dist-info/licenses/LICENSE,sha256=9csb1sDNn0HdUPKgOTUwtb4CkvYPcFXHnkxKCS99EWQ,1074
|
|
23
|
+
ttsforge-0.1.0.dist-info/METADATA,sha256=cQJf57NrwNZJUZacAQgnokqO1cnSC_Mi1b7ZWjcZ0no,19577
|
|
24
|
+
ttsforge-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
25
|
+
ttsforge-0.1.0.dist-info/entry_points.txt,sha256=SrcNdlhQpoUCzPzhVbOmMzATQeV7j7XYl0DPrVjZ-ks,47
|
|
26
|
+
ttsforge-0.1.0.dist-info/top_level.txt,sha256=rNLi-3muicHF8UvZu_FuA2ML_Dz9sVPCjik2E8XnCVk,9
|
|
27
|
+
ttsforge-0.1.0.dist-info/RECORD,,
|