ttsforge 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.codecrate.toml +2 -1
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.github/workflows/codecov.yml +1 -1
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.gitignore +0 -1
- {ttsforge-0.1.0 → ttsforge-0.1.2}/PKG-INFO +13 -12
- {ttsforge-0.1.0 → ttsforge-0.1.2}/README.md +12 -11
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/api/index.rst +3 -12
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/cli.rst +3 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/configuration.rst +1 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/ssmd.rst +9 -9
- ttsforge-0.1.2/tests/test_chapter_marker_leading_space.py +88 -0
- ttsforge-0.1.2/tests/test_chapter_selection.py +20 -0
- ttsforge-0.1.2/tests/test_cli_smoke.py +27 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_constants.py +1 -6
- ttsforge-0.1.2/tests/test_conversion_state.py +84 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_phoneme_dictionary.py +18 -18
- ttsforge-0.1.2/tests/test_ssmd_generator.py +25 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/__init__.py +3 -18
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/_version.py +3 -3
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/cli/commands_conversion.py +75 -10
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/cli/commands_phonemes.py +22 -4
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/cli/commands_utility.py +18 -1
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/cli/helpers.py +1 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/constants.py +13 -4
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/conversion.py +112 -51
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/kokoro_runner.py +38 -5
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/name_extractor.py +3 -3
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/phoneme_conversion.py +61 -10
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/ssmd_generator.py +4 -4
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/PKG-INFO +13 -12
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/SOURCES.txt +5 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.coveragerc +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.github/pytest.ini +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.github/workflows/pre-commit.yml +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.github/workflows/python-publish.yml +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.github/workflows/tests.yml +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.pre-commit-config.yaml +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.prettierrc.yml +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.readthedocs.yaml +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/.ruff.toml +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/LICENSE +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/conf.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/filename_templates.rst +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/index.rst +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/installation.rst +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/make.bat +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/make.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/quickstart.rst +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/requirements.txt +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/voices.rst +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/examples/__init__.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/examples/phoneme_export.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/pyproject.toml +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/requirements-test.txt +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/setup.cfg +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/setup.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/__init__.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_chapter_announcement.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_cli.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_conversion.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_epub_chapter_markers.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_name_extractor.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_onnx_backend.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_phoneme_conversion.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_phonemes.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_tokenizer.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_utils.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/audio_merge.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/audio_player.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/chapter_selection.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/cli/__init__.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/input_reader.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/kokoro_lang.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/phonemes.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/utils.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/vocab/__init__.py +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/dependency_links.txt +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/entry_points.txt +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/requires.txt +0 -0
- {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ttsforge
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Generate audiobooks from EPUB files using Kokoro ONNX TTS.
|
|
5
5
|
Author-email: Holger Nahrstaedt <nahrstaedt@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -396,14 +396,14 @@ SSMD files use a simple markdown-like syntax:
|
|
|
396
396
|
**Custom Phonemes**:
|
|
397
397
|
|
|
398
398
|
```
|
|
399
|
-
[Hermione]
|
|
400
|
-
[API]
|
|
399
|
+
[Hermione]{ph="hɝmˈIni"} # Override pronunciation
|
|
400
|
+
[API]{ph="ˌeɪpiˈaɪ"} # Technical terms
|
|
401
401
|
```
|
|
402
402
|
|
|
403
403
|
**Language Switching** (planned):
|
|
404
404
|
|
|
405
405
|
```
|
|
406
|
-
[Bonjour]
|
|
406
|
+
[Bonjour]{lang="fr"} # Mark text as French
|
|
407
407
|
```
|
|
408
408
|
|
|
409
409
|
#### Example SSMD File
|
|
@@ -411,7 +411,7 @@ SSMD files use a simple markdown-like syntax:
|
|
|
411
411
|
```ssmd
|
|
412
412
|
Chapter One ...p
|
|
413
413
|
|
|
414
|
-
[Harry]
|
|
414
|
+
[Harry]{ph="hæɹi"} Potter was a *highly unusual* boy in many ways. ...s
|
|
415
415
|
For one thing, he **hated** the summer holidays more than any other
|
|
416
416
|
time of year. ...s For another, he really wanted to do his homework,
|
|
417
417
|
but was forced to do it in secret, in the dead of the night. ...p
|
|
@@ -498,12 +498,12 @@ Edit `custom_phonemes.json` to fix any incorrect phonemes. The file format is:
|
|
|
498
498
|
},
|
|
499
499
|
"entries": {
|
|
500
500
|
"Hermione": {
|
|
501
|
-
"phoneme": "
|
|
501
|
+
"phoneme": "hɝmˈIni",
|
|
502
502
|
"occurrences": 847,
|
|
503
503
|
"verified": false
|
|
504
504
|
},
|
|
505
505
|
"Kubernetes": {
|
|
506
|
-
"phoneme": "
|
|
506
|
+
"phoneme": "kubɚnˈɛtɪs",
|
|
507
507
|
"occurrences": 12,
|
|
508
508
|
"verified": false
|
|
509
509
|
}
|
|
@@ -515,8 +515,8 @@ Or use the simple format:
|
|
|
515
515
|
|
|
516
516
|
```json
|
|
517
517
|
{
|
|
518
|
-
"Hermione": "
|
|
519
|
-
"Kubernetes": "
|
|
518
|
+
"Hermione": "hɝmˈIni",
|
|
519
|
+
"Kubernetes": "kubɚnˈɛtɪs"
|
|
520
520
|
}
|
|
521
521
|
```
|
|
522
522
|
|
|
@@ -548,9 +548,9 @@ You can create a dictionary manually without extraction:
|
|
|
548
548
|
|
|
549
549
|
```json
|
|
550
550
|
{
|
|
551
|
-
"Katniss": "
|
|
552
|
-
"Peeta": "
|
|
553
|
-
"Panem": "
|
|
551
|
+
"Katniss": "kætnɪs",
|
|
552
|
+
"Peeta": "pitə",
|
|
553
|
+
"Panem": "pænəm"
|
|
554
554
|
}
|
|
555
555
|
```
|
|
556
556
|
|
|
@@ -617,6 +617,7 @@ ttsforge convert book.epub --gpu
|
|
|
617
617
|
| `pause_paragraph` | `0.9` | Paragraph pause (seconds) |
|
|
618
618
|
| `pause_variance` | `0.05` | Pause variance (seconds) |
|
|
619
619
|
| `pause_mode` | `auto` | Pause mode (`tts`, `manual`, `auto`) |
|
|
620
|
+
| `enable_short_sentence` | `None` | Handle short sentences |
|
|
620
621
|
| `announce_chapters` | `true` | Speak chapter titles |
|
|
621
622
|
| `chapter_pause_after_title` | `2.0` | Pause after chapter title |
|
|
622
623
|
| `phonemization_lang` | `None` | Override phonemization language |
|
|
@@ -333,14 +333,14 @@ SSMD files use a simple markdown-like syntax:
|
|
|
333
333
|
**Custom Phonemes**:
|
|
334
334
|
|
|
335
335
|
```
|
|
336
|
-
[Hermione]
|
|
337
|
-
[API]
|
|
336
|
+
[Hermione]{ph="hɝmˈIni"} # Override pronunciation
|
|
337
|
+
[API]{ph="ˌeɪpiˈaɪ"} # Technical terms
|
|
338
338
|
```
|
|
339
339
|
|
|
340
340
|
**Language Switching** (planned):
|
|
341
341
|
|
|
342
342
|
```
|
|
343
|
-
[Bonjour]
|
|
343
|
+
[Bonjour]{lang="fr"} # Mark text as French
|
|
344
344
|
```
|
|
345
345
|
|
|
346
346
|
#### Example SSMD File
|
|
@@ -348,7 +348,7 @@ SSMD files use a simple markdown-like syntax:
|
|
|
348
348
|
```ssmd
|
|
349
349
|
Chapter One ...p
|
|
350
350
|
|
|
351
|
-
[Harry]
|
|
351
|
+
[Harry]{ph="hæɹi"} Potter was a *highly unusual* boy in many ways. ...s
|
|
352
352
|
For one thing, he **hated** the summer holidays more than any other
|
|
353
353
|
time of year. ...s For another, he really wanted to do his homework,
|
|
354
354
|
but was forced to do it in secret, in the dead of the night. ...p
|
|
@@ -435,12 +435,12 @@ Edit `custom_phonemes.json` to fix any incorrect phonemes. The file format is:
|
|
|
435
435
|
},
|
|
436
436
|
"entries": {
|
|
437
437
|
"Hermione": {
|
|
438
|
-
"phoneme": "
|
|
438
|
+
"phoneme": "hɝmˈIni",
|
|
439
439
|
"occurrences": 847,
|
|
440
440
|
"verified": false
|
|
441
441
|
},
|
|
442
442
|
"Kubernetes": {
|
|
443
|
-
"phoneme": "
|
|
443
|
+
"phoneme": "kubɚnˈɛtɪs",
|
|
444
444
|
"occurrences": 12,
|
|
445
445
|
"verified": false
|
|
446
446
|
}
|
|
@@ -452,8 +452,8 @@ Or use the simple format:
|
|
|
452
452
|
|
|
453
453
|
```json
|
|
454
454
|
{
|
|
455
|
-
"Hermione": "
|
|
456
|
-
"Kubernetes": "
|
|
455
|
+
"Hermione": "hɝmˈIni",
|
|
456
|
+
"Kubernetes": "kubɚnˈɛtɪs"
|
|
457
457
|
}
|
|
458
458
|
```
|
|
459
459
|
|
|
@@ -485,9 +485,9 @@ You can create a dictionary manually without extraction:
|
|
|
485
485
|
|
|
486
486
|
```json
|
|
487
487
|
{
|
|
488
|
-
"Katniss": "
|
|
489
|
-
"Peeta": "
|
|
490
|
-
"Panem": "
|
|
488
|
+
"Katniss": "kætnɪs",
|
|
489
|
+
"Peeta": "pitə",
|
|
490
|
+
"Panem": "pænəm"
|
|
491
491
|
}
|
|
492
492
|
```
|
|
493
493
|
|
|
@@ -554,6 +554,7 @@ ttsforge convert book.epub --gpu
|
|
|
554
554
|
| `pause_paragraph` | `0.9` | Paragraph pause (seconds) |
|
|
555
555
|
| `pause_variance` | `0.05` | Pause variance (seconds) |
|
|
556
556
|
| `pause_mode` | `auto` | Pause mode (`tts`, `manual`, `auto`) |
|
|
557
|
+
| `enable_short_sentence` | `None` | Handle short sentences |
|
|
557
558
|
| `announce_chapters` | `true` | Speak chapter titles |
|
|
558
559
|
| `chapter_pause_after_title` | `2.0` | Pause after chapter title |
|
|
559
560
|
| `phonemization_lang` | `None` | Override phonemization language |
|
|
@@ -60,10 +60,6 @@ Utilities
|
|
|
60
60
|
**ttsforge.vocab**
|
|
61
61
|
Vocabulary utilities and metadata.
|
|
62
62
|
|
|
63
|
-
**ttsforge.trim**
|
|
64
|
-
Audio trimming utilities for silence removal.
|
|
65
|
-
|
|
66
|
-
|
|
67
63
|
Quick API Examples
|
|
68
64
|
------------------
|
|
69
65
|
|
|
@@ -80,9 +76,9 @@ Basic Text-to-Speech
|
|
|
80
76
|
voice="af_heart",
|
|
81
77
|
speed=1.0,
|
|
82
78
|
use_gpu=False,
|
|
83
|
-
pause_clause=0.
|
|
84
|
-
pause_sentence=0.
|
|
85
|
-
pause_paragraph=0.
|
|
79
|
+
pause_clause=0.3,
|
|
80
|
+
pause_sentence=0.5,
|
|
81
|
+
pause_paragraph=0.9,
|
|
86
82
|
pause_variance=0.05,
|
|
87
83
|
)
|
|
88
84
|
runner = KokoroRunner(opts, log=print)
|
|
@@ -242,8 +238,3 @@ Auto-generated API Documentation
|
|
|
242
238
|
:members:
|
|
243
239
|
:undoc-members:
|
|
244
240
|
:show-inheritance:
|
|
245
|
-
|
|
246
|
-
.. automodule:: ttsforge.trim
|
|
247
|
-
:members:
|
|
248
|
-
:undoc-members:
|
|
249
|
-
:show-inheritance:
|
|
@@ -85,6 +85,9 @@ Options
|
|
|
85
85
|
``--pause-mode MODE``
|
|
86
86
|
Pause mode: ``tts``, ``manual``, or ``auto``. Default: ``auto``.
|
|
87
87
|
|
|
88
|
+
``--enable-short-sentence``
|
|
89
|
+
Enable special handling for short sentences (fewer than 5 words).
|
|
90
|
+
|
|
88
91
|
``--announce-chapters / --no-announce-chapters``
|
|
89
92
|
Read chapter titles aloud before chapter content. Default: enabled.
|
|
90
93
|
|
|
@@ -452,6 +452,7 @@ Here's an example ``config.json`` with custom settings:
|
|
|
452
452
|
"pause_paragraph": 0.9,
|
|
453
453
|
"pause_variance": 0.05,
|
|
454
454
|
"pause_mode": "auto",
|
|
455
|
+
"enable_short_sentence": null,
|
|
455
456
|
"announce_chapters": true,
|
|
456
457
|
"chapter_pause_after_title": 2.0,
|
|
457
458
|
"save_chapters_separately": false,
|
|
@@ -101,15 +101,15 @@ Override pronunciation using IPA phonemes:
|
|
|
101
101
|
|
|
102
102
|
.. code-block:: ssmd
|
|
103
103
|
|
|
104
|
-
[word]
|
|
104
|
+
[word]{ph="phoneme"}
|
|
105
105
|
|
|
106
106
|
Examples:
|
|
107
107
|
|
|
108
108
|
.. code-block:: ssmd
|
|
109
109
|
|
|
110
|
-
[Hermione]
|
|
111
|
-
The [API]
|
|
112
|
-
[Kubernetes]
|
|
110
|
+
[Hermione]{ph="hɝmˈIni"} Granger was Harry's best friend. ...s
|
|
111
|
+
The [API]{ph="ˌeɪpiˈaɪ"} supports [JSON]{ph="dʒˈeɪsɑn"}. ...s
|
|
112
|
+
[Kubernetes]{ph="kubɚnˈɛtɪs"} is a container orchestrator. ...s
|
|
113
113
|
|
|
114
114
|
|
|
115
115
|
Language Switching (Planned)
|
|
@@ -119,8 +119,8 @@ Mark text as a different language (placeholder for future):
|
|
|
119
119
|
|
|
120
120
|
.. code-block:: ssmd
|
|
121
121
|
|
|
122
|
-
[Bonjour]
|
|
123
|
-
[Hola]
|
|
122
|
+
[Bonjour]{lang="fr"} # French text
|
|
123
|
+
[Hola]{lang="es"} # Spanish text
|
|
124
124
|
|
|
125
125
|
|
|
126
126
|
Complete Example
|
|
@@ -132,14 +132,14 @@ Here's a complete SSMD file example:
|
|
|
132
132
|
|
|
133
133
|
Chapter One ...p
|
|
134
134
|
|
|
135
|
-
[Harry]
|
|
135
|
+
[Harry]{ph="hæɹi"} Potter was a *highly unusual* boy in many ways. ...s
|
|
136
136
|
For one thing, he **hated** the summer holidays more than any other
|
|
137
137
|
time of year. ...s For another, he really wanted to do his homework,
|
|
138
138
|
but was forced to do it in secret, in the dead of the night. ...p
|
|
139
139
|
|
|
140
140
|
And he also happened to be a wizard. ...p
|
|
141
141
|
|
|
142
|
-
The [Dursleys]
|
|
142
|
+
The [Dursleys]{ph="dɝzliz"} had everything they wanted, but they
|
|
143
143
|
also had a secret. ...s And their greatest fear was that somebody
|
|
144
144
|
would discover it. ...p
|
|
145
145
|
|
|
@@ -163,7 +163,7 @@ The generated SSMD will include:
|
|
|
163
163
|
|
|
164
164
|
.. code-block:: ssmd
|
|
165
165
|
|
|
166
|
-
[Hermione]
|
|
166
|
+
[Hermione]{ph="hɝmˈIni"} loved reading books. ...s
|
|
167
167
|
|
|
168
168
|
|
|
169
169
|
HTML Emphasis Detection
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Test chapter marker removal with leading whitespace."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TestChapterMarkerLeadingWhitespace:
|
|
7
|
+
"""Test that chapter markers are removed even with leading whitespace."""
|
|
8
|
+
|
|
9
|
+
def test_marker_with_no_leading_space(self):
|
|
10
|
+
"""Test normal case - marker at start of line."""
|
|
11
|
+
text = "<<CHAPTER: Test Chapter>>\n\nThis is the content."
|
|
12
|
+
pattern = r"^<<CHAPTER:[^>]*>>\s*\n*"
|
|
13
|
+
result = re.sub(pattern, "", text, count=1, flags=re.MULTILINE)
|
|
14
|
+
assert result == "This is the content."
|
|
15
|
+
|
|
16
|
+
def test_marker_with_leading_space(self):
|
|
17
|
+
"""Test marker with a leading space - should be removed with new pattern."""
|
|
18
|
+
text = " <<CHAPTER: Test Chapter>>\n\nThis is the content."
|
|
19
|
+
# New pattern handles leading whitespace
|
|
20
|
+
pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
|
|
21
|
+
result = re.sub(pattern, "", text, count=1, flags=re.MULTILINE)
|
|
22
|
+
assert result == "This is the content."
|
|
23
|
+
|
|
24
|
+
def test_marker_with_leading_tabs(self):
|
|
25
|
+
"""Test marker with leading tabs - should be removed with new pattern."""
|
|
26
|
+
text = "\t<<CHAPTER: Test Chapter>>\n\nThis is the content."
|
|
27
|
+
pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
|
|
28
|
+
result = re.sub(pattern, "", text, count=1, flags=re.MULTILINE)
|
|
29
|
+
assert result == "This is the content."
|
|
30
|
+
|
|
31
|
+
def test_marker_with_multiple_spaces(self):
|
|
32
|
+
"""Test marker with multiple leading spaces -
|
|
33
|
+
should be removed with new pattern."""
|
|
34
|
+
text = " <<CHAPTER: Test Chapter>>\n\nThis is the content."
|
|
35
|
+
pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
|
|
36
|
+
result = re.sub(pattern, "", text, count=1, flags=re.MULTILINE)
|
|
37
|
+
assert result == "This is the content."
|
|
38
|
+
|
|
39
|
+
def test_improved_pattern_handles_leading_whitespace(self):
|
|
40
|
+
"""Test that improved pattern handles all leading whitespace cases."""
|
|
41
|
+
# Improved pattern that handles leading whitespace
|
|
42
|
+
improved_pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
|
|
43
|
+
|
|
44
|
+
test_cases = [
|
|
45
|
+
("<<CHAPTER: Test>>\n\nContent", "Content"),
|
|
46
|
+
(" <<CHAPTER: Test>>\n\nContent", "Content"),
|
|
47
|
+
("\t<<CHAPTER: Test>>\n\nContent", "Content"),
|
|
48
|
+
(" <<CHAPTER: Test>>\n\nContent", "Content"),
|
|
49
|
+
(" \t <<CHAPTER: Test>>\n\nContent", "Content"),
|
|
50
|
+
]
|
|
51
|
+
|
|
52
|
+
for text, expected in test_cases:
|
|
53
|
+
result = re.sub(improved_pattern, "", text, count=1, flags=re.MULTILINE)
|
|
54
|
+
assert result == expected, f"Failed for input: {repr(text)}"
|
|
55
|
+
|
|
56
|
+
def test_marker_not_at_line_start_still_removed_with_multiline(self):
|
|
57
|
+
"""Test that marker after newline is removed (MULTILINE mode)."""
|
|
58
|
+
text = "Some text\n<<CHAPTER: Test>>\n\nContent"
|
|
59
|
+
improved_pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
|
|
60
|
+
result = re.sub(improved_pattern, "", text, count=1, flags=re.MULTILINE)
|
|
61
|
+
assert result == "Some text\nContent"
|
|
62
|
+
|
|
63
|
+
def test_only_first_marker_removed(self):
|
|
64
|
+
"""Test that only the first marker is removed (count=1)."""
|
|
65
|
+
text = "<<CHAPTER: One>>\n\nSome text <<CHAPTER: Two>> inside it."
|
|
66
|
+
improved_pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
|
|
67
|
+
result = re.sub(improved_pattern, "", text, count=1, flags=re.MULTILINE)
|
|
68
|
+
assert result == "Some text <<CHAPTER: Two>> inside it."
|
|
69
|
+
|
|
70
|
+
def test_real_world_epub_scenario(self):
|
|
71
|
+
"""Test realistic epub2text output with potential whitespace issues."""
|
|
72
|
+
# Simulate what epub2text might return with whitespace quirks
|
|
73
|
+
epub_content = " <<CHAPTER: THE STORY SO FAR>>\n\nIn the shadow of the Apt..."
|
|
74
|
+
|
|
75
|
+
# Old pattern (fails)
|
|
76
|
+
old_pattern = r"^<<CHAPTER:[^>]*>>\s*\n*"
|
|
77
|
+
old_result = re.sub(old_pattern, "", epub_content, count=1, flags=re.MULTILINE)
|
|
78
|
+
|
|
79
|
+
# New pattern (works)
|
|
80
|
+
new_pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
|
|
81
|
+
new_result = re.sub(new_pattern, "", epub_content, count=1, flags=re.MULTILINE)
|
|
82
|
+
|
|
83
|
+
# Verify old pattern fails to remove marker
|
|
84
|
+
assert "<<CHAPTER:" in old_result, "Old pattern should fail with leading space"
|
|
85
|
+
|
|
86
|
+
# Verify new pattern successfully removes marker
|
|
87
|
+
assert "<<CHAPTER:" not in new_result, "New pattern should remove marker"
|
|
88
|
+
assert new_result == "In the shadow of the Apt..."
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from ttsforge.chapter_selection import parse_chapter_selection
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_parse_all() -> None:
|
|
7
|
+
assert parse_chapter_selection("all", 5) == [0, 1, 2, 3, 4]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_parse_ranges_and_commas() -> None:
|
|
11
|
+
assert parse_chapter_selection("1-3,5", 6) == [0, 1, 2, 4]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_parse_open_ended_range() -> None:
|
|
15
|
+
assert parse_chapter_selection("3-", 5) == [2, 3, 4]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_parse_invalid_range() -> None:
|
|
19
|
+
with pytest.raises(ValueError):
|
|
20
|
+
parse_chapter_selection("5-2", 6)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from click.testing import CliRunner
|
|
4
|
+
|
|
5
|
+
from ttsforge.cli import main
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_info_and_list_smoke(tmp_path: Path) -> None:
|
|
9
|
+
text = """Title: Sample Book
|
|
10
|
+
Author: Jane Doe
|
|
11
|
+
Language: English
|
|
12
|
+
|
|
13
|
+
CHAPTER I
|
|
14
|
+
This is the first chapter.
|
|
15
|
+
|
|
16
|
+
CHAPTER II
|
|
17
|
+
This is the second chapter.
|
|
18
|
+
"""
|
|
19
|
+
input_file = tmp_path / "sample.txt"
|
|
20
|
+
input_file.write_text(text, encoding="utf-8")
|
|
21
|
+
|
|
22
|
+
runner = CliRunner()
|
|
23
|
+
info_result = runner.invoke(main, ["info", str(input_file)])
|
|
24
|
+
assert info_result.exit_code == 0
|
|
25
|
+
|
|
26
|
+
list_result = runner.invoke(main, ["list", str(input_file)])
|
|
27
|
+
assert list_result.exit_code == 0
|
|
@@ -20,7 +20,7 @@ class TestLanguageDescriptions:
|
|
|
20
20
|
|
|
21
21
|
def test_all_language_codes_have_descriptions(self):
|
|
22
22
|
"""All language codes should have descriptions."""
|
|
23
|
-
expected_codes = {"a", "b", "e", "f", "h", "i", "j", "p", "z"}
|
|
23
|
+
expected_codes = {"a", "b", "d", "e", "f", "h", "i", "j", "p", "z"}
|
|
24
24
|
assert set(LANGUAGE_DESCRIPTIONS.keys()) == expected_codes
|
|
25
25
|
|
|
26
26
|
def test_english_variants(self):
|
|
@@ -132,11 +132,6 @@ class TestDefaultVoiceForLang:
|
|
|
132
132
|
lang in DEFAULT_VOICE_FOR_LANG
|
|
133
133
|
), f"Language {lang} needs default voice"
|
|
134
134
|
|
|
135
|
-
def test_default_voices_exist_in_voices_list(self):
|
|
136
|
-
"""All default voices should exist in VOICES list."""
|
|
137
|
-
for lang, voice in DEFAULT_VOICE_FOR_LANG.items():
|
|
138
|
-
assert voice in VOICES, f"Default voice {voice} for {lang} not in VOICES"
|
|
139
|
-
|
|
140
135
|
def test_default_voices_match_language(self):
|
|
141
136
|
"""Default voices should match their language."""
|
|
142
137
|
for lang, voice in DEFAULT_VOICE_FOR_LANG.items():
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from ttsforge.conversion import ChapterState, ConversionState
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_conversion_state_roundtrip(tmp_path: Path) -> None:
|
|
8
|
+
state = ConversionState(
|
|
9
|
+
source_file="book.epub",
|
|
10
|
+
source_hash="abc123",
|
|
11
|
+
output_file="book.m4b",
|
|
12
|
+
work_dir=str(tmp_path),
|
|
13
|
+
voice="af_heart",
|
|
14
|
+
language="a",
|
|
15
|
+
speed=1.0,
|
|
16
|
+
split_mode="auto",
|
|
17
|
+
output_format="m4b",
|
|
18
|
+
chapters=[
|
|
19
|
+
ChapterState(
|
|
20
|
+
index=0,
|
|
21
|
+
title="Chapter 1",
|
|
22
|
+
content_hash="hash",
|
|
23
|
+
completed=True,
|
|
24
|
+
audio_file="chapter_001.wav",
|
|
25
|
+
duration=1.2,
|
|
26
|
+
char_count=100,
|
|
27
|
+
ssmd_file="chapter_001.ssmd",
|
|
28
|
+
ssmd_hash="ssmdhash",
|
|
29
|
+
)
|
|
30
|
+
],
|
|
31
|
+
started_at="2024-01-01 00:00:00",
|
|
32
|
+
)
|
|
33
|
+
state_file = tmp_path / "state.json"
|
|
34
|
+
state.save(state_file)
|
|
35
|
+
|
|
36
|
+
loaded = ConversionState.load(state_file)
|
|
37
|
+
assert loaded is not None
|
|
38
|
+
assert loaded.voice == "af_heart"
|
|
39
|
+
assert loaded.chapters[0].audio_file == "chapter_001.wav"
|
|
40
|
+
assert loaded.chapters[0].completed is True
|
|
41
|
+
|
|
42
|
+
assert not (tmp_path / "state.json.tmp").exists()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_conversion_state_backward_compat(tmp_path: Path) -> None:
|
|
46
|
+
data = {
|
|
47
|
+
"version": 1,
|
|
48
|
+
"source_file": "book.epub",
|
|
49
|
+
"source_hash": "hash",
|
|
50
|
+
"output_file": "book.m4b",
|
|
51
|
+
"work_dir": str(tmp_path),
|
|
52
|
+
"voice": "af_heart",
|
|
53
|
+
"language": "a",
|
|
54
|
+
"speed": 1.0,
|
|
55
|
+
"split_mode": "auto",
|
|
56
|
+
"output_format": "m4b",
|
|
57
|
+
"chapters": [
|
|
58
|
+
{
|
|
59
|
+
"index": 0,
|
|
60
|
+
"title": "Chapter 1",
|
|
61
|
+
"content_hash": "hash",
|
|
62
|
+
"completed": False,
|
|
63
|
+
"audio_file": None,
|
|
64
|
+
"duration": 0.0,
|
|
65
|
+
"char_count": 10,
|
|
66
|
+
"ssmd_file": None,
|
|
67
|
+
"ssmd_hash": None,
|
|
68
|
+
}
|
|
69
|
+
],
|
|
70
|
+
"segment_pause_min": 0.1,
|
|
71
|
+
"segment_pause_max": 0.3,
|
|
72
|
+
"paragraph_pause_min": 0.5,
|
|
73
|
+
"paragraph_pause_max": 1.0,
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
state_file = tmp_path / "legacy_state.json"
|
|
77
|
+
state_file.write_text(json.dumps(data), encoding="utf-8")
|
|
78
|
+
|
|
79
|
+
loaded = ConversionState.load(state_file)
|
|
80
|
+
assert loaded is not None
|
|
81
|
+
assert loaded.pause_sentence == 0.2
|
|
82
|
+
assert loaded.pause_paragraph == 0.75
|
|
83
|
+
assert loaded.pause_clause == 0.3
|
|
84
|
+
assert loaded.pause_variance >= 0.01
|
|
@@ -13,8 +13,8 @@ class TestPhonemeDictionary:
|
|
|
13
13
|
def test_load_simple_dictionary(self):
|
|
14
14
|
"""Test loading a simple phoneme dictionary."""
|
|
15
15
|
test_dict = {
|
|
16
|
-
"Misaki": "
|
|
17
|
-
"Kubernetes": "
|
|
16
|
+
"Misaki": "misˈɑki",
|
|
17
|
+
"Kubernetes": "kubɚnˈɛtɪs",
|
|
18
18
|
}
|
|
19
19
|
|
|
20
20
|
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
|
@@ -40,8 +40,8 @@ class TestPhonemeDictionary:
|
|
|
40
40
|
"language": "en-us",
|
|
41
41
|
},
|
|
42
42
|
"entries": {
|
|
43
|
-
"Misaki": {"phoneme": "
|
|
44
|
-
"nginx": {"phoneme": "
|
|
43
|
+
"Misaki": {"phoneme": "misˈɑki", "occurrences": 42},
|
|
44
|
+
"nginx": {"phoneme": "ˈɛnʤɪnˈɛks", "occurrences": 8},
|
|
45
45
|
},
|
|
46
46
|
}
|
|
47
47
|
|
|
@@ -67,8 +67,8 @@ class TestPhonemeDictionary:
|
|
|
67
67
|
"""Test loading dictionary with metadata format but simple string values."""
|
|
68
68
|
test_dict = {
|
|
69
69
|
"entries": {
|
|
70
|
-
"Misaki": "
|
|
71
|
-
"nginx": "
|
|
70
|
+
"Misaki": "misˈɑki",
|
|
71
|
+
"nginx": "ˈɛnʤɪnˈɛks",
|
|
72
72
|
}
|
|
73
73
|
}
|
|
74
74
|
|
|
@@ -112,8 +112,8 @@ class TestPhonemeDictionary:
|
|
|
112
112
|
def test_phonemize_with_dictionary(self):
|
|
113
113
|
"""Test phonemization with custom dictionary - through SSMD notation."""
|
|
114
114
|
test_dict = {
|
|
115
|
-
"Misaki": "
|
|
116
|
-
"Kubernetes": "
|
|
115
|
+
"Misaki": "misˈɑki",
|
|
116
|
+
"Kubernetes": "kubɚnˈɛtɪs",
|
|
117
117
|
}
|
|
118
118
|
|
|
119
119
|
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
|
@@ -130,8 +130,8 @@ class TestPhonemeDictionary:
|
|
|
130
130
|
ssmd_text = tokenizer._phoneme_dictionary_obj.apply(text)
|
|
131
131
|
|
|
132
132
|
# Verify SSMD notation is applied
|
|
133
|
-
assert "[Misaki]{ph=" in ssmd_text
|
|
134
|
-
assert "[Kubernetes]{ph=" in ssmd_text
|
|
133
|
+
assert "[Misaki]{ph=" in ssmd_text
|
|
134
|
+
assert "[Kubernetes]{ph=" in ssmd_text
|
|
135
135
|
finally:
|
|
136
136
|
Path(temp_path).unlink()
|
|
137
137
|
|
|
@@ -162,7 +162,7 @@ class TestPhonemeDictionary:
|
|
|
162
162
|
|
|
163
163
|
def test_case_sensitive_matching(self):
|
|
164
164
|
"""Test case-sensitive dictionary matching."""
|
|
165
|
-
test_dict = {"Misaki": "
|
|
165
|
+
test_dict = {"Misaki": "misˈɑki"}
|
|
166
166
|
|
|
167
167
|
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
|
168
168
|
json.dump(test_dict, f)
|
|
@@ -184,13 +184,13 @@ class TestPhonemeDictionary:
|
|
|
184
184
|
assert phoneme_count == 1, f"Expected 1 match, got {phoneme_count}"
|
|
185
185
|
|
|
186
186
|
# Verify it's "Misaki" that matched
|
|
187
|
-
assert "[Misaki]{ph=" in ssmd_text
|
|
187
|
+
assert "[Misaki]{ph=" in ssmd_text
|
|
188
188
|
finally:
|
|
189
189
|
Path(temp_path).unlink()
|
|
190
190
|
|
|
191
191
|
def test_word_boundaries(self):
|
|
192
192
|
"""Test that word boundaries are respected."""
|
|
193
|
-
test_dict = {"test": "
|
|
193
|
+
test_dict = {"test": "tˈɛst"}
|
|
194
194
|
|
|
195
195
|
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
|
196
196
|
json.dump(test_dict, f)
|
|
@@ -246,7 +246,7 @@ class TestPhonemeDictionary:
|
|
|
246
246
|
def test_special_characters_in_words(self):
|
|
247
247
|
"""Test dictionary words with special regex characters (periods, etc.)."""
|
|
248
248
|
# Use a simple word that can be phonemized
|
|
249
|
-
test_dict = {"Misaki": "
|
|
249
|
+
test_dict = {"Misaki": "misˈɑki"}
|
|
250
250
|
|
|
251
251
|
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
|
252
252
|
json.dump(test_dict, f)
|
|
@@ -262,13 +262,13 @@ class TestPhonemeDictionary:
|
|
|
262
262
|
ssmd_text = tokenizer._phoneme_dictionary_obj.apply(text)
|
|
263
263
|
|
|
264
264
|
# Should use custom phoneme
|
|
265
|
-
assert "[Misaki]{ph=" in ssmd_text
|
|
265
|
+
assert "[Misaki]{ph=" in ssmd_text
|
|
266
266
|
finally:
|
|
267
267
|
Path(temp_path).unlink()
|
|
268
268
|
|
|
269
269
|
def test_multiple_occurrences(self):
|
|
270
270
|
"""Test that all occurrences of a word are replaced."""
|
|
271
|
-
test_dict = {"test": "
|
|
271
|
+
test_dict = {"test": "tˈɛst"}
|
|
272
272
|
|
|
273
273
|
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|
|
274
274
|
json.dump(test_dict, f)
|
|
@@ -293,8 +293,8 @@ class TestPhonemeDictionary:
|
|
|
293
293
|
# Note: Multi-word phoneme annotations have limitations in kokorog2p's
|
|
294
294
|
# markdown processing. Testing with overlapping single words instead.
|
|
295
295
|
test_dict = {
|
|
296
|
-
"testing": "
|
|
297
|
-
"test": "
|
|
296
|
+
"testing": "tˈɛstɪŋ",
|
|
297
|
+
"test": "tˈɛst", # Shorter word, different pronunciation
|
|
298
298
|
}
|
|
299
299
|
|
|
300
300
|
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
|