ttsforge 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. {ttsforge-0.1.0 → ttsforge-0.1.2}/.codecrate.toml +2 -1
  2. {ttsforge-0.1.0 → ttsforge-0.1.2}/.github/workflows/codecov.yml +1 -1
  3. {ttsforge-0.1.0 → ttsforge-0.1.2}/.gitignore +0 -1
  4. {ttsforge-0.1.0 → ttsforge-0.1.2}/PKG-INFO +13 -12
  5. {ttsforge-0.1.0 → ttsforge-0.1.2}/README.md +12 -11
  6. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/api/index.rst +3 -12
  7. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/cli.rst +3 -0
  8. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/configuration.rst +1 -0
  9. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/ssmd.rst +9 -9
  10. ttsforge-0.1.2/tests/test_chapter_marker_leading_space.py +88 -0
  11. ttsforge-0.1.2/tests/test_chapter_selection.py +20 -0
  12. ttsforge-0.1.2/tests/test_cli_smoke.py +27 -0
  13. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_constants.py +1 -6
  14. ttsforge-0.1.2/tests/test_conversion_state.py +84 -0
  15. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_phoneme_dictionary.py +18 -18
  16. ttsforge-0.1.2/tests/test_ssmd_generator.py +25 -0
  17. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/__init__.py +3 -18
  18. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/_version.py +3 -3
  19. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/cli/commands_conversion.py +75 -10
  20. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/cli/commands_phonemes.py +22 -4
  21. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/cli/commands_utility.py +18 -1
  22. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/cli/helpers.py +1 -0
  23. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/constants.py +13 -4
  24. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/conversion.py +112 -51
  25. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/kokoro_runner.py +38 -5
  26. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/name_extractor.py +3 -3
  27. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/phoneme_conversion.py +61 -10
  28. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/ssmd_generator.py +4 -4
  29. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/PKG-INFO +13 -12
  30. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/SOURCES.txt +5 -0
  31. {ttsforge-0.1.0 → ttsforge-0.1.2}/.coveragerc +0 -0
  32. {ttsforge-0.1.0 → ttsforge-0.1.2}/.github/pytest.ini +0 -0
  33. {ttsforge-0.1.0 → ttsforge-0.1.2}/.github/workflows/pre-commit.yml +0 -0
  34. {ttsforge-0.1.0 → ttsforge-0.1.2}/.github/workflows/python-publish.yml +0 -0
  35. {ttsforge-0.1.0 → ttsforge-0.1.2}/.github/workflows/tests.yml +0 -0
  36. {ttsforge-0.1.0 → ttsforge-0.1.2}/.pre-commit-config.yaml +0 -0
  37. {ttsforge-0.1.0 → ttsforge-0.1.2}/.prettierrc.yml +0 -0
  38. {ttsforge-0.1.0 → ttsforge-0.1.2}/.readthedocs.yaml +0 -0
  39. {ttsforge-0.1.0 → ttsforge-0.1.2}/.ruff.toml +0 -0
  40. {ttsforge-0.1.0 → ttsforge-0.1.2}/LICENSE +0 -0
  41. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/conf.py +0 -0
  42. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/filename_templates.rst +0 -0
  43. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/index.rst +0 -0
  44. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/installation.rst +0 -0
  45. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/make.bat +0 -0
  46. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/make.py +0 -0
  47. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/quickstart.rst +0 -0
  48. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/requirements.txt +0 -0
  49. {ttsforge-0.1.0 → ttsforge-0.1.2}/docs/voices.rst +0 -0
  50. {ttsforge-0.1.0 → ttsforge-0.1.2}/examples/__init__.py +0 -0
  51. {ttsforge-0.1.0 → ttsforge-0.1.2}/examples/phoneme_export.py +0 -0
  52. {ttsforge-0.1.0 → ttsforge-0.1.2}/pyproject.toml +0 -0
  53. {ttsforge-0.1.0 → ttsforge-0.1.2}/requirements-test.txt +0 -0
  54. {ttsforge-0.1.0 → ttsforge-0.1.2}/setup.cfg +0 -0
  55. {ttsforge-0.1.0 → ttsforge-0.1.2}/setup.py +0 -0
  56. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/__init__.py +0 -0
  57. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_chapter_announcement.py +0 -0
  58. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_cli.py +0 -0
  59. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_conversion.py +0 -0
  60. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_epub_chapter_markers.py +0 -0
  61. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_name_extractor.py +0 -0
  62. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_onnx_backend.py +0 -0
  63. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_phoneme_conversion.py +0 -0
  64. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_phonemes.py +0 -0
  65. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_tokenizer.py +0 -0
  66. {ttsforge-0.1.0 → ttsforge-0.1.2}/tests/test_utils.py +0 -0
  67. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/audio_merge.py +0 -0
  68. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/audio_player.py +0 -0
  69. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/chapter_selection.py +0 -0
  70. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/cli/__init__.py +0 -0
  71. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/input_reader.py +0 -0
  72. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/kokoro_lang.py +0 -0
  73. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/phonemes.py +0 -0
  74. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/utils.py +0 -0
  75. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge/vocab/__init__.py +0 -0
  76. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/dependency_links.txt +0 -0
  77. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/entry_points.txt +0 -0
  78. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/requires.txt +0 -0
  79. {ttsforge-0.1.0 → ttsforge-0.1.2}/ttsforge.egg-info/top_level.txt +0 -0
@@ -1,8 +1,9 @@
1
1
  [codecrate]
2
2
  output = "context_ttsforge.md"
3
3
  keep_docstrings = true
4
- dedupe = true
4
+ dedupe = false
5
5
  metadata = false
6
+ manifest = false
6
7
  respect_gitignore = true
7
8
  exclude = ["*/.venv/*"]
8
9
  include = ["**/*.py", "**/*.toml", "**/*.rst", "**/*.md"]
@@ -12,7 +12,7 @@ jobs:
12
12
  - name: Install espeak-ng
13
13
  run: |
14
14
  sudo apt-get update
15
- sudo apt-get install -y espeak-ng
15
+ sudo apt-get install -y espeak-ng ffmpeg
16
16
  - name: 'generate report'
17
17
  run: |
18
18
  pip install coverage click pytest pytest-cov
@@ -216,7 +216,6 @@ onnx/
216
216
  *.m4a
217
217
 
218
218
  # Test/demo scripts at project root
219
- test_*.py
220
219
  demo_*.py
221
220
 
222
221
  # Binary data files
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ttsforge
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: Generate audiobooks from EPUB files using Kokoro ONNX TTS.
5
5
  Author-email: Holger Nahrstaedt <nahrstaedt@gmail.com>
6
6
  License: MIT License
@@ -396,14 +396,14 @@ SSMD files use a simple markdown-like syntax:
396
396
  **Custom Phonemes**:
397
397
 
398
398
  ```
399
- [Hermione](ph: /hɝmˈIni/) # Override pronunciation
400
- [API](ph: /ˌeɪpiˈaɪ/) # Technical terms
399
+ [Hermione]{ph="hɝmˈIni"} # Override pronunciation
400
+ [API]{ph="ˌeɪpiˈaɪ"} # Technical terms
401
401
  ```
402
402
 
403
403
  **Language Switching** (planned):
404
404
 
405
405
  ```
406
- [Bonjour](fr) # Mark text as French
406
+ [Bonjour]{lang="fr"} # Mark text as French
407
407
  ```
408
408
 
409
409
  #### Example SSMD File
@@ -411,7 +411,7 @@ SSMD files use a simple markdown-like syntax:
411
411
  ```ssmd
412
412
  Chapter One ...p
413
413
 
414
- [Harry](ph: /hæɹi/) Potter was a *highly unusual* boy in many ways. ...s
414
+ [Harry]{ph="hæɹi"} Potter was a *highly unusual* boy in many ways. ...s
415
415
  For one thing, he **hated** the summer holidays more than any other
416
416
  time of year. ...s For another, he really wanted to do his homework,
417
417
  but was forced to do it in secret, in the dead of the night. ...p
@@ -498,12 +498,12 @@ Edit `custom_phonemes.json` to fix any incorrect phonemes. The file format is:
498
498
  },
499
499
  "entries": {
500
500
  "Hermione": {
501
- "phoneme": "/hɝmˈIni/",
501
+ "phoneme": "hɝmˈIni",
502
502
  "occurrences": 847,
503
503
  "verified": false
504
504
  },
505
505
  "Kubernetes": {
506
- "phoneme": "/kubɚnˈɛtɪs/",
506
+ "phoneme": "kubɚnˈɛtɪs",
507
507
  "occurrences": 12,
508
508
  "verified": false
509
509
  }
@@ -515,8 +515,8 @@ Or use the simple format:
515
515
 
516
516
  ```json
517
517
  {
518
- "Hermione": "/hɝmˈIni/",
519
- "Kubernetes": "/kubɚnˈɛtɪs/"
518
+ "Hermione": "hɝmˈIni",
519
+ "Kubernetes": "kubɚnˈɛtɪs"
520
520
  }
521
521
  ```
522
522
 
@@ -548,9 +548,9 @@ You can create a dictionary manually without extraction:
548
548
 
549
549
  ```json
550
550
  {
551
- "Katniss": "/kætnɪs/",
552
- "Peeta": "/pitə/",
553
- "Panem": "/pænəm/"
551
+ "Katniss": "kætnɪs",
552
+ "Peeta": "pitə",
553
+ "Panem": "pænəm"
554
554
  }
555
555
  ```
556
556
 
@@ -617,6 +617,7 @@ ttsforge convert book.epub --gpu
617
617
  | `pause_paragraph` | `0.9` | Paragraph pause (seconds) |
618
618
  | `pause_variance` | `0.05` | Pause variance (seconds) |
619
619
  | `pause_mode` | `auto` | Pause mode (`tts`, `manual`, `auto`) |
620
+ | `enable_short_sentence` | `None` | Handle short sentences |
620
621
  | `announce_chapters` | `true` | Speak chapter titles |
621
622
  | `chapter_pause_after_title` | `2.0` | Pause after chapter title |
622
623
  | `phonemization_lang` | `None` | Override phonemization language |
@@ -333,14 +333,14 @@ SSMD files use a simple markdown-like syntax:
333
333
  **Custom Phonemes**:
334
334
 
335
335
  ```
336
- [Hermione](ph: /hɝmˈIni/) # Override pronunciation
337
- [API](ph: /ˌeɪpiˈaɪ/) # Technical terms
336
+ [Hermione]{ph="hɝmˈIni"} # Override pronunciation
337
+ [API]{ph="ˌeɪpiˈaɪ"} # Technical terms
338
338
  ```
339
339
 
340
340
  **Language Switching** (planned):
341
341
 
342
342
  ```
343
- [Bonjour](fr) # Mark text as French
343
+ [Bonjour]{lang="fr"} # Mark text as French
344
344
  ```
345
345
 
346
346
  #### Example SSMD File
@@ -348,7 +348,7 @@ SSMD files use a simple markdown-like syntax:
348
348
  ```ssmd
349
349
  Chapter One ...p
350
350
 
351
- [Harry](ph: /hæɹi/) Potter was a *highly unusual* boy in many ways. ...s
351
+ [Harry]{ph="hæɹi"} Potter was a *highly unusual* boy in many ways. ...s
352
352
  For one thing, he **hated** the summer holidays more than any other
353
353
  time of year. ...s For another, he really wanted to do his homework,
354
354
  but was forced to do it in secret, in the dead of the night. ...p
@@ -435,12 +435,12 @@ Edit `custom_phonemes.json` to fix any incorrect phonemes. The file format is:
435
435
  },
436
436
  "entries": {
437
437
  "Hermione": {
438
- "phoneme": "/hɝmˈIni/",
438
+ "phoneme": "hɝmˈIni",
439
439
  "occurrences": 847,
440
440
  "verified": false
441
441
  },
442
442
  "Kubernetes": {
443
- "phoneme": "/kubɚnˈɛtɪs/",
443
+ "phoneme": "kubɚnˈɛtɪs",
444
444
  "occurrences": 12,
445
445
  "verified": false
446
446
  }
@@ -452,8 +452,8 @@ Or use the simple format:
452
452
 
453
453
  ```json
454
454
  {
455
- "Hermione": "/hɝmˈIni/",
456
- "Kubernetes": "/kubɚnˈɛtɪs/"
455
+ "Hermione": "hɝmˈIni",
456
+ "Kubernetes": "kubɚnˈɛtɪs"
457
457
  }
458
458
  ```
459
459
 
@@ -485,9 +485,9 @@ You can create a dictionary manually without extraction:
485
485
 
486
486
  ```json
487
487
  {
488
- "Katniss": "/kætnɪs/",
489
- "Peeta": "/pitə/",
490
- "Panem": "/pænəm/"
488
+ "Katniss": "kætnɪs",
489
+ "Peeta": "pitə",
490
+ "Panem": "pænəm"
491
491
  }
492
492
  ```
493
493
 
@@ -554,6 +554,7 @@ ttsforge convert book.epub --gpu
554
554
  | `pause_paragraph` | `0.9` | Paragraph pause (seconds) |
555
555
  | `pause_variance` | `0.05` | Pause variance (seconds) |
556
556
  | `pause_mode` | `auto` | Pause mode (`tts`, `manual`, `auto`) |
557
+ | `enable_short_sentence` | `None` | Handle short sentences |
557
558
  | `announce_chapters` | `true` | Speak chapter titles |
558
559
  | `chapter_pause_after_title` | `2.0` | Pause after chapter title |
559
560
  | `phonemization_lang` | `None` | Override phonemization language |
@@ -60,10 +60,6 @@ Utilities
60
60
  **ttsforge.vocab**
61
61
  Vocabulary utilities and metadata.
62
62
 
63
- **ttsforge.trim**
64
- Audio trimming utilities for silence removal.
65
-
66
-
67
63
  Quick API Examples
68
64
  ------------------
69
65
 
@@ -80,9 +76,9 @@ Basic Text-to-Speech
80
76
  voice="af_heart",
81
77
  speed=1.0,
82
78
  use_gpu=False,
83
- pause_clause=0.25,
84
- pause_sentence=0.2,
85
- pause_paragraph=0.75,
79
+ pause_clause=0.3,
80
+ pause_sentence=0.5,
81
+ pause_paragraph=0.9,
86
82
  pause_variance=0.05,
87
83
  )
88
84
  runner = KokoroRunner(opts, log=print)
@@ -242,8 +238,3 @@ Auto-generated API Documentation
242
238
  :members:
243
239
  :undoc-members:
244
240
  :show-inheritance:
245
-
246
- .. automodule:: ttsforge.trim
247
- :members:
248
- :undoc-members:
249
- :show-inheritance:
@@ -85,6 +85,9 @@ Options
85
85
  ``--pause-mode MODE``
86
86
  Pause mode: ``tts``, ``manual``, or ``auto``. Default: ``auto``.
87
87
 
88
+ ``--enable-short-sentence``
89
+ Enable special handling for short sentences (fewer than 5 words).
90
+
88
91
  ``--announce-chapters / --no-announce-chapters``
89
92
  Read chapter titles aloud before chapter content. Default: enabled.
90
93
 
@@ -452,6 +452,7 @@ Here's an example ``config.json`` with custom settings:
452
452
  "pause_paragraph": 0.9,
453
453
  "pause_variance": 0.05,
454
454
  "pause_mode": "auto",
455
+ "enable_short_sentence": null,
455
456
  "announce_chapters": true,
456
457
  "chapter_pause_after_title": 2.0,
457
458
  "save_chapters_separately": false,
@@ -101,15 +101,15 @@ Override pronunciation using IPA phonemes:
101
101
 
102
102
  .. code-block:: ssmd
103
103
 
104
- [word](ph: /phoneme/)
104
+ [word]{ph="phoneme"}
105
105
 
106
106
  Examples:
107
107
 
108
108
  .. code-block:: ssmd
109
109
 
110
- [Hermione](ph: /hɝmˈIni/) Granger was Harry's best friend. ...s
111
- The [API](ph: /ˌeɪpiˈaɪ/) supports [JSON](ph: /dʒˈeɪsɑn/). ...s
112
- [Kubernetes](ph: /kubɚnˈɛtɪs/) is a container orchestrator. ...s
110
+ [Hermione]{ph="hɝmˈIni"} Granger was Harry's best friend. ...s
111
+ The [API]{ph="ˌeɪpiˈaɪ"} supports [JSON]{ph="dʒˈeɪsɑn"}. ...s
112
+ [Kubernetes]{ph="kubɚnˈɛtɪs"} is a container orchestrator. ...s
113
113
 
114
114
 
115
115
  Language Switching (Planned)
@@ -119,8 +119,8 @@ Mark text as a different language (placeholder for future):
119
119
 
120
120
  .. code-block:: ssmd
121
121
 
122
- [Bonjour](fr) # French text
123
- [Hola](es) # Spanish text
122
+ [Bonjour]{lang="fr"} # French text
123
+ [Hola]{lang="es"} # Spanish text
124
124
 
125
125
 
126
126
  Complete Example
@@ -132,14 +132,14 @@ Here's a complete SSMD file example:
132
132
 
133
133
  Chapter One ...p
134
134
 
135
- [Harry](ph: /hæɹi/) Potter was a *highly unusual* boy in many ways. ...s
135
+ [Harry]{ph="hæɹi"} Potter was a *highly unusual* boy in many ways. ...s
136
136
  For one thing, he **hated** the summer holidays more than any other
137
137
  time of year. ...s For another, he really wanted to do his homework,
138
138
  but was forced to do it in secret, in the dead of the night. ...p
139
139
 
140
140
  And he also happened to be a wizard. ...p
141
141
 
142
- The [Dursleys](ph: /dɝzliz/) had everything they wanted, but they
142
+ The [Dursleys]{ph="dɝzliz"} had everything they wanted, but they
143
143
  also had a secret. ...s And their greatest fear was that somebody
144
144
  would discover it. ...p
145
145
 
@@ -163,7 +163,7 @@ The generated SSMD will include:
163
163
 
164
164
  .. code-block:: ssmd
165
165
 
166
- [Hermione](ph: /hɝmˈIni/) loved reading books. ...s
166
+ [Hermione]{ph="hɝmˈIni"} loved reading books. ...s
167
167
 
168
168
 
169
169
  HTML Emphasis Detection
@@ -0,0 +1,88 @@
1
+ """Test chapter marker removal with leading whitespace."""
2
+
3
+ import re
4
+
5
+
6
+ class TestChapterMarkerLeadingWhitespace:
7
+ """Test that chapter markers are removed even with leading whitespace."""
8
+
9
+ def test_marker_with_no_leading_space(self):
10
+ """Test normal case - marker at start of line."""
11
+ text = "<<CHAPTER: Test Chapter>>\n\nThis is the content."
12
+ pattern = r"^<<CHAPTER:[^>]*>>\s*\n*"
13
+ result = re.sub(pattern, "", text, count=1, flags=re.MULTILINE)
14
+ assert result == "This is the content."
15
+
16
+ def test_marker_with_leading_space(self):
17
+ """Test marker with a leading space - should be removed with new pattern."""
18
+ text = " <<CHAPTER: Test Chapter>>\n\nThis is the content."
19
+ # New pattern handles leading whitespace
20
+ pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
21
+ result = re.sub(pattern, "", text, count=1, flags=re.MULTILINE)
22
+ assert result == "This is the content."
23
+
24
+ def test_marker_with_leading_tabs(self):
25
+ """Test marker with leading tabs - should be removed with new pattern."""
26
+ text = "\t<<CHAPTER: Test Chapter>>\n\nThis is the content."
27
+ pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
28
+ result = re.sub(pattern, "", text, count=1, flags=re.MULTILINE)
29
+ assert result == "This is the content."
30
+
31
+ def test_marker_with_multiple_spaces(self):
32
+ """Test marker with multiple leading spaces -
33
+ should be removed with new pattern."""
34
+ text = " <<CHAPTER: Test Chapter>>\n\nThis is the content."
35
+ pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
36
+ result = re.sub(pattern, "", text, count=1, flags=re.MULTILINE)
37
+ assert result == "This is the content."
38
+
39
+ def test_improved_pattern_handles_leading_whitespace(self):
40
+ """Test that improved pattern handles all leading whitespace cases."""
41
+ # Improved pattern that handles leading whitespace
42
+ improved_pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
43
+
44
+ test_cases = [
45
+ ("<<CHAPTER: Test>>\n\nContent", "Content"),
46
+ (" <<CHAPTER: Test>>\n\nContent", "Content"),
47
+ ("\t<<CHAPTER: Test>>\n\nContent", "Content"),
48
+ (" <<CHAPTER: Test>>\n\nContent", "Content"),
49
+ (" \t <<CHAPTER: Test>>\n\nContent", "Content"),
50
+ ]
51
+
52
+ for text, expected in test_cases:
53
+ result = re.sub(improved_pattern, "", text, count=1, flags=re.MULTILINE)
54
+ assert result == expected, f"Failed for input: {repr(text)}"
55
+
56
+ def test_marker_not_at_line_start_still_removed_with_multiline(self):
57
+ """Test that marker after newline is removed (MULTILINE mode)."""
58
+ text = "Some text\n<<CHAPTER: Test>>\n\nContent"
59
+ improved_pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
60
+ result = re.sub(improved_pattern, "", text, count=1, flags=re.MULTILINE)
61
+ assert result == "Some text\nContent"
62
+
63
+ def test_only_first_marker_removed(self):
64
+ """Test that only the first marker is removed (count=1)."""
65
+ text = "<<CHAPTER: One>>\n\nSome text <<CHAPTER: Two>> inside it."
66
+ improved_pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
67
+ result = re.sub(improved_pattern, "", text, count=1, flags=re.MULTILINE)
68
+ assert result == "Some text <<CHAPTER: Two>> inside it."
69
+
70
+ def test_real_world_epub_scenario(self):
71
+ """Test realistic epub2text output with potential whitespace issues."""
72
+ # Simulate what epub2text might return with whitespace quirks
73
+ epub_content = " <<CHAPTER: THE STORY SO FAR>>\n\nIn the shadow of the Apt..."
74
+
75
+ # Old pattern (fails)
76
+ old_pattern = r"^<<CHAPTER:[^>]*>>\s*\n*"
77
+ old_result = re.sub(old_pattern, "", epub_content, count=1, flags=re.MULTILINE)
78
+
79
+ # New pattern (works)
80
+ new_pattern = r"^\s*<<CHAPTER:[^>]*>>\s*\n*"
81
+ new_result = re.sub(new_pattern, "", epub_content, count=1, flags=re.MULTILINE)
82
+
83
+ # Verify old pattern fails to remove marker
84
+ assert "<<CHAPTER:" in old_result, "Old pattern should fail with leading space"
85
+
86
+ # Verify new pattern successfully removes marker
87
+ assert "<<CHAPTER:" not in new_result, "New pattern should remove marker"
88
+ assert new_result == "In the shadow of the Apt..."
@@ -0,0 +1,20 @@
1
+ import pytest
2
+
3
+ from ttsforge.chapter_selection import parse_chapter_selection
4
+
5
+
6
+ def test_parse_all() -> None:
7
+ assert parse_chapter_selection("all", 5) == [0, 1, 2, 3, 4]
8
+
9
+
10
+ def test_parse_ranges_and_commas() -> None:
11
+ assert parse_chapter_selection("1-3,5", 6) == [0, 1, 2, 4]
12
+
13
+
14
+ def test_parse_open_ended_range() -> None:
15
+ assert parse_chapter_selection("3-", 5) == [2, 3, 4]
16
+
17
+
18
+ def test_parse_invalid_range() -> None:
19
+ with pytest.raises(ValueError):
20
+ parse_chapter_selection("5-2", 6)
@@ -0,0 +1,27 @@
1
+ from pathlib import Path
2
+
3
+ from click.testing import CliRunner
4
+
5
+ from ttsforge.cli import main
6
+
7
+
8
+ def test_info_and_list_smoke(tmp_path: Path) -> None:
9
+ text = """Title: Sample Book
10
+ Author: Jane Doe
11
+ Language: English
12
+
13
+ CHAPTER I
14
+ This is the first chapter.
15
+
16
+ CHAPTER II
17
+ This is the second chapter.
18
+ """
19
+ input_file = tmp_path / "sample.txt"
20
+ input_file.write_text(text, encoding="utf-8")
21
+
22
+ runner = CliRunner()
23
+ info_result = runner.invoke(main, ["info", str(input_file)])
24
+ assert info_result.exit_code == 0
25
+
26
+ list_result = runner.invoke(main, ["list", str(input_file)])
27
+ assert list_result.exit_code == 0
@@ -20,7 +20,7 @@ class TestLanguageDescriptions:
20
20
 
21
21
  def test_all_language_codes_have_descriptions(self):
22
22
  """All language codes should have descriptions."""
23
- expected_codes = {"a", "b", "e", "f", "h", "i", "j", "p", "z"}
23
+ expected_codes = {"a", "b", "d", "e", "f", "h", "i", "j", "p", "z"}
24
24
  assert set(LANGUAGE_DESCRIPTIONS.keys()) == expected_codes
25
25
 
26
26
  def test_english_variants(self):
@@ -132,11 +132,6 @@ class TestDefaultVoiceForLang:
132
132
  lang in DEFAULT_VOICE_FOR_LANG
133
133
  ), f"Language {lang} needs default voice"
134
134
 
135
- def test_default_voices_exist_in_voices_list(self):
136
- """All default voices should exist in VOICES list."""
137
- for lang, voice in DEFAULT_VOICE_FOR_LANG.items():
138
- assert voice in VOICES, f"Default voice {voice} for {lang} not in VOICES"
139
-
140
135
  def test_default_voices_match_language(self):
141
136
  """Default voices should match their language."""
142
137
  for lang, voice in DEFAULT_VOICE_FOR_LANG.items():
@@ -0,0 +1,84 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from ttsforge.conversion import ChapterState, ConversionState
5
+
6
+
7
+ def test_conversion_state_roundtrip(tmp_path: Path) -> None:
8
+ state = ConversionState(
9
+ source_file="book.epub",
10
+ source_hash="abc123",
11
+ output_file="book.m4b",
12
+ work_dir=str(tmp_path),
13
+ voice="af_heart",
14
+ language="a",
15
+ speed=1.0,
16
+ split_mode="auto",
17
+ output_format="m4b",
18
+ chapters=[
19
+ ChapterState(
20
+ index=0,
21
+ title="Chapter 1",
22
+ content_hash="hash",
23
+ completed=True,
24
+ audio_file="chapter_001.wav",
25
+ duration=1.2,
26
+ char_count=100,
27
+ ssmd_file="chapter_001.ssmd",
28
+ ssmd_hash="ssmdhash",
29
+ )
30
+ ],
31
+ started_at="2024-01-01 00:00:00",
32
+ )
33
+ state_file = tmp_path / "state.json"
34
+ state.save(state_file)
35
+
36
+ loaded = ConversionState.load(state_file)
37
+ assert loaded is not None
38
+ assert loaded.voice == "af_heart"
39
+ assert loaded.chapters[0].audio_file == "chapter_001.wav"
40
+ assert loaded.chapters[0].completed is True
41
+
42
+ assert not (tmp_path / "state.json.tmp").exists()
43
+
44
+
45
+ def test_conversion_state_backward_compat(tmp_path: Path) -> None:
46
+ data = {
47
+ "version": 1,
48
+ "source_file": "book.epub",
49
+ "source_hash": "hash",
50
+ "output_file": "book.m4b",
51
+ "work_dir": str(tmp_path),
52
+ "voice": "af_heart",
53
+ "language": "a",
54
+ "speed": 1.0,
55
+ "split_mode": "auto",
56
+ "output_format": "m4b",
57
+ "chapters": [
58
+ {
59
+ "index": 0,
60
+ "title": "Chapter 1",
61
+ "content_hash": "hash",
62
+ "completed": False,
63
+ "audio_file": None,
64
+ "duration": 0.0,
65
+ "char_count": 10,
66
+ "ssmd_file": None,
67
+ "ssmd_hash": None,
68
+ }
69
+ ],
70
+ "segment_pause_min": 0.1,
71
+ "segment_pause_max": 0.3,
72
+ "paragraph_pause_min": 0.5,
73
+ "paragraph_pause_max": 1.0,
74
+ }
75
+
76
+ state_file = tmp_path / "legacy_state.json"
77
+ state_file.write_text(json.dumps(data), encoding="utf-8")
78
+
79
+ loaded = ConversionState.load(state_file)
80
+ assert loaded is not None
81
+ assert loaded.pause_sentence == 0.2
82
+ assert loaded.pause_paragraph == 0.75
83
+ assert loaded.pause_clause == 0.3
84
+ assert loaded.pause_variance >= 0.01
@@ -13,8 +13,8 @@ class TestPhonemeDictionary:
13
13
  def test_load_simple_dictionary(self):
14
14
  """Test loading a simple phoneme dictionary."""
15
15
  test_dict = {
16
- "Misaki": "/misˈɑki/",
17
- "Kubernetes": "/kubɚnˈɛtɪs/",
16
+ "Misaki": "misˈɑki",
17
+ "Kubernetes": "kubɚnˈɛtɪs",
18
18
  }
19
19
 
20
20
  with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
@@ -40,8 +40,8 @@ class TestPhonemeDictionary:
40
40
  "language": "en-us",
41
41
  },
42
42
  "entries": {
43
- "Misaki": {"phoneme": "/misˈɑki/", "occurrences": 42},
44
- "nginx": {"phoneme": "/ˈɛnʤɪnˈɛks/", "occurrences": 8},
43
+ "Misaki": {"phoneme": "misˈɑki", "occurrences": 42},
44
+ "nginx": {"phoneme": "ˈɛnʤɪnˈɛks", "occurrences": 8},
45
45
  },
46
46
  }
47
47
 
@@ -67,8 +67,8 @@ class TestPhonemeDictionary:
67
67
  """Test loading dictionary with metadata format but simple string values."""
68
68
  test_dict = {
69
69
  "entries": {
70
- "Misaki": "/misˈɑki/",
71
- "nginx": "/ˈɛnʤɪnˈɛks/",
70
+ "Misaki": "misˈɑki",
71
+ "nginx": "ˈɛnʤɪnˈɛks",
72
72
  }
73
73
  }
74
74
 
@@ -112,8 +112,8 @@ class TestPhonemeDictionary:
112
112
  def test_phonemize_with_dictionary(self):
113
113
  """Test phonemization with custom dictionary - through SSMD notation."""
114
114
  test_dict = {
115
- "Misaki": "/misˈɑki/",
116
- "Kubernetes": "/kubɚnˈɛtɪs/",
115
+ "Misaki": "misˈɑki",
116
+ "Kubernetes": "kubɚnˈɛtɪs",
117
117
  }
118
118
 
119
119
  with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
@@ -130,8 +130,8 @@ class TestPhonemeDictionary:
130
130
  ssmd_text = tokenizer._phoneme_dictionary_obj.apply(text)
131
131
 
132
132
  # Verify SSMD notation is applied
133
- assert "[Misaki]{ph=" in ssmd_text or "[Misaki](ph:" in ssmd_text
134
- assert "[Kubernetes]{ph=" in ssmd_text or "[Kubernetes](ph:" in ssmd_text
133
+ assert "[Misaki]{ph=" in ssmd_text
134
+ assert "[Kubernetes]{ph=" in ssmd_text
135
135
  finally:
136
136
  Path(temp_path).unlink()
137
137
 
@@ -162,7 +162,7 @@ class TestPhonemeDictionary:
162
162
 
163
163
  def test_case_sensitive_matching(self):
164
164
  """Test case-sensitive dictionary matching."""
165
- test_dict = {"Misaki": "/misˈɑki/"}
165
+ test_dict = {"Misaki": "misˈɑki"}
166
166
 
167
167
  with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
168
168
  json.dump(test_dict, f)
@@ -184,13 +184,13 @@ class TestPhonemeDictionary:
184
184
  assert phoneme_count == 1, f"Expected 1 match, got {phoneme_count}"
185
185
 
186
186
  # Verify it's "Misaki" that matched
187
- assert "[Misaki]{ph=" in ssmd_text or "[Misaki](ph:" in ssmd_text
187
+ assert "[Misaki]{ph=" in ssmd_text
188
188
  finally:
189
189
  Path(temp_path).unlink()
190
190
 
191
191
  def test_word_boundaries(self):
192
192
  """Test that word boundaries are respected."""
193
- test_dict = {"test": "/tˈɛst/"}
193
+ test_dict = {"test": "tˈɛst"}
194
194
 
195
195
  with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
196
196
  json.dump(test_dict, f)
@@ -246,7 +246,7 @@ class TestPhonemeDictionary:
246
246
  def test_special_characters_in_words(self):
247
247
  """Test dictionary words with special regex characters (periods, etc.)."""
248
248
  # Use a simple word that can be phonemized
249
- test_dict = {"Misaki": "/misˈɑki/"}
249
+ test_dict = {"Misaki": "misˈɑki"}
250
250
 
251
251
  with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
252
252
  json.dump(test_dict, f)
@@ -262,13 +262,13 @@ class TestPhonemeDictionary:
262
262
  ssmd_text = tokenizer._phoneme_dictionary_obj.apply(text)
263
263
 
264
264
  # Should use custom phoneme
265
- assert "[Misaki]{ph=" in ssmd_text or "[Misaki](ph:" in ssmd_text
265
+ assert "[Misaki]{ph=" in ssmd_text
266
266
  finally:
267
267
  Path(temp_path).unlink()
268
268
 
269
269
  def test_multiple_occurrences(self):
270
270
  """Test that all occurrences of a word are replaced."""
271
- test_dict = {"test": "/tˈɛst/"}
271
+ test_dict = {"test": "tˈɛst"}
272
272
 
273
273
  with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
274
274
  json.dump(test_dict, f)
@@ -293,8 +293,8 @@ class TestPhonemeDictionary:
293
293
  # Note: Multi-word phoneme annotations have limitations in kokorog2p's
294
294
  # markdown processing. Testing with overlapping single words instead.
295
295
  test_dict = {
296
- "testing": "/tˈɛstɪŋ/",
297
- "test": "/tˈɛst/", # Shorter word, different pronunciation
296
+ "testing": "tˈɛstɪŋ",
297
+ "test": "tˈɛst", # Shorter word, different pronunciation
298
298
  }
299
299
 
300
300
  with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: