ttsforge 0.1.1__tar.gz → 0.1.2__tar.gz

This diff compares the contents of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (79)
  1. {ttsforge-0.1.1 → ttsforge-0.1.2}/.github/workflows/codecov.yml +1 -1
  2. {ttsforge-0.1.1 → ttsforge-0.1.2}/PKG-INFO +2 -1
  3. {ttsforge-0.1.1 → ttsforge-0.1.2}/README.md +1 -0
  4. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/cli.rst +3 -0
  5. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/configuration.rst +1 -0
  6. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_constants.py +1 -6
  7. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/__init__.py +3 -18
  8. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/_version.py +3 -3
  9. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/cli/commands_conversion.py +69 -3
  10. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/cli/commands_phonemes.py +19 -1
  11. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/cli/commands_utility.py +18 -1
  12. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/cli/helpers.py +1 -0
  13. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/constants.py +11 -2
  14. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/conversion.py +103 -42
  15. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/kokoro_runner.py +38 -5
  16. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/phoneme_conversion.py +52 -1
  17. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge.egg-info/PKG-INFO +2 -1
  18. {ttsforge-0.1.1 → ttsforge-0.1.2}/.codecrate.toml +0 -0
  19. {ttsforge-0.1.1 → ttsforge-0.1.2}/.coveragerc +0 -0
  20. {ttsforge-0.1.1 → ttsforge-0.1.2}/.github/pytest.ini +0 -0
  21. {ttsforge-0.1.1 → ttsforge-0.1.2}/.github/workflows/pre-commit.yml +0 -0
  22. {ttsforge-0.1.1 → ttsforge-0.1.2}/.github/workflows/python-publish.yml +0 -0
  23. {ttsforge-0.1.1 → ttsforge-0.1.2}/.github/workflows/tests.yml +0 -0
  24. {ttsforge-0.1.1 → ttsforge-0.1.2}/.gitignore +0 -0
  25. {ttsforge-0.1.1 → ttsforge-0.1.2}/.pre-commit-config.yaml +0 -0
  26. {ttsforge-0.1.1 → ttsforge-0.1.2}/.prettierrc.yml +0 -0
  27. {ttsforge-0.1.1 → ttsforge-0.1.2}/.readthedocs.yaml +0 -0
  28. {ttsforge-0.1.1 → ttsforge-0.1.2}/.ruff.toml +0 -0
  29. {ttsforge-0.1.1 → ttsforge-0.1.2}/LICENSE +0 -0
  30. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/api/index.rst +0 -0
  31. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/conf.py +0 -0
  32. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/filename_templates.rst +0 -0
  33. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/index.rst +0 -0
  34. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/installation.rst +0 -0
  35. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/make.bat +0 -0
  36. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/make.py +0 -0
  37. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/quickstart.rst +0 -0
  38. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/requirements.txt +0 -0
  39. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/ssmd.rst +0 -0
  40. {ttsforge-0.1.1 → ttsforge-0.1.2}/docs/voices.rst +0 -0
  41. {ttsforge-0.1.1 → ttsforge-0.1.2}/examples/__init__.py +0 -0
  42. {ttsforge-0.1.1 → ttsforge-0.1.2}/examples/phoneme_export.py +0 -0
  43. {ttsforge-0.1.1 → ttsforge-0.1.2}/pyproject.toml +0 -0
  44. {ttsforge-0.1.1 → ttsforge-0.1.2}/requirements-test.txt +0 -0
  45. {ttsforge-0.1.1 → ttsforge-0.1.2}/setup.cfg +0 -0
  46. {ttsforge-0.1.1 → ttsforge-0.1.2}/setup.py +0 -0
  47. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/__init__.py +0 -0
  48. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_chapter_announcement.py +0 -0
  49. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_chapter_marker_leading_space.py +0 -0
  50. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_chapter_selection.py +0 -0
  51. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_cli.py +0 -0
  52. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_cli_smoke.py +0 -0
  53. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_conversion.py +0 -0
  54. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_conversion_state.py +0 -0
  55. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_epub_chapter_markers.py +0 -0
  56. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_name_extractor.py +0 -0
  57. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_onnx_backend.py +0 -0
  58. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_phoneme_conversion.py +0 -0
  59. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_phoneme_dictionary.py +0 -0
  60. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_phonemes.py +0 -0
  61. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_ssmd_generator.py +0 -0
  62. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_tokenizer.py +0 -0
  63. {ttsforge-0.1.1 → ttsforge-0.1.2}/tests/test_utils.py +0 -0
  64. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/audio_merge.py +0 -0
  65. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/audio_player.py +0 -0
  66. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/chapter_selection.py +0 -0
  67. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/cli/__init__.py +0 -0
  68. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/input_reader.py +0 -0
  69. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/kokoro_lang.py +0 -0
  70. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/name_extractor.py +0 -0
  71. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/phonemes.py +0 -0
  72. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/ssmd_generator.py +0 -0
  73. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/utils.py +0 -0
  74. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge/vocab/__init__.py +0 -0
  75. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge.egg-info/SOURCES.txt +0 -0
  76. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge.egg-info/dependency_links.txt +0 -0
  77. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge.egg-info/entry_points.txt +0 -0
  78. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge.egg-info/requires.txt +0 -0
  79. {ttsforge-0.1.1 → ttsforge-0.1.2}/ttsforge.egg-info/top_level.txt +0 -0
@@ -12,7 +12,7 @@ jobs:
12
12
  - name: Install espeak-ng
13
13
  run: |
14
14
  sudo apt-get update
15
- sudo apt-get install -y espeak-ng
15
+ sudo apt-get install -y espeak-ng ffmpeg
16
16
  - name: 'generate report'
17
17
  run: |
18
18
  pip install coverage click pytest pytest-cov
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ttsforge
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: Generate audiobooks from EPUB files using Kokoro ONNX TTS.
5
5
  Author-email: Holger Nahrstaedt <nahrstaedt@gmail.com>
6
6
  License: MIT License
@@ -617,6 +617,7 @@ ttsforge convert book.epub --gpu
617
617
  | `pause_paragraph` | `0.9` | Paragraph pause (seconds) |
618
618
  | `pause_variance` | `0.05` | Pause variance (seconds) |
619
619
  | `pause_mode` | `auto` | Pause mode (`tts`, `manual`, `auto`) |
620
+ | `enable_short_sentence` | `None` | Enable special handling for short sentences |
620
621
  | `announce_chapters` | `true` | Speak chapter titles |
621
622
  | `chapter_pause_after_title` | `2.0` | Pause after chapter title |
622
623
  | `phonemization_lang` | `None` | Override phonemization language |
@@ -554,6 +554,7 @@ ttsforge convert book.epub --gpu
554
554
  | `pause_paragraph` | `0.9` | Paragraph pause (seconds) |
555
555
  | `pause_variance` | `0.05` | Pause variance (seconds) |
556
556
  | `pause_mode` | `auto` | Pause mode (`tts`, `manual`, `auto`) |
557
+ | `enable_short_sentence` | `None` | Enable special handling for short sentences |
557
558
  | `announce_chapters` | `true` | Speak chapter titles |
558
559
  | `chapter_pause_after_title` | `2.0` | Pause after chapter title |
559
560
  | `phonemization_lang` | `None` | Override phonemization language |
@@ -85,6 +85,9 @@ Options
85
85
  ``--pause-mode MODE``
86
86
  Pause mode: ``tts``, ``manual``, or ``auto``. Default: ``auto``.
87
87
 
88
+ ``--enable-short-sentence``
89
+ Enable special handling for short sentences (fewer than 5 words).
90
+
88
91
  ``--announce-chapters / --no-announce-chapters``
89
92
  Read chapter titles aloud before chapter content. Default: enabled.
90
93
 
@@ -452,6 +452,7 @@ Here's an example ``config.json`` with custom settings:
452
452
  "pause_paragraph": 0.9,
453
453
  "pause_variance": 0.05,
454
454
  "pause_mode": "auto",
455
+ "enable_short_sentence": null,
455
456
  "announce_chapters": true,
456
457
  "chapter_pause_after_title": 2.0,
457
458
  "save_chapters_separately": false,
@@ -20,7 +20,7 @@ class TestLanguageDescriptions:
20
20
 
21
21
  def test_all_language_codes_have_descriptions(self):
22
22
  """All language codes should have descriptions."""
23
- expected_codes = {"a", "b", "e", "f", "h", "i", "j", "p", "z"}
23
+ expected_codes = {"a", "b", "d", "e", "f", "h", "i", "j", "p", "z"}
24
24
  assert set(LANGUAGE_DESCRIPTIONS.keys()) == expected_codes
25
25
 
26
26
  def test_english_variants(self):
@@ -132,11 +132,6 @@ class TestDefaultVoiceForLang:
132
132
  lang in DEFAULT_VOICE_FOR_LANG
133
133
  ), f"Language {lang} needs default voice"
134
134
 
135
- def test_default_voices_exist_in_voices_list(self):
136
- """All default voices should exist in VOICES list."""
137
- for lang, voice in DEFAULT_VOICE_FOR_LANG.items():
138
- assert voice in VOICES, f"Default voice {voice} for {lang} not in VOICES"
139
-
140
135
  def test_default_voices_match_language(self):
141
136
  """Default voices should match their language."""
142
137
  for lang, voice in DEFAULT_VOICE_FOR_LANG.items():
@@ -18,7 +18,7 @@ from pykokoro.tokenizer import (
18
18
  Tokenizer,
19
19
  )
20
20
  from pykokoro.constants import SUPPORTED_LANGUAGES
21
-
21
+ from pykokoro.onnx_backend import VOICE_NAMES_BY_VARIANT
22
22
  from .constants import (
23
23
  DEFAULT_CONFIG,
24
24
  LANGUAGE_DESCRIPTIONS,
@@ -27,23 +27,7 @@ from .constants import (
27
27
  )
28
28
 
29
29
  # Import from pykokoro
30
- try:
31
- from pykokoro.constants import SAMPLE_RATE
32
- from pykokoro.onnx_backend import LANG_CODE_TO_ONNX
33
- except ImportError:
34
- # Fallback values if pykokoro not installed
35
- SAMPLE_RATE = 24000
36
- LANG_CODE_TO_ONNX = {
37
- "a": "en-us",
38
- "b": "en-gb",
39
- "e": "es",
40
- "f": "fr-fr",
41
- "h": "hi",
42
- "i": "it",
43
- "j": "ja",
44
- "p": "pt",
45
- "z": "zh",
46
- }
30
+ from pykokoro.constants import SAMPLE_RATE
47
31
 
48
32
  from .conversion import (
49
33
  Chapter,
@@ -73,6 +57,7 @@ __all__ = [
73
57
  "LANGUAGE_DESCRIPTIONS",
74
58
  "SUPPORTED_OUTPUT_FORMATS",
75
59
  "VOICES",
60
+ "VOICE_NAMES_BY_VARIANT",
76
61
  # Conversion
77
62
  "Chapter",
78
63
  "ConversionOptions",
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.1'
32
- __version_tuple__ = version_tuple = (0, 1, 1)
31
+ __version__ = version = '0.1.2'
32
+ __version_tuple__ = version_tuple = (0, 1, 2)
33
33
 
34
- __commit_id__ = commit_id = 'g08367e850'
34
+ __commit_id__ = commit_id = 'gb31ed0898'
@@ -17,6 +17,7 @@ from typing import Literal, TypedDict, cast
17
17
 
18
18
  import click
19
19
  import numpy as np
20
+ from pykokoro.onnx_backend import DEFAULT_MODEL_QUALITY, ModelQuality
20
21
  from rich.panel import Panel
21
22
  from rich.progress import (
22
23
  BarColumn,
@@ -37,7 +38,6 @@ from ..constants import (
37
38
  LANGUAGE_DESCRIPTIONS,
38
39
  SUPPORTED_OUTPUT_FORMATS,
39
40
  VOICE_PREFIX_TO_LANG,
40
- VOICES,
41
41
  )
42
42
  from ..conversion import (
43
43
  Chapter,
@@ -54,6 +54,7 @@ from ..utils import (
54
54
  load_config,
55
55
  resolve_conversion_defaults,
56
56
  )
57
+ from .commands_utility import _resolve_model_source_and_variant, _resolve_voice_names
57
58
  from .helpers import DEFAULT_SAMPLE_TEXT, console, parse_voice_parameter
58
59
 
59
60
 
@@ -64,6 +65,14 @@ class ContentItem(TypedDict):
64
65
  page_number: NotRequired[int]
65
66
 
66
67
 
68
+ def get_voices() -> list[str]:
69
+ """Get the list of available voices."""
70
+ cfg = load_config()
71
+
72
+ model_source, model_variant = _resolve_model_source_and_variant(cfg)
73
+ return _resolve_voice_names(model_source, model_variant)
74
+
75
+
67
76
  @click.command()
68
77
  @click.argument("epub_file", type=click.Path(exists=True, path_type=Path))
69
78
  @click.option(
@@ -82,7 +91,7 @@ class ContentItem(TypedDict):
82
91
  @click.option(
83
92
  "-v",
84
93
  "--voice",
85
- type=click.Choice(VOICES),
94
+ type=click.Choice(get_voices()),
86
95
  help="Voice to use for TTS.",
87
96
  )
88
97
  @click.option(
@@ -150,6 +159,12 @@ class ContentItem(TypedDict):
150
159
  default=None,
151
160
  help="Pause mode: 'tts', 'manual', or 'auto' (default: auto).",
152
161
  )
162
+ @click.option(
163
+ "--enable-short-sentence/--disable-short-sentence",
164
+ "enable_short_sentence",
165
+ default=None,
166
+ help="Enable/disable special handling for short sentences.",
167
+ )
153
168
  @click.option(
154
169
  "--announce-chapters/--no-announce-chapters",
155
170
  "announce_chapters",
@@ -296,6 +311,7 @@ def convert( # noqa: C901
296
311
  pause_paragraph: float | None,
297
312
  pause_variance: float | None,
298
313
  pause_mode: str | None,
314
+ enable_short_sentence: bool | None,
299
315
  announce_chapters: bool | None,
300
316
  chapter_pause: float | None,
301
317
  title: str | None,
@@ -325,6 +341,10 @@ def convert( # noqa: C901
325
341
  config = load_config()
326
342
  model_path = ctx.obj.get("model_path") if ctx.obj else None
327
343
  voices_path = ctx.obj.get("voices_path") if ctx.obj else None
344
+ model_source, model_variant = _resolve_model_source_and_variant(config)
345
+ model_quality = cast(
346
+ ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
347
+ )
328
348
 
329
349
  # Get format first (needed for output path construction)
330
350
  fmt = output_format or config.get("default_format", "m4b")
@@ -467,6 +487,9 @@ def convert( # noqa: C901
467
487
  language=language or "a",
468
488
  speed=speed or config.get("default_speed", 1.0),
469
489
  use_gpu=use_gpu if use_gpu is not None else config.get("use_gpu", False),
490
+ model_source=model_source,
491
+ model_variant=model_variant,
492
+ model_quality=model_quality,
470
493
  num_chapters=len(selected_indices) if selected_indices else len(epub_chapters),
471
494
  title=effective_title,
472
495
  author=effective_author,
@@ -510,6 +533,9 @@ def convert( # noqa: C901
510
533
  output_format=output_format or config.get("default_format", "m4b"),
511
534
  output_dir=output.parent,
512
535
  use_gpu=use_gpu if use_gpu is not None else config.get("use_gpu", False),
536
+ model_quality=model_quality,
537
+ model_source=model_source,
538
+ model_variant=model_variant,
513
539
  silence_between_chapters=silence or config.get("silence_between_chapters", 2.0),
514
540
  lang=lang or config.get("phonemization_lang"),
515
541
  use_mixed_language=(
@@ -556,6 +582,11 @@ def convert( # noqa: C901
556
582
  pause_mode=(
557
583
  pause_mode if pause_mode is not None else config.get("pause_mode", "auto")
558
584
  ),
585
+ enable_short_sentence=(
586
+ enable_short_sentence
587
+ if enable_short_sentence is not None
588
+ else config.get("enable_short_sentence", None)
589
+ ),
559
590
  announce_chapters=(
560
591
  announce_chapters
561
592
  if announce_chapters is not None
@@ -947,6 +978,10 @@ def sample(
947
978
 
948
979
  # Load config for defaults
949
980
  user_config = load_config()
981
+ model_source, model_variant = _resolve_model_source_and_variant(user_config)
982
+ model_quality = cast(
983
+ ModelQuality, user_config.get("model_quality", DEFAULT_MODEL_QUALITY)
984
+ )
950
985
  resolved_defaults = resolve_conversion_defaults(
951
986
  user_config,
952
987
  {
@@ -980,6 +1015,9 @@ def sample(
980
1015
  use_gpu=resolved_defaults["use_gpu"],
981
1016
  split_mode=resolved_defaults["split_mode"],
982
1017
  lang=resolved_defaults["lang"],
1018
+ model_quality=model_quality,
1019
+ model_source=model_source,
1020
+ model_variant=model_variant,
983
1021
  use_mixed_language=(
984
1022
  use_mixed_language or user_config.get("use_mixed_language", False)
985
1023
  ),
@@ -1117,6 +1155,9 @@ def _show_conversion_summary(
1117
1155
  language: str,
1118
1156
  speed: float,
1119
1157
  use_gpu: bool,
1158
+ model_source: str,
1159
+ model_variant: str,
1160
+ model_quality: str | None,
1120
1161
  num_chapters: int,
1121
1162
  title: str,
1122
1163
  author: str,
@@ -1139,6 +1180,9 @@ def _show_conversion_summary(
1139
1180
  table.add_row("Chapters", str(num_chapters))
1140
1181
  table.add_row("Voice", voice)
1141
1182
  table.add_row("Language", LANGUAGE_DESCRIPTIONS.get(language, language))
1183
+ table.add_row("Model Source", model_source)
1184
+ table.add_row("Model Variant", model_variant)
1185
+ table.add_row("Model Quality", str(model_quality))
1142
1186
  if lang:
1143
1187
  table.add_row("Phonemization Lang", f"{lang} (override)")
1144
1188
  if use_mixed_language:
@@ -1167,7 +1211,7 @@ def _show_conversion_summary(
1167
1211
  @click.option(
1168
1212
  "-v",
1169
1213
  "--voice",
1170
- type=click.Choice(VOICES),
1214
+ type=click.Choice(get_voices()),
1171
1215
  help="TTS voice to use.",
1172
1216
  )
1173
1217
  @click.option(
@@ -1271,6 +1315,11 @@ def _show_conversion_summary(
1271
1315
  default=None,
1272
1316
  help="Trim leading/trailing silence from audio.",
1273
1317
  )
1318
+ @click.option(
1319
+ "--enable-short-sentence/--disable-short-sentence",
1320
+ default=None,
1321
+ help="Enable special handling for short sentences.",
1322
+ )
1274
1323
  @click.pass_context
1275
1324
  def read( # noqa: C901
1276
1325
  ctx: click.Context,
@@ -1293,6 +1342,7 @@ def read( # noqa: C901
1293
1342
  pause_paragraph: float | None,
1294
1343
  pause_variance: float | None,
1295
1344
  pause_mode: str | None,
1345
+ enable_short_sentence: bool | None,
1296
1346
  ) -> None:
1297
1347
  """Read an EPUB or text file aloud with streaming playback.
1298
1348
 
@@ -1340,6 +1390,10 @@ def read( # noqa: C901
1340
1390
 
1341
1391
  # Load config for defaults
1342
1392
  config = load_config()
1393
+ model_source, model_variant = _resolve_model_source_and_variant(config)
1394
+ model_quality = cast(
1395
+ ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
1396
+ )
1343
1397
  resolved_defaults = resolve_conversion_defaults(
1344
1398
  config,
1345
1399
  {
@@ -1389,6 +1443,11 @@ def read( # noqa: C901
1389
1443
  effective_pause_mode = (
1390
1444
  pause_mode if pause_mode is not None else config.get("pause_mode", "auto")
1391
1445
  )
1446
+ effective_enable_short_sentence = (
1447
+ enable_short_sentence
1448
+ if enable_short_sentence is not None
1449
+ else config.get("enable_short_sentence", None)
1450
+ )
1392
1451
 
1393
1452
  # Get language code for TTS
1394
1453
  espeak_lang = LANG_CODE_TO_ONNX.get(effective_language, "en-us")
@@ -1645,11 +1704,15 @@ def read( # noqa: C901
1645
1704
  model_path=model_path,
1646
1705
  voices_path=voices_path,
1647
1706
  use_gpu=effective_use_gpu,
1707
+ model_quality=model_quality,
1708
+ model_source=model_source,
1709
+ model_variant=model_variant,
1648
1710
  )
1649
1711
  generation = GenerationConfig(
1650
1712
  speed=effective_speed,
1651
1713
  lang=espeak_lang,
1652
1714
  pause_mode=cast(Literal["tts", "manual", "auto"], effective_pause_mode),
1715
+ enable_short_sentence=effective_enable_short_sentence,
1653
1716
  pause_clause=effective_pause_clause,
1654
1717
  pause_sentence=effective_pause_sentence,
1655
1718
  pause_paragraph=effective_pause_paragraph,
@@ -1658,6 +1721,9 @@ def read( # noqa: C901
1658
1721
  pipeline_config = PipelineConfig(
1659
1722
  voice=effective_voice,
1660
1723
  generation=generation,
1724
+ model_quality=model_quality,
1725
+ model_source=model_source,
1726
+ model_variant=model_variant,
1661
1727
  model_path=model_path,
1662
1728
  voices_path=voices_path,
1663
1729
  )
@@ -10,9 +10,10 @@ This module contains commands for working with phonemes and pre-tokenized conten
10
10
  import re
11
11
  import sys
12
12
  from pathlib import Path
13
- from typing import Any
13
+ from typing import Any, cast
14
14
 
15
15
  import click
16
+ from pykokoro.onnx_backend import DEFAULT_MODEL_QUALITY, ModelQuality
16
17
  from rich.progress import (
17
18
  BarColumn,
18
19
  Progress,
@@ -37,6 +38,7 @@ from ..utils import (
37
38
  format_filename_template,
38
39
  load_config,
39
40
  )
41
+ from .commands_utility import _resolve_model_source_and_variant
40
42
  from .helpers import console, parse_voice_parameter
41
43
 
42
44
 
@@ -500,6 +502,10 @@ def phonemes_convert(
500
502
  config = load_config()
501
503
  model_path = ctx.obj.get("model_path") if ctx.obj else None
502
504
  voices_path = ctx.obj.get("voices_path") if ctx.obj else None
505
+ model_source, model_variant = _resolve_model_source_and_variant(config)
506
+ model_quality = cast(
507
+ ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
508
+ )
503
509
 
504
510
  # Get book info and metadata
505
511
  book_info = book.get_info()
@@ -599,6 +605,9 @@ def phonemes_convert(
599
605
  speed=speed,
600
606
  output_format=fmt,
601
607
  use_gpu=gpu,
608
+ model_quality=model_quality,
609
+ model_source=model_source,
610
+ model_variant=model_variant,
602
611
  silence_between_chapters=silence,
603
612
  pause_clause=(
604
613
  pause_clause
@@ -834,6 +843,12 @@ def phonemes_preview(
834
843
  # Auto-detect if voice is a blend
835
844
  parsed_voice, parsed_voice_blend = parse_voice_parameter(voice)
836
845
 
846
+ config = load_config()
847
+ model_source, model_variant = _resolve_model_source_and_variant(config)
848
+ model_quality = cast(
849
+ ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
850
+ )
851
+
837
852
  # Initialize converter
838
853
  options = ConversionOptions(
839
854
  phoneme_dictionary_path=str(phoneme_dict) if phoneme_dict else None,
@@ -841,6 +856,9 @@ def phonemes_preview(
841
856
  voice_blend=parsed_voice_blend,
842
857
  language=language,
843
858
  output_format="wav", # Explicitly set WAV format
859
+ model_quality=model_quality,
860
+ model_source=model_source,
861
+ model_variant=model_variant,
844
862
  )
845
863
  converter = TTSConverter(options)
846
864
 
@@ -555,6 +555,14 @@ def _resolve_model_source_and_variant(cfg: dict) -> tuple[ModelSource, ModelVari
555
555
  return cast(ModelSource, source), cast(ModelVariant, variant)
556
556
 
557
557
 
558
+ def _resolve_voice_names(
559
+ model_source: ModelSource = "huggingface",
560
+ model_variant: ModelVariant = "v1.0",
561
+ ) -> list[str]:
562
+ """Return the list of voice names for the given model variant."""
563
+ return VOICE_NAMES_BY_VARIANT.get(model_variant, VOICE_NAMES)
564
+
565
+
558
566
  def _get_cache_voices_path(
559
567
  model_source: ModelSource,
560
568
  model_variant: ModelVariant,
@@ -708,7 +716,7 @@ def download(ctx: click.Context, force: bool, quality: str | None) -> None:
708
716
 
709
717
  # ---- voices
710
718
  if model_source == "huggingface":
711
- voice_names = VOICE_NAMES_BY_VARIANT.get(model_variant, VOICE_NAMES)
719
+ voice_names = _resolve_voice_names(model_source, model_variant)
712
720
  total_voices = len(voice_names)
713
721
  voices_task = progress.add_task(
714
722
  f"Downloading voices (0/{total_voices})...", total=total_voices
@@ -1269,6 +1277,12 @@ def list_names( # noqa: C901
1269
1277
  )
1270
1278
  console.print("[dim]Type 'q' to quit, 's' to skip, 'r' to replay.[/dim]\n")
1271
1279
 
1280
+ cfg = load_config()
1281
+ model_source, model_variant = _resolve_model_source_and_variant(cfg)
1282
+ model_quality = cast(
1283
+ ModelQuality, cfg.get("model_quality", DEFAULT_MODEL_QUALITY)
1284
+ )
1285
+
1272
1286
  # Initialize converter with phoneme dictionary
1273
1287
  try:
1274
1288
  # Auto-detect if voice is a blend
@@ -1279,6 +1293,9 @@ def list_names( # noqa: C901
1279
1293
  voice=parsed_voice or "af_sky",
1280
1294
  voice_blend=parsed_voice_blend,
1281
1295
  language=language,
1296
+ model_quality=model_quality,
1297
+ model_source=model_source,
1298
+ model_variant=model_variant,
1282
1299
  )
1283
1300
  converter = TTSConverter(options)
1284
1301
 
@@ -50,6 +50,7 @@ DEFAULT_SAMPLE_TEXT = (
50
50
  DEMO_TEXT = {
51
51
  "a": "Hello! This audio was generated by {voice}. How do you like it?",
52
52
  "b": "Hello! This audio was generated by {voice}. How do you like it?",
53
+ "d": "Hallo! Dieses Audio wurde von {voice} erzeugt. Wie gefallt es Ihnen?",
53
54
  "e": "Hola! Este audio fue generado por {voice}. Que te parece?",
54
55
  "f": "Bonjour! Cet audio a ete genere par {voice}. Comment le trouvez-vous?",
55
56
  "h": "Namaste! Yah audio {voice} dwara banaya gaya hai. Aapko kaisa laga?",
@@ -3,10 +3,10 @@
3
3
  # from pykokoro.onnx_backend import VOICE_NAMES_V1_0
4
4
  # from pykokoro.onnx_backend import VOICE_NAMES_V1_1_ZH, VOICE_NAMES_V1_1_DE
5
5
 
6
- from pykokoro.onnx_backend import VOICE_NAMES_V1_0 as VOICE_NAMES
6
+ from pykokoro.onnx_backend import DEFAULT_MODEL_SOURCE, VOICE_NAMES_V1_0
7
7
 
8
8
  # Re-export from pykokoro for convenience
9
- VOICES = VOICE_NAMES
9
+ VOICES = VOICE_NAMES_V1_0
10
10
 
11
11
  # Audio constants from pykokoro
12
12
  try:
@@ -24,6 +24,7 @@ PROGRAM_DESCRIPTION = "Generate audiobooks from EPUB files using Kokoro ONNX TTS
24
24
  LANGUAGE_DESCRIPTIONS = {
25
25
  "a": "American English",
26
26
  "b": "British English",
27
+ "d": "German",
27
28
  "e": "Spanish",
28
29
  "f": "French",
29
30
  "h": "Hindi",
@@ -35,6 +36,8 @@ LANGUAGE_DESCRIPTIONS = {
35
36
 
36
37
  # ISO language code to ttsforge language code mapping
37
38
  ISO_TO_LANG_CODE = {
39
+ "de": "d",
40
+ "de-de": "d",
38
41
  "en": "a", # Default to American English
39
42
  "en-us": "a",
40
43
  "en-gb": "b",
@@ -62,6 +65,8 @@ VOICE_PREFIX_TO_LANG = {
62
65
  "am": "a", # American Male
63
66
  "bf": "b", # British Female
64
67
  "bm": "b", # British Male
68
+ "df": "d", # German Female
69
+ "dm": "d", # German Male
65
70
  "ef": "e", # Spanish Female
66
71
  "em": "e", # Spanish Male
67
72
  "ff": "f", # French Female
@@ -82,6 +87,7 @@ VOICE_PREFIX_TO_LANG = {
82
87
  DEFAULT_VOICE_FOR_LANG = {
83
88
  "a": "af_heart",
84
89
  "b": "bf_emma",
90
+ "d": "df_eva",
85
91
  "e": "ef_dora",
86
92
  "f": "ff_siwis",
87
93
  "h": "hf_alpha",
@@ -115,6 +121,7 @@ DEFAULT_CONFIG = {
115
121
  "use_gpu": False, # GPU requires onnxruntime-gpu
116
122
  # Model quality: fp32, fp16, q8, q8f16, q4, q4f16, uint8, uint8f16
117
123
  "model_quality": "fp32",
124
+ "model_source": DEFAULT_MODEL_SOURCE,
118
125
  "model_variant": "v1.0",
119
126
  "silence_between_chapters": 2.0,
120
127
  "save_chapters_separately": False,
@@ -128,6 +135,7 @@ DEFAULT_CONFIG = {
128
135
  "pause_paragraph": 0.9,
129
136
  "pause_variance": 0.05,
130
137
  "pause_mode": "auto", # "tts", "manual", or "auto"
138
+ "enable_short_sentence": None,
131
139
  # Language override for phonemization (e.g., 'de', 'fr', 'en-us')
132
140
  # If None, language is determined from voice prefix
133
141
  "phonemization_lang": None,
@@ -154,6 +162,7 @@ AUDIO_CHANNELS = 1
154
162
  SAMPLE_TEXTS = {
155
163
  "a": "This is a sample of the selected voice.",
156
164
  "b": "This is a sample of the selected voice.",
165
+ "d": "Dies ist ein Beispiel für die ausgewählte Stimme.",
157
166
  "e": "Este es una muestra de la voz seleccionada.",
158
167
  "f": "Ceci est un exemple de la voix sélectionnée.",
159
168
  "h": "यह चयनित आवाज़ का एक नमूना है।", # noqa: E501