PyPI - ttsforge - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

ttsforge 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

ttsforge/__init__.py +3 -18
ttsforge/_version.py +2 -2
ttsforge/cli/commands_conversion.py +75 -10
ttsforge/cli/commands_phonemes.py +22 -4
ttsforge/cli/commands_utility.py +18 -1
ttsforge/cli/helpers.py +1 -0
ttsforge/constants.py +13 -4
ttsforge/conversion.py +112 -51
ttsforge/kokoro_runner.py +38 -5
ttsforge/name_extractor.py +3 -3
ttsforge/phoneme_conversion.py +61 -10
ttsforge/ssmd_generator.py +4 -4
{ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/METADATA +13 -12
ttsforge-0.1.2.dist-info/RECORD +27 -0
ttsforge-0.1.0.dist-info/RECORD +0 -27
{ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/WHEEL +0 -0
{ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/entry_points.txt +0 -0
{ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/licenses/LICENSE +0 -0
{ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/top_level.txt +0 -0

ttsforge/__init__.py CHANGED Viewed

@@ -18,7 +18,7 @@ from pykokoro.tokenizer import (
     Tokenizer,
 )
 from pykokoro.constants import SUPPORTED_LANGUAGES
+from pykokoro.onnx_backend import VOICE_NAMES_BY_VARIANT
 from .constants import (
     DEFAULT_CONFIG,
     LANGUAGE_DESCRIPTIONS,
@@ -27,23 +27,7 @@ from .constants import (
 )
 # Import from pykokoro
-try:
-    from pykokoro.constants import SAMPLE_RATE
-    from pykokoro.onnx_backend import LANG_CODE_TO_ONNX
-except ImportError:
-    # Fallback values if pykokoro not installed
-    SAMPLE_RATE = 24000
-    LANG_CODE_TO_ONNX = {
-        "a": "en-us",
-        "b": "en-gb",
-        "e": "es",
-        "f": "fr-fr",
-        "h": "hi",
-        "i": "it",
-        "j": "ja",
-        "p": "pt",
-        "z": "zh",
-    }
+from pykokoro.constants import SAMPLE_RATE
 from .conversion import (
     Chapter,
@@ -73,6 +57,7 @@ __all__ = [
     "LANGUAGE_DESCRIPTIONS",
     "SUPPORTED_OUTPUT_FORMATS",
     "VOICES",
+    "VOICE_NAMES_BY_VARIANT",
     # Conversion
     "Chapter",
     "ConversionOptions",

ttsforge/_version.py CHANGED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '0.1.0'
-__version_tuple__ = version_tuple = (0, 1, 0)
+__version__ = version = '0.1.2'
+__version_tuple__ = version_tuple = (0, 1, 2)
 __commit_id__ = commit_id = None

ttsforge/cli/commands_conversion.py CHANGED Viewed

@@ -17,6 +17,7 @@ from typing import Literal, TypedDict, cast
 import click
 import numpy as np
+from pykokoro.onnx_backend import DEFAULT_MODEL_QUALITY, ModelQuality
 from rich.panel import Panel
 from rich.progress import (
     BarColumn,
@@ -37,7 +38,6 @@ from ..constants import (
     LANGUAGE_DESCRIPTIONS,
     SUPPORTED_OUTPUT_FORMATS,
     VOICE_PREFIX_TO_LANG,
-    VOICES,
 )
 from ..conversion import (
     Chapter,
@@ -54,6 +54,7 @@ from ..utils import (
     load_config,
     resolve_conversion_defaults,
 )
+from .commands_utility import _resolve_model_source_and_variant, _resolve_voice_names
 from .helpers import DEFAULT_SAMPLE_TEXT, console, parse_voice_parameter
@@ -64,6 +65,14 @@ class ContentItem(TypedDict):
     page_number: NotRequired[int]
+def get_voices() -> list[str]:
+    """Get the list of available voices."""
+    cfg = load_config()
+    model_source, model_variant = _resolve_model_source_and_variant(cfg)
+    return _resolve_voice_names(model_source, model_variant)
 @click.command()
 @click.argument("epub_file", type=click.Path(exists=True, path_type=Path))
 @click.option(
@@ -82,7 +91,7 @@ class ContentItem(TypedDict):
 @click.option(
     "-v",
     "--voice",
-    type=click.Choice(VOICES),
+    type=click.Choice(get_voices()),
     help="Voice to use for TTS.",
 )
 @click.option(
@@ -150,6 +159,12 @@ class ContentItem(TypedDict):
     default=None,
     help="Pause mode: 'tts', 'manual', or 'auto' (default: auto).",
 )
+@click.option(
+    "--enable-short-sentence/--disable-short-sentence",
+    "enable_short_sentence",
+    default=None,
+    help="Enable/disable special handling for short sentences.",
+)
 @click.option(
     "--announce-chapters/--no-announce-chapters",
     "announce_chapters",
@@ -296,6 +311,7 @@ def convert(  # noqa: C901
     pause_paragraph: float | None,
     pause_variance: float | None,
     pause_mode: str | None,
+    enable_short_sentence: bool | None,
     announce_chapters: bool | None,
     chapter_pause: float | None,
     title: str | None,
@@ -325,6 +341,10 @@ def convert(  # noqa: C901
     config = load_config()
     model_path = ctx.obj.get("model_path") if ctx.obj else None
     voices_path = ctx.obj.get("voices_path") if ctx.obj else None
+    model_source, model_variant = _resolve_model_source_and_variant(config)
+    model_quality = cast(
+        ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
+    )
     # Get format first (needed for output path construction)
     fmt = output_format or config.get("default_format", "m4b")
@@ -467,6 +487,9 @@ def convert(  # noqa: C901
         language=language or "a",
         speed=speed or config.get("default_speed", 1.0),
         use_gpu=use_gpu if use_gpu is not None else config.get("use_gpu", False),
+        model_source=model_source,
+        model_variant=model_variant,
+        model_quality=model_quality,
         num_chapters=len(selected_indices) if selected_indices else len(epub_chapters),
         title=effective_title,
         author=effective_author,
@@ -510,6 +533,9 @@ def convert(  # noqa: C901
         output_format=output_format or config.get("default_format", "m4b"),
         output_dir=output.parent,
         use_gpu=use_gpu if use_gpu is not None else config.get("use_gpu", False),
+        model_quality=model_quality,
+        model_source=model_source,
+        model_variant=model_variant,
         silence_between_chapters=silence or config.get("silence_between_chapters", 2.0),
         lang=lang or config.get("phonemization_lang"),
         use_mixed_language=(
@@ -536,17 +562,17 @@ def convert(  # noqa: C901
         pause_clause=(
             pause_clause
             if pause_clause is not None
-            else config.get("pause_clause", 0.25)
+            else config.get("pause_clause", 0.3)
         ),
         pause_sentence=(
             pause_sentence
             if pause_sentence is not None
-            else config.get("pause_sentence", 0.2)
+            else config.get("pause_sentence", 0.5)
         ),
         pause_paragraph=(
             pause_paragraph
             if pause_paragraph is not None
-            else config.get("pause_paragraph", 0.75)
+            else config.get("pause_paragraph", 0.9)
         ),
         pause_variance=(
             pause_variance
@@ -556,6 +582,11 @@ def convert(  # noqa: C901
         pause_mode=(
             pause_mode if pause_mode is not None else config.get("pause_mode", "auto")
         ),
+        enable_short_sentence=(
+            enable_short_sentence
+            if enable_short_sentence is not None
+            else config.get("enable_short_sentence", None)
+        ),
         announce_chapters=(
             announce_chapters
             if announce_chapters is not None
@@ -947,6 +978,10 @@ def sample(
     # Load config for defaults
     user_config = load_config()
+    model_source, model_variant = _resolve_model_source_and_variant(user_config)
+    model_quality = cast(
+        ModelQuality, user_config.get("model_quality", DEFAULT_MODEL_QUALITY)
+    )
     resolved_defaults = resolve_conversion_defaults(
         user_config,
         {
@@ -980,6 +1015,9 @@ def sample(
         use_gpu=resolved_defaults["use_gpu"],
         split_mode=resolved_defaults["split_mode"],
         lang=resolved_defaults["lang"],
+        model_quality=model_quality,
+        model_source=model_source,
+        model_variant=model_variant,
         use_mixed_language=(
             use_mixed_language or user_config.get("use_mixed_language", False)
         ),
@@ -1117,6 +1155,9 @@ def _show_conversion_summary(
     language: str,
     speed: float,
     use_gpu: bool,
+    model_source: str,
+    model_variant: str,
+    model_quality: str | None,
     num_chapters: int,
     title: str,
     author: str,
@@ -1139,6 +1180,9 @@ def _show_conversion_summary(
     table.add_row("Chapters", str(num_chapters))
     table.add_row("Voice", voice)
     table.add_row("Language", LANGUAGE_DESCRIPTIONS.get(language, language))
+    table.add_row("Model Source", model_source)
+    table.add_row("Model Variant", model_variant)
+    table.add_row("Model Quality", str(model_quality))
     if lang:
         table.add_row("Phonemization Lang", f"{lang} (override)")
     if use_mixed_language:
@@ -1167,7 +1211,7 @@ def _show_conversion_summary(
 @click.option(
     "-v",
     "--voice",
-    type=click.Choice(VOICES),
+    type=click.Choice(get_voices()),
     help="TTS voice to use.",
 )
 @click.option(
@@ -1271,6 +1315,11 @@ def _show_conversion_summary(
     default=None,
     help="Trim leading/trailing silence from audio.",
 )
+@click.option(
+    "--enable-short-sentence/--disable-short-sentence",
+    default=None,
+    help="Enable special handling for short sentences.",
+)
 @click.pass_context
 def read(  # noqa: C901
     ctx: click.Context,
@@ -1293,6 +1342,7 @@ def read(  # noqa: C901
     pause_paragraph: float | None,
     pause_variance: float | None,
     pause_mode: str | None,
+    enable_short_sentence: bool | None,
 ) -> None:
     """Read an EPUB or text file aloud with streaming playback.
@@ -1340,6 +1390,10 @@ def read(  # noqa: C901
     # Load config for defaults
     config = load_config()
+    model_source, model_variant = _resolve_model_source_and_variant(config)
+    model_quality = cast(
+        ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
+    )
     resolved_defaults = resolve_conversion_defaults(
         config,
         {
@@ -1369,17 +1423,17 @@ def read(  # noqa: C901
         effective_split_mode = config_split_mode
     # Pause settings
     effective_pause_clause = (
-        pause_clause if pause_clause is not None else config.get("pause_clause", 0.25)
+        pause_clause if pause_clause is not None else config.get("pause_clause", 0.3)
     )
     effective_pause_sentence = (
         pause_sentence
         if pause_sentence is not None
-        else config.get("pause_sentence", 0.2)
+        else config.get("pause_sentence", 0.5)
     )
     effective_pause_paragraph = (
         pause_paragraph
         if pause_paragraph is not None
-        else config.get("pause_paragraph", 0.75)
+        else config.get("pause_paragraph", 0.9)
     )
     effective_pause_variance = (
         pause_variance
@@ -1389,6 +1443,11 @@ def read(  # noqa: C901
     effective_pause_mode = (
         pause_mode if pause_mode is not None else config.get("pause_mode", "auto")
     )
+    effective_enable_short_sentence = (
+        enable_short_sentence
+        if enable_short_sentence is not None
+        else config.get("enable_short_sentence", None)
+    )
     # Get language code for TTS
     espeak_lang = LANG_CODE_TO_ONNX.get(effective_language, "en-us")
@@ -1645,11 +1704,15 @@ def read(  # noqa: C901
             model_path=model_path,
             voices_path=voices_path,
             use_gpu=effective_use_gpu,
+            model_quality=model_quality,
+            model_source=model_source,
+            model_variant=model_variant,
         )
         generation = GenerationConfig(
             speed=effective_speed,
             lang=espeak_lang,
             pause_mode=cast(Literal["tts", "manual", "auto"], effective_pause_mode),
+            enable_short_sentence=effective_enable_short_sentence,
             pause_clause=effective_pause_clause,
             pause_sentence=effective_pause_sentence,
             pause_paragraph=effective_pause_paragraph,
@@ -1658,6 +1721,9 @@ def read(  # noqa: C901
         pipeline_config = PipelineConfig(
             voice=effective_voice,
             generation=generation,
+            model_quality=model_quality,
+            model_source=model_source,
+            model_variant=model_variant,
             model_path=model_path,
             voices_path=voices_path,
         )
@@ -1695,7 +1761,6 @@ def read(  # noqa: C901
         def generate_audio(text_segment: str) -> tuple[np.ndarray, int]:
             """Generate audio for a text segment."""
-            print(text_segment)
             result = pipeline.run(text_segment)
             return result.audio, result.sample_rate

ttsforge/cli/commands_phonemes.py CHANGED Viewed

@@ -10,9 +10,10 @@ This module contains commands for working with phonemes and pre-tokenized conten
 import re
 import sys
 from pathlib import Path
-from typing import Any
+from typing import Any, cast
 import click
+from pykokoro.onnx_backend import DEFAULT_MODEL_QUALITY, ModelQuality
 from rich.progress import (
     BarColumn,
     Progress,
@@ -37,6 +38,7 @@ from ..utils import (
     format_filename_template,
     load_config,
 )
+from .commands_utility import _resolve_model_source_and_variant
 from .helpers import console, parse_voice_parameter
@@ -500,6 +502,10 @@ def phonemes_convert(
     config = load_config()
     model_path = ctx.obj.get("model_path") if ctx.obj else None
     voices_path = ctx.obj.get("voices_path") if ctx.obj else None
+    model_source, model_variant = _resolve_model_source_and_variant(config)
+    model_quality = cast(
+        ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
+    )
     # Get book info and metadata
     book_info = book.get_info()
@@ -599,21 +605,24 @@ def phonemes_convert(
         speed=speed,
         output_format=fmt,
         use_gpu=gpu,
+        model_quality=model_quality,
+        model_source=model_source,
+        model_variant=model_variant,
         silence_between_chapters=silence,
         pause_clause=(
             pause_clause
             if pause_clause is not None
-            else config.get("pause_clause", 0.25)
+            else config.get("pause_clause", 0.3)
         ),
         pause_sentence=(
             pause_sentence
             if pause_sentence is not None
-            else config.get("pause_sentence", 0.2)
+            else config.get("pause_sentence", 0.5)
         ),
         pause_paragraph=(
             pause_paragraph
             if pause_paragraph is not None
-            else config.get("pause_paragraph", 0.75)
+            else config.get("pause_paragraph", 0.9)
         ),
         pause_variance=(
             pause_variance
@@ -834,6 +843,12 @@ def phonemes_preview(
             # Auto-detect if voice is a blend
             parsed_voice, parsed_voice_blend = parse_voice_parameter(voice)
+            config = load_config()
+            model_source, model_variant = _resolve_model_source_and_variant(config)
+            model_quality = cast(
+                ModelQuality, config.get("model_quality", DEFAULT_MODEL_QUALITY)
+            )
             # Initialize converter
             options = ConversionOptions(
                 phoneme_dictionary_path=str(phoneme_dict) if phoneme_dict else None,
@@ -841,6 +856,9 @@ def phonemes_preview(
                 voice_blend=parsed_voice_blend,
                 language=language,
                 output_format="wav",  # Explicitly set WAV format
+                model_quality=model_quality,
+                model_source=model_source,
+                model_variant=model_variant,
             )
             converter = TTSConverter(options)

ttsforge/cli/commands_utility.py CHANGED Viewed

@@ -555,6 +555,14 @@ def _resolve_model_source_and_variant(cfg: dict) -> tuple[ModelSource, ModelVari
     return cast(ModelSource, source), cast(ModelVariant, variant)
+def _resolve_voice_names(
+    model_source: ModelSource = "huggingface",
+    model_variant: ModelVariant = "v1.0",
+) -> list[str]:
+    """Return the list of voice names for the given model variant."""
+    return VOICE_NAMES_BY_VARIANT.get(model_variant, VOICE_NAMES)
 def _get_cache_voices_path(
     model_source: ModelSource,
     model_variant: ModelVariant,
@@ -708,7 +716,7 @@ def download(ctx: click.Context, force: bool, quality: str | None) -> None:
         # ---- voices
         if model_source == "huggingface":
-            voice_names = VOICE_NAMES_BY_VARIANT.get(model_variant, VOICE_NAMES)
+            voice_names = _resolve_voice_names(model_source, model_variant)
             total_voices = len(voice_names)
             voices_task = progress.add_task(
                 f"Downloading voices (0/{total_voices})...", total=total_voices
@@ -1269,6 +1277,12 @@ def list_names(  # noqa: C901
         )
         console.print("[dim]Type 'q' to quit, 's' to skip, 'r' to replay.[/dim]\n")
+        cfg = load_config()
+        model_source, model_variant = _resolve_model_source_and_variant(cfg)
+        model_quality = cast(
+            ModelQuality, cfg.get("model_quality", DEFAULT_MODEL_QUALITY)
+        )
         # Initialize converter with phoneme dictionary
         try:
             # Auto-detect if voice is a blend
@@ -1279,6 +1293,9 @@ def list_names(  # noqa: C901
                 voice=parsed_voice or "af_sky",
                 voice_blend=parsed_voice_blend,
                 language=language,
+                model_quality=model_quality,
+                model_source=model_source,
+                model_variant=model_variant,
             )
             converter = TTSConverter(options)

ttsforge/cli/helpers.py CHANGED Viewed

@@ -50,6 +50,7 @@ DEFAULT_SAMPLE_TEXT = (
 DEMO_TEXT = {
     "a": "Hello! This audio was generated by {voice}. How do you like it?",
     "b": "Hello! This audio was generated by {voice}. How do you like it?",
+    "d": "Hallo! Dieses Audio wurde von {voice} erzeugt. Wie gefallt es Ihnen?",
     "e": "Hola! Este audio fue generado por {voice}. Que te parece?",
     "f": "Bonjour! Cet audio a ete genere par {voice}. Comment le trouvez-vous?",
     "h": "Namaste! Yah audio {voice} dwara banaya gaya hai. Aapko kaisa laga?",

ttsforge/constants.py CHANGED Viewed

@@ -3,10 +3,10 @@
 # from pykokoro.onnx_backend import VOICE_NAMES_V1_0
 # from pykokoro.onnx_backend import VOICE_NAMES_V1_1_ZH, VOICE_NAMES_V1_1_DE
-from pykokoro.onnx_backend import VOICE_NAMES_V1_0 as VOICE_NAMES
+from pykokoro.onnx_backend import DEFAULT_MODEL_SOURCE, VOICE_NAMES_V1_0
 # Re-export from pykokoro for convenience
-VOICES = VOICE_NAMES
+VOICES = VOICE_NAMES_V1_0
 # Audio constants from pykokoro
 try:
@@ -24,6 +24,7 @@ PROGRAM_DESCRIPTION = "Generate audiobooks from EPUB files using Kokoro ONNX TTS
 LANGUAGE_DESCRIPTIONS = {
     "a": "American English",
     "b": "British English",
+    "d": "German",
     "e": "Spanish",
     "f": "French",
     "h": "Hindi",
@@ -35,6 +36,8 @@ LANGUAGE_DESCRIPTIONS = {
 # ISO language code to ttsforge language code mapping
 ISO_TO_LANG_CODE = {
+    "de": "d",
+    "de-de": "d",
     "en": "a",  # Default to American English
     "en-us": "a",
     "en-gb": "b",
@@ -62,6 +65,8 @@ VOICE_PREFIX_TO_LANG = {
     "am": "a",  # American Male
     "bf": "b",  # British Female
     "bm": "b",  # British Male
+    "df": "d",  # German Female
+    "dm": "d",  # German Male
     "ef": "e",  # Spanish Female
     "em": "e",  # Spanish Male
     "ff": "f",  # French Female
@@ -82,6 +87,7 @@ VOICE_PREFIX_TO_LANG = {
 DEFAULT_VOICE_FOR_LANG = {
     "a": "af_heart",
     "b": "bf_emma",
+    "d": "df_eva",
     "e": "ef_dora",
     "f": "ff_siwis",
     "h": "hf_alpha",
@@ -115,6 +121,7 @@ DEFAULT_CONFIG = {
     "use_gpu": False,  # GPU requires onnxruntime-gpu
     # Model quality: fp32, fp16, q8, q8f16, q4, q4f16, uint8, uint8f16
     "model_quality": "fp32",
+    "model_source": DEFAULT_MODEL_SOURCE,
     "model_variant": "v1.0",
     "silence_between_chapters": 2.0,
     "save_chapters_separately": False,
@@ -123,11 +130,12 @@ DEFAULT_CONFIG = {
     "default_split_mode": "auto",
     "default_content_mode": "chapters",  # Content mode for read: chapters or pages
     "default_page_size": 2000,  # Synthetic page size in characters for pages mode
-    "pause_clause": 0.5,
-    "pause_sentence": 0.7,
+    "pause_clause": 0.3,
+    "pause_sentence": 0.5,
     "pause_paragraph": 0.9,
     "pause_variance": 0.05,
     "pause_mode": "auto",  # "tts", "manual", or "auto
+    "enable_short_sentence": None,
     # Language override for phonemization (e.g., 'de', 'fr', 'en-us')
     # If None, language is determined from voice prefix
     "phonemization_lang": None,
@@ -154,6 +162,7 @@ AUDIO_CHANNELS = 1
 SAMPLE_TEXTS = {
     "a": "This is a sample of the selected voice.",
     "b": "This is a sample of the selected voice.",
+    "d": "Dies ist ein Beispiel für die ausgewählte Stimme.",
     "e": "Este es una muestra de la voz seleccionada.",
     "f": "Ceci est un exemple de la voix sélectionnée.",
     "h": "यह चयनित आवाज़ का एक नमूना है।",  # noqa: E501

ttsforge/conversion.py CHANGED Viewed

@@ -11,6 +11,14 @@ from pathlib import Path
 from typing import Any, Literal, Optional, cast
 import soundfile as sf
+from pykokoro.onnx_backend import (
+    DEFAULT_MODEL_QUALITY,
+    DEFAULT_MODEL_SOURCE,
+    DEFAULT_MODEL_VARIANT,
+    ModelQuality,
+    ModelSource,
+    ModelVariant,
+)
 from .audio_merge import AudioMerger, MergeMeta
 from .constants import (
@@ -123,12 +131,16 @@ class ConversionState:
     speed: float = 1.0
     split_mode: str = "auto"
     output_format: str = "m4b"
+    model_quality: ModelQuality | None = DEFAULT_MODEL_QUALITY
+    model_source: ModelSource = DEFAULT_MODEL_SOURCE
+    model_variant: ModelVariant = DEFAULT_MODEL_VARIANT
     silence_between_chapters: float = 2.0
-    pause_clause: float = 0.25
-    pause_sentence: float = 0.2
-    pause_paragraph: float = 0.75
+    pause_clause: float = 0.3
+    pause_sentence: float = 0.5
+    pause_paragraph: float = 0.9
     pause_variance: float = 0.05
     pause_mode: str = "auto"  # "tts", "manual", or "auto
+    enable_short_sentence: bool | None = None
     lang: str | None = None  # Language override for phonemization
     chapters: list[ChapterState] = field(default_factory=list)
     started_at: str = ""
@@ -174,17 +186,25 @@ class ConversionState:
             # Set defaults for new parameters
             if "pause_clause" not in data:
-                data["pause_clause"] = 0.25
+                data["pause_clause"] = 0.3
             if "pause_sentence" not in data:
-                data["pause_sentence"] = 0.2
+                data["pause_sentence"] = 0.5
             if "pause_paragraph" not in data:
-                data["pause_paragraph"] = 0.75
+                data["pause_paragraph"] = 0.9
             if "pause_variance" not in data:
                 data["pause_variance"] = 0.05
             if "pause_mode" not in data:
                 data["pause_mode"] = "auto"
+            if "enable_short_sentence" not in data:
+                data["enable_short_sentence"] = None
             if "lang" not in data:
                 data["lang"] = None
+            if "model_quality" not in data:
+                data["model_quality"] = DEFAULT_MODEL_QUALITY
+            if "model_source" not in data:
+                data["model_source"] = DEFAULT_MODEL_SOURCE
+            if "model_variant" not in data:
+                data["model_variant"] = DEFAULT_MODEL_VARIANT
             return cls(**data)
         except (json.JSONDecodeError, TypeError, KeyError):
@@ -204,12 +224,16 @@ class ConversionState:
             "speed": self.speed,
             "split_mode": self.split_mode,
             "output_format": self.output_format,
+            "model_quality": self.model_quality,
+            "model_source": self.model_source,
+            "model_variant": self.model_variant,
             "silence_between_chapters": self.silence_between_chapters,
             "pause_clause": self.pause_clause,
             "pause_sentence": self.pause_sentence,
             "pause_paragraph": self.pause_paragraph,
             "pause_variance": self.pause_variance,
             "pause_mode": self.pause_mode,
+            "enable_short_sentence": self.enable_short_sentence,
             "lang": self.lang,
             "chapters": [
                 {
@@ -289,11 +313,12 @@ class ConversionOptions:
     phoneme_dictionary_path: str | None = None
     phoneme_dict_case_sensitive: bool = False
     # Pause settings (pykokoro built-in pause handling)
-    pause_clause: float = 0.25  # For clause boundaries (commas)
-    pause_sentence: float = 0.2  # For sentence boundaries
-    pause_paragraph: float = 0.75  # For paragraph boundaries
+    pause_clause: float = 0.3  # For clause boundaries (commas)
+    pause_sentence: float = 0.5  # For sentence boundaries
+    pause_paragraph: float = 0.9  # For paragraph boundaries
     pause_variance: float = 0.05  # Standard deviation for natural variation
     pause_mode: str = "auto"  # "tts", "manual", or "auto
+    enable_short_sentence: bool | None = None  # Enable short sentence handling
     # Chapter announcement settings
     announce_chapters: bool = True  # Read chapter titles aloud before content
     chapter_pause_after_title: float = 2.0  # Pause after chapter title (seconds)
@@ -315,6 +340,9 @@ class ConversionOptions:
     # Filename template for chapter files
     chapter_filename_template: str = "{chapter_num:03d}_{book_title}_{chapter_title}"
     # Custom ONNX model path (None = use default downloaded model)
+    model_quality: ModelQuality | None = DEFAULT_MODEL_QUALITY
+    model_source: ModelSource = DEFAULT_MODEL_SOURCE
+    model_variant: ModelVariant = DEFAULT_MODEL_VARIANT
     model_path: Path | None = None
     # Custom voices.bin path (None = use default downloaded voices)
     voices_path: Path | None = None
@@ -420,6 +448,9 @@ class TTSConverter:
             pause_sentence=self.options.pause_sentence,
             pause_paragraph=self.options.pause_paragraph,
             pause_variance=self.options.pause_variance,
+            model_quality=self.options.model_quality,
+            model_source=self.options.model_source,
+            model_variant=self.options.model_variant,
             model_path=self.options.model_path,
             voices_path=self.options.voices_path,
             voice_blend=self.options.voice_blend,
@@ -600,53 +631,79 @@ class TTSConverter:
                         )
                         state = None
                     else:
-                        # Check if settings differ from saved state
-                        settings_changed = (
-                            state.voice != self.options.voice
-                            or state.language != self.options.language
-                            or state.speed != self.options.speed
-                            or state.split_mode != self.options.split_mode
-                            or state.silence_between_chapters
-                            != self.options.silence_between_chapters
-                            or state.pause_clause != self.options.pause_clause
-                            or state.pause_sentence != self.options.pause_sentence
-                            or state.pause_paragraph != self.options.pause_paragraph
-                            or state.pause_variance != self.options.pause_variance
-                            or state.pause_mode != self.options.pause_mode
-                            or state.lang != self.options.lang
+                        model_settings_changed = (
+                            state.model_quality != self.options.model_quality
+                            or state.model_source != self.options.model_source
+                            or state.model_variant != self.options.model_variant
                         )
-                        if settings_changed:
+                        if model_settings_changed:
                             self.log(
-                                f"Restoring settings from previous session: "
-                                f"voice={state.voice}, language={state.language}, "
-                                f"lang_override={state.lang}, "
-                                f"speed={state.speed}, "
-                                f"split_mode={state.split_mode}, "
-                                f"silence={state.silence_between_chapters}s, "
-                                f"pauses: clause={state.pause_clause}s "
-                                f"sent={state.pause_sentence}s "
-                                f"para={state.pause_paragraph}s "
-                                f"var={state.pause_variance}s "
-                                f"pause_mode={state.pause_mode}",
-                                "info",
+                                "Model settings changed, starting fresh conversion",
+                                "warning",
+                            )
+                            state = None
+                        else:
+                            # Check if settings differ from saved state
+                            settings_changed = (
+                                state.voice != self.options.voice
+                                or state.language != self.options.language
+                                or state.speed != self.options.speed
+                                or state.split_mode != self.options.split_mode
+                                or state.silence_between_chapters
+                                != self.options.silence_between_chapters
+                                or state.pause_clause != self.options.pause_clause
+                                or state.pause_sentence != self.options.pause_sentence
+                                or state.pause_paragraph != self.options.pause_paragraph
+                                or state.pause_variance != self.options.pause_variance
+                                or state.pause_mode != self.options.pause_mode
+                                or state.enable_short_sentence
+                                != self.options.enable_short_sentence
+                                or state.lang != self.options.lang
                             )
-                        # Apply saved settings to options for consistency
-                        self.options.voice = state.voice
-                        self.options.language = state.language
-                        self.options.speed = state.speed
-                        self.options.split_mode = state.split_mode
-                        self.options.output_format = state.output_format
-                        self.options.silence_between_chapters = (
-                            state.silence_between_chapters
-                        )
-                        self.options.pause_clause = state.pause_clause
-                        self.options.pause_sentence = state.pause_sentence
-                        self.options.pause_paragraph = state.pause_paragraph
-                        self.options.pause_variance = state.pause_variance
-                        self.options.pause_mode = state.pause_mode
-                        self.options.lang = state.lang
+                            if settings_changed:
+                                self.log(
+                                    f"Restoring settings from previous session: "
+                                    f"voice={state.voice}, language={state.language}, "
+                                    f"lang_override={state.lang}, "
+                                    f"speed={state.speed}, "
+                                    f"split_mode={state.split_mode}, "
+                                    f"silence={state.silence_between_chapters}s, "
+                                    f"pauses: clause={state.pause_clause}s "
+                                    f"sent={state.pause_sentence}s "
+                                    f"para={state.pause_paragraph}s "
+                                    f"var={state.pause_variance}s "
+                                    f"pause_mode={state.pause_mode}, "
+                                    f"enable_short_sentence="
+                                    f"{state.enable_short_sentence}, "
+                                    f"model_source={state.model_source}, "
+                                    f"model_variant={state.model_variant}, "
+                                    f"model_quality={state.model_quality}",
+                                    "info",
+                                )
+                            # Apply saved settings to options for consistency
+                            self.options.voice = state.voice
+                            self.options.language = state.language
+                            self.options.speed = state.speed
+                            self.options.split_mode = state.split_mode
+                            self.options.output_format = state.output_format
+                            self.options.silence_between_chapters = (
+                                state.silence_between_chapters
+                            )
+                            self.options.pause_clause = state.pause_clause
+                            self.options.pause_sentence = state.pause_sentence
+                            self.options.pause_paragraph = state.pause_paragraph
+                            self.options.pause_variance = state.pause_variance
+                            self.options.pause_mode = state.pause_mode
+                            self.options.enable_short_sentence = (
+                                state.enable_short_sentence
+                            )
+                            self.options.lang = state.lang
+                            self.options.model_quality = state.model_quality
+                            self.options.model_source = state.model_source
+                            self.options.model_variant = state.model_variant
             if state is None:
                 # Create new state
@@ -661,12 +718,16 @@ class TTSConverter:
                     speed=self.options.speed,
                     split_mode=self.options.split_mode,
                     output_format=self.options.output_format,
+                    model_quality=self.options.model_quality,
+                    model_source=self.options.model_source,
+                    model_variant=self.options.model_variant,
                     silence_between_chapters=self.options.silence_between_chapters,
                     pause_clause=self.options.pause_clause,
                     pause_sentence=self.options.pause_sentence,
                     pause_paragraph=self.options.pause_paragraph,
                     pause_variance=self.options.pause_variance,
                     pause_mode=self.options.pause_mode,
+                    enable_short_sentence=self.options.enable_short_sentence,
                     lang=self.options.lang,
                     chapters=[
                         ChapterState(

ttsforge/kokoro_runner.py CHANGED Viewed

@@ -7,11 +7,19 @@ from typing import Any, Literal, Protocol, cast
 import numpy as np
 from pykokoro import GenerationConfig, KokoroPipeline, PipelineConfig
 from pykokoro.onnx_backend import (
+    DEFAULT_MODEL_QUALITY,
+    DEFAULT_MODEL_SOURCE,
+    DEFAULT_MODEL_VARIANT,
     Kokoro,
+    ModelQuality,
+    ModelSource,
+    ModelVariant,
     VoiceBlend,
     are_models_downloaded,
     download_all_models,
+    download_all_models_github,
 )
+from pykokoro.pipeline import build_pipeline
 from pykokoro.stages.audio_generation.onnx import OnnxAudioGenerationAdapter
 from pykokoro.stages.audio_postprocessing.onnx import OnnxAudioPostprocessingAdapter
 from pykokoro.stages.phoneme_processing.onnx import OnnxPhonemeProcessorAdapter
@@ -26,6 +34,9 @@ class KokoroRunOptions:
     pause_sentence: float
     pause_paragraph: float
     pause_variance: float
+    model_quality: ModelQuality | None = DEFAULT_MODEL_QUALITY
+    model_source: ModelSource = DEFAULT_MODEL_SOURCE
+    model_variant: ModelVariant = DEFAULT_MODEL_VARIANT
     model_path: Any | None = None
     voices_path: Any | None = None
     voice_blend: str | None = None
@@ -48,15 +59,32 @@ class KokoroRunner:
         if self._pipeline is not None:
             return
-        if not are_models_downloaded():
-            self.log("Downloading ONNX model files...")
-            download_all_models()
+        if self.opts.model_path is None or self.opts.voices_path is None:
+            model_quality = self.opts.model_quality or DEFAULT_MODEL_QUALITY
+            model_source = self.opts.model_source or DEFAULT_MODEL_SOURCE
+            if model_source == "github":
+                if not are_models_downloaded(quality=model_quality):
+                    self.log("Downloading ONNX model files from GitHub...")
+                download_all_models_github(
+                    variant=self.opts.model_variant,
+                    quality=model_quality,
+                )
+            else:
+                if not are_models_downloaded(quality=model_quality):
+                    self.log("Downloading ONNX model files...")
+                download_all_models(
+                    variant=self.opts.model_variant,
+                    quality=model_quality,
+                )
         self._kokoro = Kokoro(
             model_path=self.opts.model_path,
             voices_path=self.opts.voices_path,
             use_gpu=self.opts.use_gpu,
             tokenizer_config=self.opts.tokenizer_config,
+            model_quality=self.opts.model_quality,
+            model_source=self.opts.model_source,
+            model_variant=self.opts.model_variant,
         )
         assert self._kokoro is not None
@@ -88,14 +116,18 @@ class KokoroRunner:
         pipeline_cfg = PipelineConfig(
             voice=self._voice_style,
             generation=GenerationConfig(speed=self.opts.speed, lang="en-us"),
+            model_quality=self.opts.model_quality,
+            model_source=self.opts.model_source,
+            model_variant=self.opts.model_variant,
             model_path=self.opts.model_path,
             voices_path=self.opts.voices_path,
             tokenizer_config=self.opts.tokenizer_config,
         )
         # Use the same adapters everywhere (text + phonemes)
-        self._pipeline = KokoroPipeline(
-            pipeline_cfg,
+        self._pipeline = build_pipeline(
+            config=pipeline_cfg,
+            backend=self._kokoro,
             phoneme_processing=OnnxPhonemeProcessorAdapter(self._kokoro),
             audio_generation=OnnxAudioGenerationAdapter(self._kokoro),
             audio_postprocessing=OnnxAudioPostprocessingAdapter(self._kokoro),
@@ -116,6 +148,7 @@ class KokoroRunner:
             lang=lang_code,
             is_phonemes=is_phonemes,
             pause_mode=pause_mode,
+            enable_short_sentence=self.opts.enable_short_sentence,
             pause_clause=self.opts.pause_clause,
             pause_sentence=self.opts.pause_sentence,
             pause_paragraph=self.opts.pause_paragraph,

ttsforge/name_extractor.py CHANGED Viewed

@@ -174,7 +174,7 @@ def generate_phoneme_suggestions(
         Dictionary with phoneme suggestions and metadata:
         {
             "name": {
-                "phoneme": "/phoneme/",
+                "phoneme": "phoneme",
                 "occurrences": count,
                 "suggestion_quality": "auto"
             }
@@ -190,7 +190,7 @@ def generate_phoneme_suggestions(
             phoneme = phonemize(name, language=language).phonemes
             # Wrap in / / format for dictionary
-            phoneme_formatted = f"/{phoneme}/"
+            phoneme_formatted = f"{phoneme}"
             suggestions[name] = {
                 "phoneme": phoneme_formatted,
@@ -201,7 +201,7 @@ def generate_phoneme_suggestions(
             logger.warning(f"Failed to generate phoneme for '{name}': {e}")
             # Add placeholder
             suggestions[name] = {
-                "phoneme": "/FIXME/",
+                "phoneme": "FIXME",
                 "occurrences": count,
                 "suggestion_quality": "error",
                 "error": str(e),

ttsforge/phoneme_conversion.py CHANGED Viewed

@@ -15,6 +15,14 @@ from typing import Any, Literal, Optional, cast
 import numpy as np
 import soundfile as sf
+from pykokoro.onnx_backend import (
+    DEFAULT_MODEL_QUALITY,
+    DEFAULT_MODEL_SOURCE,
+    DEFAULT_MODEL_VARIANT,
+    ModelQuality,
+    ModelSource,
+    ModelVariant,
+)
 from .audio_merge import AudioMerger, MergeMeta
 from .chapter_selection import parse_chapter_selection
@@ -94,12 +102,16 @@ class PhonemeConversionState:
     voice: str = ""
     speed: float = 1.0
     output_format: str = "m4b"
+    model_quality: ModelQuality | None = DEFAULT_MODEL_QUALITY
+    model_source: ModelSource = DEFAULT_MODEL_SOURCE
+    model_variant: ModelVariant = DEFAULT_MODEL_VARIANT
     silence_between_chapters: float = 2.0
-    pause_clause: float = 0.25
-    pause_sentence: float = 0.2
-    pause_paragraph: float = 0.75
+    pause_clause: float = 0.3
+    pause_sentence: float = 0.5
+    pause_paragraph: float = 0.9
     pause_variance: float = 0.05
     pause_mode: str = "auto"
+    enable_short_sentence: bool | None = None
     lang: str | None = None  # Language override for phonemization
     chapters: list[PhonemeChapterState] = field(default_factory=list)
     started_at: str = ""
@@ -145,17 +157,25 @@ class PhonemeConversionState:
             # Set defaults for new parameters
             if "pause_clause" not in data:
-                data["pause_clause"] = 0.25
+                data["pause_clause"] = 0.3
             if "pause_sentence" not in data:
-                data["pause_sentence"] = 0.2
+                data["pause_sentence"] = 0.5
             if "pause_paragraph" not in data:
-                data["pause_paragraph"] = 0.75
+                data["pause_paragraph"] = 0.9
             if "pause_variance" not in data:
                 data["pause_variance"] = 0.05
             if "pause_mode" not in data:
                 data["pause_mode"] = "auto"
+            if "enable_short_sentence" not in data:
+                data["enable_short_sentence"] = None
             if "lang" not in data:
                 data["lang"] = None
+            if "model_quality" not in data:
+                data["model_quality"] = DEFAULT_MODEL_QUALITY
+            if "model_source" not in data:
+                data["model_source"] = DEFAULT_MODEL_SOURCE
+            if "model_variant" not in data:
+                data["model_variant"] = DEFAULT_MODEL_VARIANT
             return cls(**data)
         except (json.JSONDecodeError, TypeError, KeyError):
@@ -172,12 +192,16 @@ class PhonemeConversionState:
             "voice": self.voice,
             "speed": self.speed,
             "output_format": self.output_format,
+            "model_quality": self.model_quality,
+            "model_source": self.model_source,
+            "model_variant": self.model_variant,
             "silence_between_chapters": self.silence_between_chapters,
             "pause_clause": self.pause_clause,
             "pause_sentence": self.pause_sentence,
             "pause_paragraph": self.pause_paragraph,
             "pause_variance": self.pause_variance,
             "pause_mode": self.pause_mode,
+            "enable_short_sentence": self.enable_short_sentence,
             "lang": self.lang,
             "chapters": [
                 {
@@ -210,11 +234,12 @@ class PhonemeConversionOptions:
     # If None, language from PhonemeSegments is used
     lang: str | None = None
     # Pause settings (pykokoro built-in pause handling)
-    pause_clause: float = 0.25  # For clause boundaries (commas)
-    pause_sentence: float = 0.2  # For sentence boundaries
-    pause_paragraph: float = 0.75  # For paragraph boundaries
+    pause_clause: float = 0.3  # For clause boundaries (commas)
+    pause_sentence: float = 0.5  # For sentence boundaries
+    pause_paragraph: float = 0.9  # For paragraph boundaries
     pause_variance: float = 0.05  # Standard deviation for natural variation
     pause_mode: str = "auto"  # "tts", "manual", or "auto"
+    enable_short_sentence: bool | None = None  # Enable short sentence handling
     # Chapter announcement settings
     announce_chapters: bool = True  # Read chapter titles aloud before content
     chapter_pause_after_title: float = 2.0  # Pause after chapter title (seconds)
@@ -235,6 +260,9 @@ class PhonemeConversionOptions:
     # Filename template for chapter files
     chapter_filename_template: str = "{chapter_num:03d}_{book_title}_{chapter_title}"
     # Custom ONNX model path (None = use default downloaded model)
+    model_quality: ModelQuality | None = DEFAULT_MODEL_QUALITY
+    model_source: ModelSource = DEFAULT_MODEL_SOURCE
+    model_variant: ModelVariant = DEFAULT_MODEL_VARIANT
     model_path: Path | None = None
     # Custom voices.bin path (None = use default downloaded voices)
     voices_path: Path | None = None
@@ -583,6 +611,11 @@ class PhonemeConverter:
                         or state.pause_paragraph != self.options.pause_paragraph
                         or state.pause_variance != self.options.pause_variance
                         or state.pause_mode != self.options.pause_mode
+                        or state.enable_short_sentence
+                        != self.options.enable_short_sentence
+                        or state.model_quality != self.options.model_quality
+                        or state.model_source != self.options.model_source
+                        or state.model_variant != self.options.model_variant
                     ):
                         self.log(
                             f"Restoring settings from previous session: "
@@ -592,7 +625,11 @@ class PhonemeConverter:
                             f"pause_sentence={state.pause_sentence}s, "
                             f"pause_paragraph={state.pause_paragraph}s, "
                             f"pause_variance={state.pause_variance}s, "
-                            f"pause_mode={state.pause_mode}",
+                            f"pause_mode={state.pause_mode}, "
+                            f"enable_short_sentence={state.enable_short_sentence}, "
+                            f"model_source={state.model_source}, "
+                            f"model_variant={state.model_variant}, "
+                            f"model_quality={state.model_quality}",
                             "info",
                         )
                         # Apply saved settings for consistency
@@ -607,6 +644,10 @@ class PhonemeConverter:
                         self.options.pause_paragraph = state.pause_paragraph
                         self.options.pause_variance = state.pause_variance
                         self.options.pause_mode = state.pause_mode
+                        self.options.enable_short_sentence = state.enable_short_sentence
+                        self.options.model_quality = state.model_quality
+                        self.options.model_source = state.model_source
+                        self.options.model_variant = state.model_variant
             if state is None:
                 # Create new state
@@ -617,12 +658,16 @@ class PhonemeConverter:
                     voice=self.options.voice,
                     speed=self.options.speed,
                     output_format=self.options.output_format,
+                    model_quality=self.options.model_quality,
+                    model_source=self.options.model_source,
+                    model_variant=self.options.model_variant,
                     silence_between_chapters=self.options.silence_between_chapters,
                     pause_clause=self.options.pause_clause,
                     pause_sentence=self.options.pause_sentence,
                     pause_paragraph=self.options.pause_paragraph,
                     pause_variance=self.options.pause_variance,
                     pause_mode=self.options.pause_mode,
+                    enable_short_sentence=self.options.enable_short_sentence,
                     chapters=[
                         PhonemeChapterState(
                             index=idx,
@@ -648,6 +693,9 @@ class PhonemeConverter:
                 pause_sentence=self.options.pause_sentence,
                 pause_paragraph=self.options.pause_paragraph,
                 pause_variance=self.options.pause_variance,
+                model_quality=self.options.model_quality,
+                model_source=self.options.model_source,
+                model_variant=self.options.model_variant,
                 model_path=self.options.model_path,
                 voices_path=self.options.voices_path,
                 voice_blend=self.options.voice_blend,
@@ -848,6 +896,9 @@ class PhonemeConverter:
                 pause_sentence=self.options.pause_sentence,
                 pause_paragraph=self.options.pause_paragraph,
                 pause_variance=self.options.pause_variance,
+                model_quality=self.options.model_quality,
+                model_source=self.options.model_source,
+                model_variant=self.options.model_variant,
                 model_path=self.options.model_path,
                 voices_path=self.options.voices_path,
                 voice_blend=self.options.voice_blend,

ttsforge/ssmd_generator.py CHANGED Viewed

@@ -2,8 +2,8 @@
 This module converts chapter text to SSMD format with markup for:
 - Emphasis (*text* for moderate, **text** for strong)
-- Language switches ([text](lang_code))
-- Phoneme substitutions ([word](ph: /phoneme/))
+- Language switches ([text]{lang="lang_code"})
+- Phoneme substitutions ([word]{ph="phoneme"})
 Note: Structural breaks (paragraphs, sentences, clauses) are NOT automatically
 added. The SSMD parser in pykokoro handles sentence detection automatically.
@@ -170,7 +170,7 @@ def _inject_phoneme_substitutions(
         if not phoneme:
             return matched_word
         clean_phoneme = phoneme.strip("/")
-        return f"[{matched_word}](ph: /{clean_phoneme}/)"
+        return f"[{matched_word}]" + "{" + f'ph="{clean_phoneme}"' + "}"
     segments: list[str] = []
     last_index = 0
@@ -260,7 +260,7 @@ def _strip_redundant_title(chapter_title: str, chapter_text: str) -> str:
         return chapter_text
     trimmed_line = title_pattern.sub("", first_line, count=1).lstrip(
-        " \t:;\-\u2013\u2014"
+        " \t:;-\u2013\u2014"
     )
     if trimmed_line:
         lines[first_idx] = trimmed_line

{ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ttsforge
-Version: 0.1.0
+Version: 0.1.2
 Summary: Generate audiobooks from EPUB files using Kokoro ONNX TTS.
 Author-email: Holger Nahrstaedt <nahrstaedt@gmail.com>
 License: MIT License
@@ -396,14 +396,14 @@ SSMD files use a simple markdown-like syntax:
 **Custom Phonemes**:
 ```
-[Hermione](ph: /hɝmˈIni/)    # Override pronunciation
-[API](ph: /ˌeɪpiˈaɪ/)        # Technical terms
+[Hermione]{ph="hɝmˈIni"}    # Override pronunciation
+[API]{ph="ˌeɪpiˈaɪ"}        # Technical terms
 ```
 **Language Switching** (planned):
 ```
-[Bonjour](fr)    # Mark text as French
+[Bonjour]{lang="fr"}    # Mark text as French
 ```
 #### Example SSMD File
@@ -411,7 +411,7 @@ SSMD files use a simple markdown-like syntax:
 ```ssmd
 Chapter One ...p
-[Harry](ph: /hæɹi/) Potter was a *highly unusual* boy in many ways. ...s
+[Harry]{ph="hæɹi"} Potter was a *highly unusual* boy in many ways. ...s
 For one thing, he **hated** the summer holidays more than any other
 time of year. ...s For another, he really wanted to do his homework,
 but was forced to do it in secret, in the dead of the night. ...p
@@ -498,12 +498,12 @@ Edit `custom_phonemes.json` to fix any incorrect phonemes. The file format is:
   },
   "entries": {
     "Hermione": {
-      "phoneme": "/hɝmˈIni/",
+      "phoneme": "hɝmˈIni",
       "occurrences": 847,
       "verified": false
     },
     "Kubernetes": {
-      "phoneme": "/kubɚnˈɛtɪs/",
+      "phoneme": "kubɚnˈɛtɪs",
       "occurrences": 12,
       "verified": false
     }
@@ -515,8 +515,8 @@ Or use the simple format:
 ```json
 {
-  "Hermione": "/hɝmˈIni/",
-  "Kubernetes": "/kubɚnˈɛtɪs/"
+  "Hermione": "hɝmˈIni",
+  "Kubernetes": "kubɚnˈɛtɪs"
 }
 ```
@@ -548,9 +548,9 @@ You can create a dictionary manually without extraction:
 ```json
 {
-  "Katniss": "/kætnɪs/",
-  "Peeta": "/pitə/",
-  "Panem": "/pænəm/"
+  "Katniss": "kætnɪs",
+  "Peeta": "pitə",
+  "Panem": "pænəm"
 }
 ```
@@ -617,6 +617,7 @@ ttsforge convert book.epub --gpu
 | `pause_paragraph`           | `0.9`          | Paragraph pause (seconds)            |
 | `pause_variance`            | `0.05`         | Pause variance (seconds)             |
 | `pause_mode`                | `auto`         | Pause mode (`tts`, `manual`, `auto`) |
+| `enable_short_sentence`     | `None`         | Handle short sentences               |
 | `announce_chapters`         | `true`         | Speak chapter titles                 |
 | `chapter_pause_after_title` | `2.0`          | Pause after chapter title            |
 | `phonemization_lang`        | `None`         | Override phonemization language      |

ttsforge-0.1.2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,27 @@
+ttsforge/__init__.py,sha256=5mT7eXwuq0Z-Qn_WjYWVjA8VUOUy0lGTyaqQwNdcEOE,2149
+ttsforge/_version.py,sha256=Ok5oAXdWgR9aghaFXTafTeDW6sYO3uVe6d2Nket57R4,704
+ttsforge/audio_merge.py,sha256=Tt7o8GBNrkcfiSKycUpWvblj-y4zwlULoX-eCblqYpo,5666
+ttsforge/audio_player.py,sha256=HYc4vv46yDXjVXaWRlj1tUtWLiwNTwbzT6oDfOUB5vA,14351
+ttsforge/chapter_selection.py,sha256=a-XlEO4HMzeUBfhvnh6gQOQmDuM0wMpVCi0pw6oM2hQ,2579
+ttsforge/constants.py,sha256=c3b9s41mNIb8NHK6C7XKB-xzX4wJhFZpo0auUqCwT2s,5394
+ttsforge/conversion.py,sha256=2ZzeBfg-Kwf7kR9Fwht30Vx9ZcoAEQOVAiFsmfF2IdA,45011
+ttsforge/input_reader.py,sha256=b49SBT-mL4SnR74D8xwyWHC_smPhsJ5jpPAj4QQ5WKo,14068
+ttsforge/kokoro_lang.py,sha256=8603b5whfk0KzGrNK7pqRjzoH1Ge9TKoX7AMzKsX0sk,376
+ttsforge/kokoro_runner.py,sha256=AKvEMaBfCTCLR3KcHoE04nQnUkHDwv62BvQNb1vGC8U,5923
+ttsforge/name_extractor.py,sha256=CxxBadCO0Pcoepcj7gZwkfWPMud2oa9477h_lDYWrIA,9578
+ttsforge/phoneme_conversion.py,sha256=nbDV0adWi--XyRt2RblJbav_ImzDeXrC0xPvoPx8_9c,41093
+ttsforge/phonemes.py,sha256=EUZ1Qr-0rPThRpSeuJQe5Z3J3nz7rX1Xs3Rjjw19qIQ,15517
+ttsforge/ssmd_generator.py,sha256=LknVBSETKH9cY4CoAUBjd1vEfr5VXi0xqEKcft2CR8I,13346
+ttsforge/utils.py,sha256=3BiNFyScV3Dy_xhVm2EigpxUb4Z6YwIQPzzxwDzfCzI,24942
+ttsforge/cli/__init__.py,sha256=CTqYeUAJaKV7YTYqcmr7-VxjwJfjLcnPYM2OKyws0Oc,2103
+ttsforge/cli/commands_conversion.py,sha256=T4hPiU4EXDQ2Wkbd5I5TuHvuAT8rlfEjhfC4mlSMrzg,66007
+ttsforge/cli/commands_phonemes.py,sha256=k3CtXKnUTpbGHj1oPucjIB6syA3LWjxFpg3OIC-tzJ8,33183
+ttsforge/cli/commands_utility.py,sha256=65NSHUFYjRPOWVLnpeBKbR-TbsaDLYAa5xDGcWHS-fk,48630
+ttsforge/cli/helpers.py,sha256=IJt0VpIMPOC-lnBeR3-1keh31MuAdSemDsLh6FpiHLk,2778
+ttsforge/vocab/__init__.py,sha256=lMgS0dY9VbOYI20LnPjjqrWcjLIQ1FKkR4-xcXsvrqc,3641
+ttsforge-0.1.2.dist-info/licenses/LICENSE,sha256=9csb1sDNn0HdUPKgOTUwtb4CkvYPcFXHnkxKCS99EWQ,1074
+ttsforge-0.1.2.dist-info/METADATA,sha256=T0xJ8RtNsBidb4sa_JYR-QnugMK-fLLyVg71R-zi0QU,19655
+ttsforge-0.1.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ttsforge-0.1.2.dist-info/entry_points.txt,sha256=SrcNdlhQpoUCzPzhVbOmMzATQeV7j7XYl0DPrVjZ-ks,47
+ttsforge-0.1.2.dist-info/top_level.txt,sha256=rNLi-3muicHF8UvZu_FuA2ML_Dz9sVPCjik2E8XnCVk,9
+ttsforge-0.1.2.dist-info/RECORD,,

ttsforge-0.1.0.dist-info/RECORD DELETED Viewed

@@ -1,27 +0,0 @@
-ttsforge/__init__.py,sha256=Jg8_0vPttTVWrnt4HBqrTOKYfmcgpVpfddSVcU4HKXo,2432
-ttsforge/_version.py,sha256=5jwwVncvCiTnhOedfkzzxmxsggwmTBORdFL_4wq0ZeY,704
-ttsforge/audio_merge.py,sha256=Tt7o8GBNrkcfiSKycUpWvblj-y4zwlULoX-eCblqYpo,5666
-ttsforge/audio_player.py,sha256=HYc4vv46yDXjVXaWRlj1tUtWLiwNTwbzT6oDfOUB5vA,14351
-ttsforge/chapter_selection.py,sha256=a-XlEO4HMzeUBfhvnh6gQOQmDuM0wMpVCi0pw6oM2hQ,2579
-ttsforge/constants.py,sha256=p_-LfE_u1oT--tzjI5PrHGXd5u9DHM7dcHsfcUgvzvY,5108
-ttsforge/conversion.py,sha256=oZ1FyyyeChb6EoCrVJ-maogXqLq537iUBne5oa_epdw,41629
-ttsforge/input_reader.py,sha256=b49SBT-mL4SnR74D8xwyWHC_smPhsJ5jpPAj4QQ5WKo,14068
-ttsforge/kokoro_lang.py,sha256=8603b5whfk0KzGrNK7pqRjzoH1Ge9TKoX7AMzKsX0sk,376
-ttsforge/kokoro_runner.py,sha256=ZGBx70_rHcfwKiUgywa_3-7d5u-wQ_0pPOukQRuACu0,4390
-ttsforge/name_extractor.py,sha256=vBVp2OT8sdYdbczs0SQdEcZff2sN1sk5URi5gBwJrcE,9584
-ttsforge/phoneme_conversion.py,sha256=so5Iex2lme9tHvZiCysLBAkBH1tp42O8pkibLSrHzh8,38280
-ttsforge/phonemes.py,sha256=EUZ1Qr-0rPThRpSeuJQe5Z3J3nz7rX1Xs3Rjjw19qIQ,15517
-ttsforge/ssmd_generator.py,sha256=Dmuvy6T8WVyzHvaNWsPDdyyTIZCpyX2pOsaVUPk8s08,13326
-ttsforge/utils.py,sha256=3BiNFyScV3Dy_xhVm2EigpxUb4Z6YwIQPzzxwDzfCzI,24942
-ttsforge/cli/__init__.py,sha256=CTqYeUAJaKV7YTYqcmr7-VxjwJfjLcnPYM2OKyws0Oc,2103
-ttsforge/cli/commands_conversion.py,sha256=H1fX1M52RdSiSnGfIknr-dBGx-MeMaVf5J6rMDVrgWU,63457
-ttsforge/cli/commands_phonemes.py,sha256=_0PQd9_hjOvzkzx2qM5BEG1fNyZYTjKc5nrZZ41HV4k,32373
-ttsforge/cli/commands_utility.py,sha256=_8KMUjVYVqp63PH_gjOjS-fw6ZCujaMDXFxoKfUzlko,48013
-ttsforge/cli/helpers.py,sha256=5Co2EvDhYspKhjW2-P3sNxj9MFFgWyTFeqOyJbPy2yA,2697
-ttsforge/vocab/__init__.py,sha256=lMgS0dY9VbOYI20LnPjjqrWcjLIQ1FKkR4-xcXsvrqc,3641
-ttsforge-0.1.0.dist-info/licenses/LICENSE,sha256=9csb1sDNn0HdUPKgOTUwtb4CkvYPcFXHnkxKCS99EWQ,1074
-ttsforge-0.1.0.dist-info/METADATA,sha256=cQJf57NrwNZJUZacAQgnokqO1cnSC_Mi1b7ZWjcZ0no,19577
-ttsforge-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-ttsforge-0.1.0.dist-info/entry_points.txt,sha256=SrcNdlhQpoUCzPzhVbOmMzATQeV7j7XYl0DPrVjZ-ks,47
-ttsforge-0.1.0.dist-info/top_level.txt,sha256=rNLi-3muicHF8UvZu_FuA2ML_Dz9sVPCjik2E8XnCVk,9
-ttsforge-0.1.0.dist-info/RECORD,,

{ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{ttsforge-0.1.0.dist-info → ttsforge-0.1.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

ttsforge 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

ttsforge 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl