ttsforge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ """Shared utilities and helpers for CLI commands."""
2
+
3
+ from rich.console import Console
4
+
5
+ # Single Rich Console instance shared by all CLI commands
6
+ console = Console()
7
+
8
+
9
+ def parse_voice_parameter(voice: str) -> tuple[str | None, str | None]:
10
+ """Parse voice parameter to detect if it's a single voice or a blend.
11
+
12
+ Args:
13
+ voice: Voice parameter (e.g., 'af_sky' or 'af_nicole:50,am_michael:50')
14
+
15
+ Returns:
16
+ Tuple of (voice, voice_blend) where one will be None
17
+
18
+ Examples:
19
+ >>> parse_voice_parameter('af_sky')
20
+ ('af_sky', None)
21
+ >>> parse_voice_parameter('af_nicole:50,am_michael:50')
22
+ (None, 'af_nicole:50,am_michael:50')
23
+ """
24
+ # Detect if it's a blend (contains both : and ,)
25
+ if ":" in voice and "," in voice:
26
+ return (None, voice)
27
+ else:
28
+ return (voice, None)
29
+
30
+
31
def get_version() -> str:
    """Return the package version string.

    The version is read from the ``_version`` module one package level up.
    If that import fails, the placeholder ``"0.0.0+unknown"`` is returned
    instead of raising.
    """
    try:
        from .._version import version as package_version
    except ImportError:
        return "0.0.0+unknown"
    return package_version
39
+
40
+
41
+ # Default sample text for testing TTS settings (adjacent literals join into one string)
42
+ DEFAULT_SAMPLE_TEXT = (
43
+ "The quick brown fox jumps over the lazy dog. "
44
+ "This sample text demonstrates the text-to-speech capabilities, "
45
+ "including punctuation handling, and natural speech flow."
46
+ )
47
+
48
+
49
+ # Demo sample text per language code; "{voice}" is presumably filled with the voice name (TODO confirm)
50
+ DEMO_TEXT = {
51
+ "a": "Hello! This audio was generated by {voice}. How do you like it?",
52
+ "b": "Hello! This audio was generated by {voice}. How do you like it?",
53
+ "e": "Hola! Este audio fue generado por {voice}. Que te parece?",
54
+ "f": "Bonjour! Cet audio a ete genere par {voice}. Comment le trouvez-vous?",
55
+ "h": "Namaste! Yah audio {voice} dwara banaya gaya hai. Aapko kaisa laga?", # NOTE(review): romanized, not Devanagari - confirm intended
56
+ "i": "Ciao! Questo audio e stato generato da {voice}. Ti piace?",
57
+ "j": "Konnichiwa! Kono onsei wa {voice} ni yotte sakusei saremashita.", # NOTE(review): romanized, not Japanese script - confirm intended
58
+ "p": "Ola! Este audio foi gerado por {voice}. O que voce achou?",
59
+ "z": "Ni hao! Zhe ge yinpin shi you {voice} shengcheng de.", # NOTE(review): pinyin without tones, not Chinese script - confirm intended
60
+ }
61
+
62
+ # Preset voice blends for demo command
63
+ # Format: (blend_string, description); blend_string is comma-separated "voice:weight" pairs
64
+ VOICE_BLEND_PRESETS = [
65
+ # Same language, different gender
66
+ ("af_nicole:50,am_michael:50", "American female + male blend"),
67
+ ("bf_emma:50,bm_george:50", "British female + male blend"),
68
+ # Same gender, different accent
69
+ ("af_heart:50,bf_emma:50", "American + British female blend"),
70
+ ("am_adam:50,bm_daniel:50", "American + British male blend"),
71
+ # Same gender, different voice
72
+ ("af_nicole:50,af_bella:50", "Two American females blend"),
73
+ ("am_adam:50,am_eric:50", "Two American males blend"),
74
+ # Multi-voice blend (weights 33 + 33 + 34 add up to 100, like the two-voice presets)
75
+ ("af_heart:33,af_nicole:33,af_bella:34", "Three American females blend"),
76
+ ]
ttsforge/constants.py ADDED
@@ -0,0 +1,164 @@
1
+ """Constants for ttsforge - voices, languages, and formats."""
2
+
3
+ # from pykokoro.onnx_backend import VOICE_NAMES_V1_0
4
+ # from pykokoro.onnx_backend import VOICE_NAMES_V1_1_ZH, VOICE_NAMES_V1_1_DE
5
+
6
+ from pykokoro.onnx_backend import VOICE_NAMES_V1_0 as VOICE_NAMES
7
+
8
+ # Re-export pykokoro's VOICE_NAMES_V1_0 (imported above as VOICE_NAMES) for convenience
9
+ VOICES = VOICE_NAMES
10
+
11
+ # Audio constants from pykokoro; a hard-coded rate is used if the import fails
12
+ try:
13
+ from pykokoro.constants import SAMPLE_RATE as _SAMPLE_RATE
14
+
15
+ SAMPLE_RATE: int = int(_SAMPLE_RATE)
16
+ except ImportError:
17
+ SAMPLE_RATE = 24000 # Fallback value when pykokoro.constants cannot be imported
18
+
19
+ # Program information: program name and a one-line description
20
+ PROGRAM_NAME = "ttsforge"
21
+ PROGRAM_DESCRIPTION = "Generate audiobooks from EPUB files using Kokoro ONNX TTS."
22
+
23
+ # Single-letter ttsforge language code to human-readable language name mapping
24
+ LANGUAGE_DESCRIPTIONS = {
25
+ "a": "American English",
26
+ "b": "British English",
27
+ "e": "Spanish",
28
+ "f": "French",
29
+ "h": "Hindi",
30
+ "i": "Italian",
31
+ "j": "Japanese",
32
+ "p": "Brazilian Portuguese",
33
+ "z": "Mandarin Chinese",
34
+ }
35
+
36
+ # ISO language code to ttsforge language code mapping (all keys are lowercase)
37
+ ISO_TO_LANG_CODE = {
38
+ "en": "a", # Default to American English
39
+ "en-us": "a",
40
+ "en-gb": "b",
41
+ "en-au": "b", # Australian English is mapped to the British English code
42
+ "es": "e",
43
+ "es-es": "e",
44
+ "es-mx": "e",
45
+ "fr": "f",
46
+ "fr-fr": "f",
47
+ "fr-ca": "f",
48
+ "hi": "h",
49
+ "it": "i",
50
+ "ja": "j",
51
+ "pt": "p",
52
+ "pt-br": "p",
53
+ "pt-pt": "p", # European Portuguese is mapped to the "Brazilian Portuguese" code
54
+ "zh": "z",
55
+ "zh-cn": "z",
56
+ "zh-tw": "z",
57
+ }
58
+
59
+ # Voice prefix (language letter followed by f/m for female/male) to language code mapping
60
+ VOICE_PREFIX_TO_LANG = {
61
+ "af": "a", # American Female
62
+ "am": "a", # American Male
63
+ "bf": "b", # British Female
64
+ "bm": "b", # British Male
65
+ "ef": "e", # Spanish Female
66
+ "em": "e", # Spanish Male
67
+ "ff": "f", # French Female
68
+ "fm": "f", # French Male
69
+ "hf": "h", # Hindi Female
70
+ "hm": "h", # Hindi Male
71
+ "if": "i", # Italian Female
72
+ "im": "i", # Italian Male
73
+ "jf": "j", # Japanese Female
74
+ "jm": "j", # Japanese Male
75
+ "pf": "p", # Portuguese Female
76
+ "pm": "p", # Portuguese Male
77
+ "zf": "z", # Chinese Female
78
+ "zm": "z", # Chinese Male
79
+ }
80
+
81
+ # Language code to default voice mapping (every default uses a female "?f_" prefix)
82
+ DEFAULT_VOICE_FOR_LANG = {
83
+ "a": "af_heart",
84
+ "b": "bf_emma",
85
+ "e": "ef_dora",
86
+ "f": "ff_siwis",
87
+ "h": "hf_alpha",
88
+ "i": "if_sara",
89
+ "j": "jf_alpha",
90
+ "p": "pf_dora",
91
+ "z": "zf_xiaoxiao",
92
+ }
93
+
94
+ # Supported output audio formats (each one also appears in exactly one of the two lists below)
95
+ SUPPORTED_OUTPUT_FORMATS = [
96
+ "wav",
97
+ "mp3",
98
+ "flac",
99
+ "opus",
100
+ "m4b",
101
+ ]
102
+
103
+ # Formats that require ffmpeg
104
+ FFMPEG_FORMATS = ["m4b", "opus"]
105
+
106
+ # Formats supported by soundfile directly (no ffmpeg needed)
107
+ SOUNDFILE_FORMATS = ["wav", "mp3", "flac"]
108
+
109
+ # Default configuration values (keys not commented below are documented by their names only)
110
+ DEFAULT_CONFIG = {
111
+ "default_voice": "af_heart",
112
+ "default_language": "a",
113
+ "default_speed": 1.0,
114
+ "default_format": "m4b", # "m4b" is listed in FFMPEG_FORMATS, so the default output needs ffmpeg
115
+ "use_gpu": False, # GPU requires onnxruntime-gpu
116
+ # Model quality: fp32, fp16, q8, q8f16, q4, q4f16, uint8, uint8f16
117
+ "model_quality": "fp32",
118
+ "model_variant": "v1.0",
119
+ "silence_between_chapters": 2.0, # presumably seconds - TODO confirm
120
+ "save_chapters_separately": False,
121
+ "merge_at_end": True,
122
+ "auto_detect_language": True,
123
+ "default_split_mode": "auto",
124
+ "default_content_mode": "chapters", # Content mode for read: chapters or pages
125
+ "default_page_size": 2000, # Synthetic page size in characters for pages mode
126
+ "pause_clause": 0.5, # pause_* values are presumably seconds - TODO confirm
127
+ "pause_sentence": 0.7,
128
+ "pause_paragraph": 0.9,
129
+ "pause_variance": 0.05,
130
+ "pause_mode": "auto", # "tts", "manual", or "auto"
131
+ # Language override for phonemization (e.g., 'de', 'fr', 'en-us')
132
+ # If None, language is determined from voice prefix
133
+ "phonemization_lang": None,
134
+ # Chapter announcement settings
135
+ "announce_chapters": True, # Read chapter titles aloud before content
136
+ "chapter_pause_after_title": 2.0, # Pause after chapter title (seconds)
137
+ "output_filename_template": "{book_title}",
138
+ "chapter_filename_template": "{chapter_num:03d}_{book_title}_{chapter_title}",
139
+ "phoneme_export_template": "{book_title}",
140
+ # Fallback title when metadata is missing
141
+ "default_title": "Untitled",
142
+ # Mixed-language phonemization settings (disabled by default)
143
+ "use_mixed_language": False, # Enable automatic language detection
144
+ "mixed_language_primary": None, # Primary language (None = use current lang)
145
+ "mixed_language_allowed": None, # List of allowed languages (required if enabled)
146
+ "mixed_language_confidence": 0.7, # Detection confidence threshold (0.0-1.0)
147
+ }
148
+
149
+ # Audio settings
150
+ # SAMPLE_RATE is defined near the top of this file (pykokoro value, or the 24000 fallback)
151
+ AUDIO_CHANNELS = 1 # single channel (mono)
152
+
153
# Sample texts for voice preview, keyed by single-letter language code.
# Each entry says "this is a sample of the selected voice" in that language.
SAMPLE_TEXTS = {
    "a": "This is a sample of the selected voice.",
    "b": "This is a sample of the selected voice.",
    # "muestra" is a feminine noun, so the demonstrative must be "Esta"
    # (the previous "Este es una muestra" was a gender-agreement error).
    "e": "Esta es una muestra de la voz seleccionada.",
    "f": "Ceci est un exemple de la voix sélectionnée.",
    "h": "यह चयनित आवाज़ का एक नमूना है।",  # noqa: E501
    "i": "Questo è un esempio della voce selezionata.",
    "j": "これは選択した声のサンプルです。",  # noqa: E501
    "p": "Este é um exemplo da voz selecionada.",
    "z": "这是所选语音的示例。",
}