ttsforge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ttsforge/__init__.py ADDED
@@ -0,0 +1,114 @@
1
+ """
2
+ ttsforge - Generate audiobooks from EPUB files with TTS.
3
+
4
+ A CLI tool for converting EPUB books to audiobooks using Kokoro ONNX TTS.
5
+ """
6
+
7
+ from pykokoro import GenerationConfig, KokoroPipeline, PipelineConfig
8
+ from pykokoro.onnx_backend import (
9
+ VoiceBlend,
10
+ are_models_downloaded,
11
+ download_all_models,
12
+ download_model,
13
+ get_model_dir,
14
+ )
15
+ from pykokoro.tokenizer import (
16
+ EspeakConfig,
17
+ MAX_PHONEME_LENGTH,
18
+ Tokenizer,
19
+ )
20
+ from pykokoro.constants import SUPPORTED_LANGUAGES
21
+
22
+ from .constants import (
23
+ DEFAULT_CONFIG,
24
+ LANGUAGE_DESCRIPTIONS,
25
+ SUPPORTED_OUTPUT_FORMATS,
26
+ VOICES,
27
+ )
28
+
29
+ # Import from pykokoro
30
+ try:
31
+ from pykokoro.constants import SAMPLE_RATE
32
+ from pykokoro.onnx_backend import LANG_CODE_TO_ONNX
33
+ except ImportError:
34
+ # Fallback values if these names cannot be imported from pykokoro
35
+ SAMPLE_RATE = 24000
36
+ LANG_CODE_TO_ONNX = {
37
+ "a": "en-us",
38
+ "b": "en-gb",
39
+ "e": "es",
40
+ "f": "fr-fr",
41
+ "h": "hi",
42
+ "i": "it",
43
+ "j": "ja",
44
+ "p": "pt",
45
+ "z": "zh",
46
+ }
47
+
48
+ from .conversion import (
49
+ Chapter,
50
+ ConversionOptions,
51
+ ConversionProgress,
52
+ ConversionResult,
53
+ TTSConverter,
54
+ )
55
+ from .phonemes import (
56
+ FORMAT_VERSION,
57
+ PhonemeBook,
58
+ PhonemeChapter,
59
+ PhonemeSegment,
60
+ create_phoneme_book_from_chapters,
61
+ phonemize_text_list,
62
+ )
63
+ from .utils import (
64
+ load_config,
65
+ save_config,
66
+ )
67
+ from .cli.helpers import DEFAULT_SAMPLE_TEXT
68
+
69
+
70
+ __all__ = [
71
+ # Constants
72
+ "DEFAULT_CONFIG",
73
+ "LANGUAGE_DESCRIPTIONS",
74
+ "SUPPORTED_OUTPUT_FORMATS",
75
+ "VOICES",
76
+ # Conversion
77
+ "Chapter",
78
+ "ConversionOptions",
79
+ "ConversionProgress",
80
+ "ConversionResult",
81
+ "TTSConverter",
82
+ # Pipeline (from pykokoro)
83
+ "GenerationConfig",
84
+ "KokoroPipeline",
85
+ "PipelineConfig",
86
+ "VoiceBlend",
87
+ "are_models_downloaded",
88
+ "download_all_models",
89
+ "download_model",
90
+ "get_model_dir",
91
+ # Tokenizer (from pykokoro)
92
+ "EspeakConfig",
93
+ "MAX_PHONEME_LENGTH",
94
+ "SAMPLE_RATE",
95
+ "SUPPORTED_LANGUAGES",
96
+ "Tokenizer",
97
+ # Phonemes
98
+ "FORMAT_VERSION",
99
+ "PhonemeBook",
100
+ "PhonemeChapter",
101
+ "PhonemeSegment",
102
+ "create_phoneme_book_from_chapters",
103
+ "phonemize_text_list",
104
+ # Utils
105
+ "load_config",
106
+ "save_config",
107
+ # Helpers
108
+ "DEFAULT_SAMPLE_TEXT",
109
+ ]
110
+
111
+ try:
112
+ from ._version import version as __version__
113
+ except ImportError:
114
+ __version__ = "0.0.0+unknown"
ttsforge/_version.py ADDED
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '0.1.0'
32
+ __version_tuple__ = version_tuple = (0, 1, 0)
33
+
34
+ __commit_id__ = commit_id = None
ttsforge/audio_merge.py ADDED
@@ -0,0 +1,180 @@
1
+ # ttsforge/audio_merge.py
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import Any, Protocol
7
+
8
+ import numpy as np
9
+ import soundfile as sf
10
+
11
+ from .constants import SAMPLE_RATE
12
+ from .utils import create_process, get_ffmpeg_path
13
+
14
+
15
+ @dataclass(slots=True)
16
+ class MergeMeta:
17
+ fmt: str
18
+ silence_between_chapters: float
19
+ title: str | None = None
20
+ author: str | None = None
21
+ cover_image: Path | None = None
22
+
23
+
24
+ class AudioMerger:
25
+ class LogCallback(Protocol):
26
+ def __call__(self, message: str, level: str = "info") -> None: ...
27
+
28
+ def __init__(self, log: LogCallback):
29
+ self.log = log
30
+
31
+ def add_chapters_to_m4b(
32
+ self, output_path: Path, chapters: list[dict[str, Any]], cover: Path | None
33
+ ) -> None:
34
+ if len(chapters) <= 1:
35
+ return
36
+ ffmpeg = get_ffmpeg_path()
37
+
38
+ chapters_file = output_path.with_suffix(".chapters.txt")
39
+ chapters_file.write_text(self._ffmetadata(chapters), encoding="utf-8")
40
+
41
+ tmp_path = output_path.with_suffix(".tmp.m4b")
42
+ cmd = [
43
+ ffmpeg,
44
+ "-y",
45
+ "-i",
46
+ str(output_path),
47
+ "-i",
48
+ str(chapters_file),
49
+ "-map",
50
+ "0:a",
51
+ "-map_metadata",
52
+ "1",
53
+ "-map_chapters",
54
+ "1",
55
+ "-c:a",
56
+ "copy",
57
+ ]
58
+
59
+ if cover and cover.exists():
60
+ cmd += [
61
+ "-i",
62
+ str(cover),
63
+ "-map",
64
+ "2",
65
+ "-c:v",
66
+ "copy",
67
+ "-disposition:v",
68
+ "attached_pic",
69
+ ]
70
+
71
+ cmd.append(str(tmp_path))
72
+ proc = create_process(cmd, suppress_output=True)
73
+ rc = proc.wait()
74
+ if rc != 0:
75
+ raise RuntimeError("ffmpeg failed while adding m4b chapters")
76
+
77
+ tmp_path.replace(output_path)
78
+ chapters_file.unlink(missing_ok=True)
79
+
80
+ def merge_chapter_wavs(
81
+ self,
82
+ chapter_files: list[Path],
83
+ chapter_durations: list[float],
84
+ chapter_titles: list[str],
85
+ output_path: Path,
86
+ meta: MergeMeta,
87
+ ) -> None:
88
+ ffmpeg = get_ffmpeg_path()
89
+
90
+ concat_file = output_path.with_suffix(".concat.txt")
91
+ silence_file = output_path.parent / "_silence.wav"
92
+
93
+ if meta.silence_between_chapters > 0 and len(chapter_files) > 1:
94
+ self._write_silence_wav(silence_file, meta.silence_between_chapters)
95
+
96
+ with concat_file.open("w", encoding="utf-8") as f:
97
+ for i, ch in enumerate(chapter_files):
98
+ f.write(f"file '{ch.absolute()}'\n")
99
+ if i < len(chapter_files) - 1 and meta.silence_between_chapters > 0:
100
+ f.write(f"file '{silence_file.absolute()}'\n")
101
+
102
+ cmd = [ffmpeg, "-y", "-f", "concat", "-safe", "0", "-i", str(concat_file)]
103
+
104
+ if meta.fmt == "m4b":
105
+ if meta.cover_image and meta.cover_image.exists():
106
+ cmd += [
107
+ "-i",
108
+ str(meta.cover_image),
109
+ "-map",
110
+ "0:a",
111
+ "-map",
112
+ "1",
113
+ "-c:v",
114
+ "copy",
115
+ "-disposition:v",
116
+ "attached_pic",
117
+ ]
118
+ cmd += [
119
+ "-c:a",
120
+ "aac",
121
+ "-q:a",
122
+ "2",
123
+ "-movflags",
124
+ "+faststart+use_metadata_tags",
125
+ ]
126
+ if meta.title:
127
+ cmd += ["-metadata", f"title={meta.title}"]
128
+ if meta.author:
129
+ cmd += ["-metadata", f"artist={meta.author}"]
130
+ elif meta.fmt == "opus":
131
+ cmd += ["-c:a", "libopus", "-b:a", "24000"]
132
+ elif meta.fmt == "mp3":
133
+ cmd += ["-c:a", "libmp3lame", "-q:a", "2"]
134
+ elif meta.fmt == "flac":
135
+ cmd += ["-c:a", "flac"]
136
+ elif meta.fmt == "wav":
137
+ cmd += ["-c:a", "pcm_s16le"]
138
+
139
+ cmd.append(str(output_path))
140
+ proc = create_process(cmd, suppress_output=True)
141
+ rc = proc.wait()
142
+ if rc != 0:
143
+ raise RuntimeError("ffmpeg failed while merging chapters")
144
+
145
+ concat_file.unlink(missing_ok=True)
146
+ silence_file.unlink(missing_ok=True)
147
+
148
+ if meta.fmt == "m4b" and len(chapter_files) > 1:
149
+ times = []
150
+ t = 0.0
151
+ for i, (dur, title) in enumerate(
152
+ zip(chapter_durations, chapter_titles, strict=False)
153
+ ):
154
+ times.append({"title": title, "start": t, "end": t + dur})
155
+ t += dur
156
+ if i < len(chapter_durations) - 1:
157
+ t += meta.silence_between_chapters
158
+ self.add_chapters_to_m4b(output_path, times, meta.cover_image)
159
+
160
+ def _write_silence_wav(self, path: Path, duration: float) -> None:
161
+ samples = int(duration * SAMPLE_RATE)
162
+ audio = np.zeros(samples, dtype="float32")
163
+ with sf.SoundFile(
164
+ str(path), "w", samplerate=SAMPLE_RATE, channels=1, format="wav"
165
+ ) as f:
166
+ f.write(audio)
167
+
168
+ def _ffmetadata(self, chapters: list[dict[str, Any]]) -> str:
169
+ lines = [";FFMETADATA1"]
170
+ for ch in chapters:
171
+ title = str(ch["title"]).replace("=", "\\=")
172
+ lines += [
173
+ "[CHAPTER]",
174
+ "TIMEBASE=1/1000",
175
+ f"START={int(ch['start'] * 1000)}",
176
+ f"END={int(ch['end'] * 1000)}",
177
+ f"title={title}",
178
+ "",
179
+ ]
180
+ return "\n".join(lines)