ttsforge 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ttsforge/__init__.py +114 -0
- ttsforge/_version.py +34 -0
- ttsforge/audio_merge.py +180 -0
- ttsforge/audio_player.py +473 -0
- ttsforge/chapter_selection.py +75 -0
- ttsforge/cli/__init__.py +73 -0
- ttsforge/cli/commands_conversion.py +1927 -0
- ttsforge/cli/commands_phonemes.py +1033 -0
- ttsforge/cli/commands_utility.py +1389 -0
- ttsforge/cli/helpers.py +76 -0
- ttsforge/constants.py +164 -0
- ttsforge/conversion.py +1090 -0
- ttsforge/input_reader.py +408 -0
- ttsforge/kokoro_lang.py +12 -0
- ttsforge/kokoro_runner.py +125 -0
- ttsforge/name_extractor.py +305 -0
- ttsforge/phoneme_conversion.py +978 -0
- ttsforge/phonemes.py +486 -0
- ttsforge/ssmd_generator.py +422 -0
- ttsforge/utils.py +785 -0
- ttsforge/vocab/__init__.py +139 -0
- ttsforge-0.1.0.dist-info/METADATA +659 -0
- ttsforge-0.1.0.dist-info/RECORD +27 -0
- ttsforge-0.1.0.dist-info/WHEEL +5 -0
- ttsforge-0.1.0.dist-info/entry_points.txt +2 -0
- ttsforge-0.1.0.dist-info/licenses/LICENSE +21 -0
- ttsforge-0.1.0.dist-info/top_level.txt +1 -0
ttsforge/__init__.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ttsforge - Generate audiobooks from EPUB files with TTS.
|
|
3
|
+
|
|
4
|
+
A CLI tool for converting EPUB books to audiobooks using Kokoro ONNX TTS.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pykokoro import GenerationConfig, KokoroPipeline, PipelineConfig
|
|
8
|
+
from pykokoro.onnx_backend import (
|
|
9
|
+
VoiceBlend,
|
|
10
|
+
are_models_downloaded,
|
|
11
|
+
download_all_models,
|
|
12
|
+
download_model,
|
|
13
|
+
get_model_dir,
|
|
14
|
+
)
|
|
15
|
+
from pykokoro.tokenizer import (
|
|
16
|
+
EspeakConfig,
|
|
17
|
+
MAX_PHONEME_LENGTH,
|
|
18
|
+
Tokenizer,
|
|
19
|
+
)
|
|
20
|
+
from pykokoro.constants import SUPPORTED_LANGUAGES
|
|
21
|
+
|
|
22
|
+
from .constants import (
|
|
23
|
+
DEFAULT_CONFIG,
|
|
24
|
+
LANGUAGE_DESCRIPTIONS,
|
|
25
|
+
SUPPORTED_OUTPUT_FORMATS,
|
|
26
|
+
VOICES,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
# Import from pykokoro
|
|
30
|
+
try:
|
|
31
|
+
from pykokoro.constants import SAMPLE_RATE
|
|
32
|
+
from pykokoro.onnx_backend import LANG_CODE_TO_ONNX
|
|
33
|
+
except ImportError:
|
|
34
|
+
# Fallback values if the installed pykokoro does not provide these names
|
|
35
|
+
SAMPLE_RATE = 24000
|
|
36
|
+
LANG_CODE_TO_ONNX = {
|
|
37
|
+
"a": "en-us",
|
|
38
|
+
"b": "en-gb",
|
|
39
|
+
"e": "es",
|
|
40
|
+
"f": "fr-fr",
|
|
41
|
+
"h": "hi",
|
|
42
|
+
"i": "it",
|
|
43
|
+
"j": "ja",
|
|
44
|
+
"p": "pt",
|
|
45
|
+
"z": "zh",
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
from .conversion import (
|
|
49
|
+
Chapter,
|
|
50
|
+
ConversionOptions,
|
|
51
|
+
ConversionProgress,
|
|
52
|
+
ConversionResult,
|
|
53
|
+
TTSConverter,
|
|
54
|
+
)
|
|
55
|
+
from .phonemes import (
|
|
56
|
+
FORMAT_VERSION,
|
|
57
|
+
PhonemeBook,
|
|
58
|
+
PhonemeChapter,
|
|
59
|
+
PhonemeSegment,
|
|
60
|
+
create_phoneme_book_from_chapters,
|
|
61
|
+
phonemize_text_list,
|
|
62
|
+
)
|
|
63
|
+
from .utils import (
|
|
64
|
+
load_config,
|
|
65
|
+
save_config,
|
|
66
|
+
)
|
|
67
|
+
from .cli.helpers import DEFAULT_SAMPLE_TEXT
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
__all__ = [
|
|
71
|
+
# Constants
|
|
72
|
+
"DEFAULT_CONFIG",
|
|
73
|
+
"LANGUAGE_DESCRIPTIONS",
|
|
74
|
+
"SUPPORTED_OUTPUT_FORMATS",
|
|
75
|
+
"VOICES",
|
|
76
|
+
# Conversion
|
|
77
|
+
"Chapter",
|
|
78
|
+
"ConversionOptions",
|
|
79
|
+
"ConversionProgress",
|
|
80
|
+
"ConversionResult",
|
|
81
|
+
"TTSConverter",
|
|
82
|
+
# Pipeline (from pykokoro)
|
|
83
|
+
"GenerationConfig",
|
|
84
|
+
"KokoroPipeline",
|
|
85
|
+
"PipelineConfig",
|
|
86
|
+
"VoiceBlend",
|
|
87
|
+
"are_models_downloaded",
|
|
88
|
+
"download_all_models",
|
|
89
|
+
"download_model",
|
|
90
|
+
"get_model_dir",
|
|
91
|
+
# Tokenizer (from pykokoro)
|
|
92
|
+
"EspeakConfig",
|
|
93
|
+
"MAX_PHONEME_LENGTH",
|
|
94
|
+
"SAMPLE_RATE",
|
|
95
|
+
"SUPPORTED_LANGUAGES",
|
|
96
|
+
"Tokenizer",
|
|
97
|
+
# Phonemes
|
|
98
|
+
"FORMAT_VERSION",
|
|
99
|
+
"PhonemeBook",
|
|
100
|
+
"PhonemeChapter",
|
|
101
|
+
"PhonemeSegment",
|
|
102
|
+
"create_phoneme_book_from_chapters",
|
|
103
|
+
"phonemize_text_list",
|
|
104
|
+
# Utils
|
|
105
|
+
"load_config",
|
|
106
|
+
"save_config",
|
|
107
|
+
# Helpers
|
|
108
|
+
"DEFAULT_SAMPLE_TEXT",
|
|
109
|
+
]
|
|
110
|
+
|
|
111
|
+
try:
|
|
112
|
+
from ._version import version as __version__
|
|
113
|
+
except ImportError:
|
|
114
|
+
__version__ = "0.0.0+unknown"
|
ttsforge/_version.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '0.1.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 0)
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
ttsforge/audio_merge.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# ttsforge/audio_merge.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Protocol
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import soundfile as sf
|
|
10
|
+
|
|
11
|
+
from .constants import SAMPLE_RATE
|
|
12
|
+
from .utils import create_process, get_ffmpeg_path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(slots=True)
class MergeMeta:
    """Output settings and optional tags read by ``AudioMerger.merge_chapter_wavs``."""

    # Output format name; merge_chapter_wavs selects the ffmpeg codec arguments
    # by comparing it against "m4b", "opus", "mp3", "flac" and "wav".
    fmt: str
    # Length of the gap between consecutive chapters, in seconds (it is
    # multiplied by the sample rate to get a sample count). A silence file is
    # only inserted when this is greater than zero.
    silence_between_chapters: float
    # Written as the "title" metadata tag; only used for m4b output.
    title: str | None = None
    # Written as the "artist" metadata tag; only used for m4b output.
    author: str | None = None
    # Image attached as cover art for m4b output when the file exists.
    cover_image: Path | None = None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AudioMerger:
|
|
25
|
+
class LogCallback(Protocol):
|
|
26
|
+
def __call__(self, message: str, level: str = "info") -> None: ...
|
|
27
|
+
|
|
28
|
+
def __init__(self, log: LogCallback):
|
|
29
|
+
self.log = log
|
|
30
|
+
|
|
31
|
+
def add_chapters_to_m4b(
|
|
32
|
+
self, output_path: Path, chapters: list[dict[str, Any]], cover: Path | None
|
|
33
|
+
) -> None:
|
|
34
|
+
if len(chapters) <= 1:
|
|
35
|
+
return
|
|
36
|
+
ffmpeg = get_ffmpeg_path()
|
|
37
|
+
|
|
38
|
+
chapters_file = output_path.with_suffix(".chapters.txt")
|
|
39
|
+
chapters_file.write_text(self._ffmetadata(chapters), encoding="utf-8")
|
|
40
|
+
|
|
41
|
+
tmp_path = output_path.with_suffix(".tmp.m4b")
|
|
42
|
+
cmd = [
|
|
43
|
+
ffmpeg,
|
|
44
|
+
"-y",
|
|
45
|
+
"-i",
|
|
46
|
+
str(output_path),
|
|
47
|
+
"-i",
|
|
48
|
+
str(chapters_file),
|
|
49
|
+
"-map",
|
|
50
|
+
"0:a",
|
|
51
|
+
"-map_metadata",
|
|
52
|
+
"1",
|
|
53
|
+
"-map_chapters",
|
|
54
|
+
"1",
|
|
55
|
+
"-c:a",
|
|
56
|
+
"copy",
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
if cover and cover.exists():
|
|
60
|
+
cmd += [
|
|
61
|
+
"-i",
|
|
62
|
+
str(cover),
|
|
63
|
+
"-map",
|
|
64
|
+
"2",
|
|
65
|
+
"-c:v",
|
|
66
|
+
"copy",
|
|
67
|
+
"-disposition:v",
|
|
68
|
+
"attached_pic",
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
cmd.append(str(tmp_path))
|
|
72
|
+
proc = create_process(cmd, suppress_output=True)
|
|
73
|
+
rc = proc.wait()
|
|
74
|
+
if rc != 0:
|
|
75
|
+
raise RuntimeError("ffmpeg failed while adding m4b chapters")
|
|
76
|
+
|
|
77
|
+
tmp_path.replace(output_path)
|
|
78
|
+
chapters_file.unlink(missing_ok=True)
|
|
79
|
+
|
|
80
|
+
def merge_chapter_wavs(
|
|
81
|
+
self,
|
|
82
|
+
chapter_files: list[Path],
|
|
83
|
+
chapter_durations: list[float],
|
|
84
|
+
chapter_titles: list[str],
|
|
85
|
+
output_path: Path,
|
|
86
|
+
meta: MergeMeta,
|
|
87
|
+
) -> None:
|
|
88
|
+
ffmpeg = get_ffmpeg_path()
|
|
89
|
+
|
|
90
|
+
concat_file = output_path.with_suffix(".concat.txt")
|
|
91
|
+
silence_file = output_path.parent / "_silence.wav"
|
|
92
|
+
|
|
93
|
+
if meta.silence_between_chapters > 0 and len(chapter_files) > 1:
|
|
94
|
+
self._write_silence_wav(silence_file, meta.silence_between_chapters)
|
|
95
|
+
|
|
96
|
+
with concat_file.open("w", encoding="utf-8") as f:
|
|
97
|
+
for i, ch in enumerate(chapter_files):
|
|
98
|
+
f.write(f"file '{ch.absolute()}'\n")
|
|
99
|
+
if i < len(chapter_files) - 1 and meta.silence_between_chapters > 0:
|
|
100
|
+
f.write(f"file '{silence_file.absolute()}'\n")
|
|
101
|
+
|
|
102
|
+
cmd = [ffmpeg, "-y", "-f", "concat", "-safe", "0", "-i", str(concat_file)]
|
|
103
|
+
|
|
104
|
+
if meta.fmt == "m4b":
|
|
105
|
+
if meta.cover_image and meta.cover_image.exists():
|
|
106
|
+
cmd += [
|
|
107
|
+
"-i",
|
|
108
|
+
str(meta.cover_image),
|
|
109
|
+
"-map",
|
|
110
|
+
"0:a",
|
|
111
|
+
"-map",
|
|
112
|
+
"1",
|
|
113
|
+
"-c:v",
|
|
114
|
+
"copy",
|
|
115
|
+
"-disposition:v",
|
|
116
|
+
"attached_pic",
|
|
117
|
+
]
|
|
118
|
+
cmd += [
|
|
119
|
+
"-c:a",
|
|
120
|
+
"aac",
|
|
121
|
+
"-q:a",
|
|
122
|
+
"2",
|
|
123
|
+
"-movflags",
|
|
124
|
+
"+faststart+use_metadata_tags",
|
|
125
|
+
]
|
|
126
|
+
if meta.title:
|
|
127
|
+
cmd += ["-metadata", f"title={meta.title}"]
|
|
128
|
+
if meta.author:
|
|
129
|
+
cmd += ["-metadata", f"artist={meta.author}"]
|
|
130
|
+
elif meta.fmt == "opus":
|
|
131
|
+
cmd += ["-c:a", "libopus", "-b:a", "24000"]
|
|
132
|
+
elif meta.fmt == "mp3":
|
|
133
|
+
cmd += ["-c:a", "libmp3lame", "-q:a", "2"]
|
|
134
|
+
elif meta.fmt == "flac":
|
|
135
|
+
cmd += ["-c:a", "flac"]
|
|
136
|
+
elif meta.fmt == "wav":
|
|
137
|
+
cmd += ["-c:a", "pcm_s16le"]
|
|
138
|
+
|
|
139
|
+
cmd.append(str(output_path))
|
|
140
|
+
proc = create_process(cmd, suppress_output=True)
|
|
141
|
+
rc = proc.wait()
|
|
142
|
+
if rc != 0:
|
|
143
|
+
raise RuntimeError("ffmpeg failed while merging chapters")
|
|
144
|
+
|
|
145
|
+
concat_file.unlink(missing_ok=True)
|
|
146
|
+
silence_file.unlink(missing_ok=True)
|
|
147
|
+
|
|
148
|
+
if meta.fmt == "m4b" and len(chapter_files) > 1:
|
|
149
|
+
times = []
|
|
150
|
+
t = 0.0
|
|
151
|
+
for i, (dur, title) in enumerate(
|
|
152
|
+
zip(chapter_durations, chapter_titles, strict=False)
|
|
153
|
+
):
|
|
154
|
+
times.append({"title": title, "start": t, "end": t + dur})
|
|
155
|
+
t += dur
|
|
156
|
+
if i < len(chapter_durations) - 1:
|
|
157
|
+
t += meta.silence_between_chapters
|
|
158
|
+
self.add_chapters_to_m4b(output_path, times, meta.cover_image)
|
|
159
|
+
|
|
160
|
+
def _write_silence_wav(self, path: Path, duration: float) -> None:
|
|
161
|
+
samples = int(duration * SAMPLE_RATE)
|
|
162
|
+
audio = np.zeros(samples, dtype="float32")
|
|
163
|
+
with sf.SoundFile(
|
|
164
|
+
str(path), "w", samplerate=SAMPLE_RATE, channels=1, format="wav"
|
|
165
|
+
) as f:
|
|
166
|
+
f.write(audio)
|
|
167
|
+
|
|
168
|
+
def _ffmetadata(self, chapters: list[dict[str, Any]]) -> str:
|
|
169
|
+
lines = [";FFMETADATA1"]
|
|
170
|
+
for ch in chapters:
|
|
171
|
+
title = str(ch["title"]).replace("=", "\\=")
|
|
172
|
+
lines += [
|
|
173
|
+
"[CHAPTER]",
|
|
174
|
+
"TIMEBASE=1/1000",
|
|
175
|
+
f"START={int(ch['start'] * 1000)}",
|
|
176
|
+
f"END={int(ch['end'] * 1000)}",
|
|
177
|
+
f"title={title}",
|
|
178
|
+
"",
|
|
179
|
+
]
|
|
180
|
+
return "\n".join(lines)
|