wyoming-piper 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wyoming_piper/__init__.py +1 -0
- wyoming_piper/__main__.py +158 -0
- wyoming_piper/const.py +101 -0
- wyoming_piper/download.py +161 -0
- wyoming_piper/file_hash.py +46 -0
- wyoming_piper/handler.py +136 -0
- wyoming_piper/process.py +171 -0
- wyoming_piper/voices.json +4012 -0
- wyoming_piper-1.3.0.dist-info/METADATA +23 -0
- wyoming_piper-1.3.0.dist-info/RECORD +12 -0
- wyoming_piper-1.3.0.dist-info/WHEEL +5 -0
- wyoming_piper-1.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Wyoming server for piper."""
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import argparse
|
|
3
|
+
import asyncio
|
|
4
|
+
import logging
|
|
5
|
+
from functools import partial
|
|
6
|
+
from typing import Any, Dict
|
|
7
|
+
|
|
8
|
+
from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
|
|
9
|
+
from wyoming.server import AsyncServer
|
|
10
|
+
|
|
11
|
+
from .download import get_voices
|
|
12
|
+
from .handler import PiperEventHandler
|
|
13
|
+
from .process import PiperProcessManager
|
|
14
|
+
|
|
15
|
+
# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
async def main() -> None:
    """Parse the command line, load the voice catalog and run the server.

    The default voice's piper process is started before the server begins
    accepting connections; other voices are loaded on demand.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--piper", required=True, help="Path to piper executable")
    parser.add_argument(
        "--voice",
        required=True,
        help="Default Piper voice to use (e.g., en_US-lessac-medium)",
    )
    parser.add_argument("--uri", default="stdio://", help="unix:// or tcp://")
    parser.add_argument(
        "--data-dir",
        required=True,
        action="append",
        help="Data directory to check for downloaded models",
    )
    parser.add_argument(
        "--download-dir", required=True, help="Directory to download voices into"
    )

    # Options for the default voice
    parser.add_argument(
        "--speaker", type=str, help="Name or id of speaker for default voice"
    )
    parser.add_argument("--noise-scale", type=float, help="Generator noise")
    parser.add_argument("--length-scale", type=float, help="Phoneme length")
    parser.add_argument("--noise-w", type=float, help="Phoneme width noise")

    # Text handling, audio chunking and process limits
    parser.add_argument(
        "--auto-punctuation", default=".?!", help="Automatically add punctuation"
    )
    parser.add_argument("--samples-per-chunk", type=int, default=1024)
    parser.add_argument(
        "--max-piper-procs",
        type=int,
        default=1,
        help="Maximum number of piper process to run simultaneously (default: 1)",
    )

    # Voice catalog
    parser.add_argument(
        "--update-voices",
        action="store_true",
        help="Download latest voices.json during startup",
    )

    # Logging
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level)

    # Load voice info
    voices_info = get_voices(args.download_dir, update_voices=args.update_voices)

    # Old voice names stay usable: every alias gets a copy of its voice's
    # info, flagged so that it is not advertised as a separate voice below.
    aliases_info: Dict[str, Any] = {
        voice_alias: {"_is_alias": True, **voice_info}
        for voice_info in voices_info.values()
        for voice_alias in voice_info.get("aliases", [])
    }
    voices_info.update(aliases_info)

    def _attribution() -> Attribution:
        # A fresh Attribution object for every program/voice entry.
        return Attribution(name="rhasspy", url="https://github.com/rhasspy/piper")

    # Advertise the real voices (not aliases), ordered by name.
    tts_voices = []
    for voice_name in sorted(voices_info):
        voice_info = voices_info[voice_name]
        if voice_info.get("_is_alias", False):
            continue

        tts_voices.append(
            TtsVoice(
                name=voice_name,
                description=get_description(voice_info),
                attribution=_attribution(),
                installed=True,
                languages=[voice_info["language"]["code"]],
                # Don't send speakers for now because it overflows
                # StreamReader buffers. They would be built as
                # TtsVoiceSpeaker(name=...) for each name in
                # voice_info["speaker_id_map"].
            )
        )

    wyoming_info = Info(
        tts=[
            TtsProgram(
                name="piper",
                description="A fast, local, neural text to speech engine",
                attribution=_attribution(),
                installed=True,
                voices=tts_voices,
            )
        ],
    )

    process_manager = PiperProcessManager(args, voices_info)

    # Make sure default voice is loaded.
    # Other voices will be loaded on-demand.
    await process_manager.get_process()

    # Start server
    server = AsyncServer.from_uri(args.uri)

    _LOGGER.info("Ready")
    handler_factory = partial(PiperEventHandler, wyoming_info, args, process_manager)
    await server.run(handler_factory)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# -----------------------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def get_description(voice_info: Dict[str, Any]):
    """Build a human readable description for a voice.

    The voice's ``name`` has its underscores turned into spaces and is
    followed by the voice's ``quality`` in parentheses.
    """
    label = voice_info["name"].replace("_", " ")
    quality = voice_info["quality"]
    return f"{label} ({quality})"
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# -----------------------------------------------------------------------------
|
|
153
|
+
|
|
154
|
+
if __name__ == "__main__":
    # Run the server until it exits; Ctrl+C is not reported as an error.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass
|
wyoming_piper/const.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# Language codes, in alphabetical order and grouped here by first letter.
# NOTE(review): presumably the languages supported by Whisper, going by the
# name; nothing in the visible source reads this list, so confirm before use.
WHISPER_LANGUAGES = [
    "af", "am", "ar", "as", "az",
    "ba", "be", "bg", "bn", "bo", "br", "bs",
    "ca", "cs", "cy",
    "da", "de",
    "el", "en", "es", "et", "eu",
    "fa", "fi", "fo", "fr",
    "gl", "gu",
    "ha", "haw", "he", "hi", "hr", "ht", "hu", "hy",
    "id", "is", "it",
    "ja", "jw",
    "ka", "kk", "km", "kn", "ko",
    "la", "lb", "ln", "lo", "lt", "lv",
    "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my",
    "ne", "nl", "nn", "no",
    "oc",
    "pa", "pl", "ps", "pt",
    "ro", "ru",
    "sa", "sd", "si", "sk", "sl", "sn", "so", "sq", "sr", "su", "sv", "sw",
    "ta", "te", "tg", "th", "tk", "tl", "tr", "tt",
    "uk", "ur", "uz",
    "vi",
    "yi", "yo",
    "zh",
]
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""Utility for downloading Piper voices."""
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
import shutil
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, Iterable, Set, Tuple, Union
|
|
7
|
+
from urllib.request import urlopen
|
|
8
|
+
|
|
9
|
+
from .file_hash import get_file_hash
|
|
10
|
+
|
|
11
|
+
# URL template for files in the piper-voices repository; "{file}" is filled
# in with str.format.
URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}"

# Directory that contains this module.
_DIR = Path(__file__).parent

# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)

# File names that are neither checked nor downloaded.
_SKIP_FILES = {"MODEL_CARD"}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class VoiceNotFoundError(Exception):
    """Raised when a voice is neither in the catalog nor a usable custom voice."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_voices(
    download_dir: Union[str, Path], update_voices: bool = False
) -> Dict[str, Any]:
    """Load the voice catalog from a downloaded or embedded JSON file.

    Args:
        download_dir: Directory in which a downloaded ``voices.json`` is kept.
        update_voices: If true, try to download the latest ``voices.json``
            into ``download_dir`` before loading.

    Returns:
        The parsed contents of ``voices.json``. A ``voices.json`` in
        ``download_dir`` is preferred over the copy next to this module.

    A failed update is logged and otherwise ignored, so the previously
    downloaded or embedded catalog is still used.
    """
    download_dir = Path(download_dir)
    voices_download = download_dir / "voices.json"

    if update_voices:
        # Download latest voices.json
        voices_url = URL_FORMAT.format(file="voices.json")

        # Download to a scratch file and rename it into place afterwards, so
        # an interrupted transfer cannot leave a truncated voices.json that
        # would then be preferred over the embedded copy.
        voices_partial = download_dir / "voices.json.part"

        _LOGGER.debug("Downloading %s to %s", voices_url, voices_download)
        try:
            download_dir.mkdir(parents=True, exist_ok=True)
            with urlopen(voices_url) as response, open(
                voices_partial, "wb"
            ) as download_file:
                shutil.copyfileobj(response, download_file)

            voices_partial.replace(voices_download)
        except Exception:  # best effort: keep using the existing catalog
            _LOGGER.exception("Failed to update voices list")
            try:
                voices_partial.unlink()
            except OSError:
                # Nothing was written, or the scratch file cannot be removed.
                _LOGGER.debug("No scratch file removed: %s", voices_partial)

    # Prefer downloaded file to embedded
    voices_embedded = _DIR / "voices.json"
    voices_path = voices_download if voices_download.exists() else voices_embedded

    _LOGGER.debug("Loading %s", voices_path)
    with open(voices_path, "r", encoding="utf-8") as voices_file:
        return json.load(voices_file)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _voice_file_is_valid(data_file_path: Path, file_info: Dict[str, Any]) -> bool:
    """Return True if the file exists and matches the expected size and MD5."""
    _LOGGER.debug("Checking %s", data_file_path)
    if not data_file_path.exists():
        _LOGGER.debug("Missing %s", data_file_path)
        return False

    # Compare sizes first; it is much cheaper than hashing the file.
    expected_size = file_info["size_bytes"]
    actual_size = data_file_path.stat().st_size
    if expected_size != actual_size:
        _LOGGER.warning(
            "Wrong size (expected=%s, actual=%s) for %s",
            expected_size,
            actual_size,
            data_file_path,
        )
        return False

    expected_hash = file_info["md5_digest"]
    actual_hash = get_file_hash(data_file_path)
    if expected_hash != actual_hash:
        _LOGGER.warning(
            "Wrong hash (expected=%s, actual=%s) for %s",
            expected_hash,
            actual_hash,
            data_file_path,
        )
        return False

    return True


def ensure_voice_exists(
    name: str,
    data_dirs: Iterable[Union[str, Path]],
    download_dir: Union[str, Path],
    voices_info: Dict[str, Any],
):
    """Make sure the files of a voice are present, downloading them if needed.

    Args:
        name: Key in ``voices_info``, or the path of a custom voice model
            whose config is stored at ``name + ".json"``.
        data_dirs: Directories to check for existing voice files.
        download_dir: Directory that missing files are downloaded into.
        voices_info: Voice catalog. Each entry's ``"files"`` maps a file path
            to a dict with ``"size_bytes"`` and ``"md5_digest"``.

    Raises:
        VoiceNotFoundError: ``name`` is not in ``voices_info`` and is not a
            custom voice with both model and config present.
        ValueError: The voice lists no files at all.
    """
    if name not in voices_info:
        # Try as file path to a custom voice
        onnx_path = Path(name)
        config_path = Path(name + ".json")
        if onnx_path.exists():
            if config_path.exists():
                # Custom voice found
                return

            _LOGGER.warning("Missing custom voice config: %s", config_path)

        raise VoiceNotFoundError(name)

    assert data_dirs, "No data dirs"

    voice_files = voices_info[name]["files"]
    verified_files: Set[str] = set()
    files_to_download: Set[str] = set()

    for data_dir in data_dirs:
        data_dir = Path(data_dir)

        # Check sizes/hashes
        for file_path, file_info in voice_files.items():
            if file_path in verified_files:
                # A valid copy was already found in another data directory
                continue

            file_name = Path(file_path).name
            if file_name in _SKIP_FILES:
                continue

            if _voice_file_is_valid(data_dir / file_name, file_info):
                # One valid copy is enough: a file that was missing or broken
                # in an earlier directory must not be downloaded again.
                verified_files.add(file_path)
                files_to_download.discard(file_path)
            else:
                files_to_download.add(file_path)

    if (not voice_files) and (not files_to_download):
        raise ValueError(f"Unable to find or download voice: {name}")

    # Download missing files (sorted only to make the order deterministic)
    download_dir = Path(download_dir)

    for file_path in sorted(files_to_download):
        file_name = Path(file_path).name
        file_url = URL_FORMAT.format(file=file_path)
        download_file_path = download_dir / file_name
        download_file_path.parent.mkdir(parents=True, exist_ok=True)

        _LOGGER.debug("Downloading %s to %s", file_url, download_file_path)
        with urlopen(file_url) as response, open(
            download_file_path, "wb"
        ) as download_file:
            shutil.copyfileobj(response, download_file)

        _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def find_voice(name: str, data_dirs: Iterable[Union[str, Path]]) -> Tuple[Path, Path]:
    """Locate the model and config files of a voice.

    Every data directory is checked in order for ``<name>.onnx`` together
    with ``<name>.onnx.json``. As a last resort ``name`` itself is treated as
    the path of a custom model whose config is at ``name + ".json"``.

    Returns: tuple of onnx path, config path

    Raises: ValueError if no candidate has both files.
    """

    def _candidates():
        # Lazily produce (model, config) pairs in lookup order.
        for directory in data_dirs:
            base = Path(directory)
            yield base / f"{name}.onnx", base / f"{name}.onnx.json"

        # Custom voice given as a file path
        yield Path(name), Path(name + ".json")

    for model_path, settings_path in _candidates():
        if model_path.exists() and settings_path.exists():
            return model_path, settings_path

    raise ValueError(f"Missing files for voice {name}")
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import hashlib
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Union
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_file_hash(path: Union[str, Path], bytes_per_chunk: int = 8192) -> str:
    """Return the hex MD5 digest of a file, reading it chunk by chunk."""
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(bytes_per_chunk), b""):
            digest.update(block)

    return digest.hexdigest()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# -----------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def main():
    """Print a JSON object mapping each given file to its MD5 hash.

    With ``--dir``, the keys are the file paths relative to that directory.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("file", nargs="+")
    cli.add_argument("--dir", help="Parent directory")
    args = cli.parse_args()

    parent_dir = Path(args.dir) if args.dir else None

    digests = {}
    for raw_path in args.file:
        target = Path(raw_path)
        # Hash using the path as given; only the reported key is made relative.
        digest = get_file_hash(target)
        key = target if parent_dir is None else target.relative_to(parent_dir)
        digests[str(key)] = digest

    json.dump(digests, sys.stdout)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
if __name__ == "__main__":
    # Only run the command-line tool when executed as a script.
    main()
|
wyoming_piper/handler.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""Event handler for clients of the server."""
|
|
2
|
+
import argparse
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import math
|
|
6
|
+
import os
|
|
7
|
+
import wave
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
9
|
+
|
|
10
|
+
from wyoming.audio import AudioChunk, AudioStart, AudioStop
|
|
11
|
+
from wyoming.event import Event
|
|
12
|
+
from wyoming.info import Describe, Info
|
|
13
|
+
from wyoming.server import AsyncEventHandler
|
|
14
|
+
from wyoming.tts import Synthesize
|
|
15
|
+
|
|
16
|
+
from .process import PiperProcessManager
|
|
17
|
+
|
|
18
|
+
# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PiperEventHandler(AsyncEventHandler):
    """Answer describe and synthesize events by running text through piper."""

    def __init__(
        self,
        wyoming_info: Info,
        cli_args: argparse.Namespace,
        process_manager: PiperProcessManager,
        *args,
        **kwargs,
    ) -> None:
        """Store the state used to answer events.

        Args:
            wyoming_info: Service description; its event is built once here
                and sent in reply to every describe event.
            cli_args: Parsed command-line arguments (``auto_punctuation`` and
                ``samples_per_chunk`` are read by this handler).
            process_manager: Provides the piper process for a voice.
            *args: Passed through to ``AsyncEventHandler``.
            **kwargs: Passed through to ``AsyncEventHandler``.
        """
        super().__init__(*args, **kwargs)

        self.cli_args = cli_args
        self.wyoming_info_event = wyoming_info.event()
        self.process_manager = process_manager

    def _prepare_text(self, raw_text: str) -> str:
        """Join the lines of raw_text and add final punctuation if missing."""
        # Join multiple lines
        text = " ".join(raw_text.strip().splitlines())

        auto_punctuation = self.cli_args.auto_punctuation
        if auto_punctuation and text and (text[-1] not in auto_punctuation):
            # Add automatic punctuation (important for some voices)
            text = text + auto_punctuation[0]

        return text

    async def handle_event(self, event: Event) -> bool:
        """Handle a single event from the client.

        A describe event is answered with the service info. A synthesize
        event is answered with an audio start event, the audio in chunks of
        ``samples_per_chunk`` samples, and an audio stop event. Any other
        event is logged and ignored.

        Returns:
            Always True.
        """
        if Describe.is_type(event.type):
            await self.write_event(self.wyoming_info_event)
            _LOGGER.debug("Sent info")
            return True

        if not Synthesize.is_type(event.type):
            _LOGGER.warning("Unexpected event: %s", event)
            return True

        synthesize = Synthesize.from_event(event)
        _LOGGER.debug(synthesize)

        raw_text = synthesize.text
        text = self._prepare_text(raw_text)

        # NOTE(review): the lock is assumed to cover only the exchange with
        # the piper process; confirm the scope against the packaged file.
        async with self.process_manager.processes_lock:
            _LOGGER.debug("synthesize: raw_text=%s, text='%s'", raw_text, text)
            voice_name: Optional[str] = None
            voice_speaker: Optional[str] = None
            if synthesize.voice is not None:
                voice_name = synthesize.voice.name
                voice_speaker = synthesize.voice.speaker

            piper_proc = await self.process_manager.get_process(voice_name=voice_name)

            assert piper_proc.proc.stdin is not None
            assert piper_proc.proc.stdout is not None

            # JSON in, file path out
            input_obj: Dict[str, Any] = {"text": text}
            if voice_speaker is not None:
                speaker_id = piper_proc.get_speaker_id(voice_speaker)
                if speaker_id is not None:
                    input_obj["speaker_id"] = speaker_id
                else:
                    _LOGGER.warning(
                        "No speaker '%s' for voice '%s'", voice_speaker, voice_name
                    )

            _LOGGER.debug("input: %s", input_obj)
            piper_proc.proc.stdin.write(
                (json.dumps(input_obj, ensure_ascii=False) + "\n").encode()
            )
            await piper_proc.proc.stdin.drain()

            output_path = (await piper_proc.proc.stdout.readline()).decode().strip()
            _LOGGER.debug(output_path)

        try:
            wav_file: wave.Wave_read = wave.open(output_path, "rb")
            with wav_file:
                rate = wav_file.getframerate()
                width = wav_file.getsampwidth()
                channels = wav_file.getnchannels()

                await self.write_event(
                    AudioStart(
                        rate=rate,
                        width=width,
                        channels=channels,
                    ).event(),
                )

                # Audio
                audio_bytes = wav_file.readframes(wav_file.getnframes())
                bytes_per_sample = width * channels
                bytes_per_chunk = bytes_per_sample * self.cli_args.samples_per_chunk

                # Split into chunks; the last one may be shorter
                for offset in range(0, len(audio_bytes), bytes_per_chunk):
                    chunk = audio_bytes[offset : offset + bytes_per_chunk]
                    await self.write_event(
                        AudioChunk(
                            audio=chunk,
                            rate=rate,
                            width=width,
                            channels=channels,
                        ).event(),
                    )

            await self.write_event(AudioStop().event())
            _LOGGER.debug("Completed request")
        finally:
            # Remove piper's output file even when reading or sending it
            # failed, so that failed requests do not pile up WAV files.
            try:
                os.unlink(output_path)
            except OSError:
                _LOGGER.warning("Could not remove temporary file: %s", output_path)

        return True
|