wyoming_piper-1.4.0-py3-none-any.whl → wyoming_piper-1.6.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wyoming_piper/__init__.py +6 -0
- wyoming_piper/__main__.py +38 -11
- wyoming_piper/download.py +9 -7
- wyoming_piper/handler.py +73 -5
- wyoming_piper/sentence_boundary.py +58 -0
- wyoming_piper/voices.json +1600 -6
- wyoming_piper-1.6.3.dist-info/METADATA +75 -0
- wyoming_piper-1.6.3.dist-info/RECORD +15 -0
- {wyoming_piper-1.4.0.dist-info → wyoming_piper-1.6.3.dist-info}/WHEEL +1 -1
- wyoming_piper-1.6.3.dist-info/entry_points.txt +2 -0
- wyoming_piper-1.4.0.dist-info/METADATA +0 -21
- wyoming_piper-1.4.0.dist-info/RECORD +0 -13
- {wyoming_piper-1.4.0.dist-info → wyoming_piper-1.6.3.dist-info/licenses}/LICENSE.md +0 -0
- {wyoming_piper-1.4.0.dist-info → wyoming_piper-1.6.3.dist-info}/top_level.txt +0 -0
wyoming_piper/__init__.py
CHANGED
wyoming_piper/__main__.py
CHANGED
|
@@ -7,9 +7,10 @@ from functools import partial
|
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from typing import Any, Dict, Set
|
|
9
9
|
|
|
10
|
-
from wyoming.info import Attribution, Info, TtsProgram, TtsVoice
|
|
10
|
+
from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
|
|
11
11
|
from wyoming.server import AsyncServer
|
|
12
12
|
|
|
13
|
+
from . import __version__
|
|
13
14
|
from .download import find_voice, get_voices
|
|
14
15
|
from .handler import PiperEventHandler
|
|
15
16
|
from .process import PiperProcessManager
|
|
@@ -59,6 +60,11 @@ async def main() -> None:
|
|
|
59
60
|
default=1,
|
|
60
61
|
help="Maximum number of piper process to run simultaneously (default: 1)",
|
|
61
62
|
)
|
|
63
|
+
parser.add_argument(
|
|
64
|
+
"--streaming",
|
|
65
|
+
action="store_true",
|
|
66
|
+
help="Enable audio streaming on sentence boundaries",
|
|
67
|
+
)
|
|
62
68
|
#
|
|
63
69
|
parser.add_argument(
|
|
64
70
|
"--update-voices",
|
|
@@ -67,13 +73,25 @@ async def main() -> None:
|
|
|
67
73
|
)
|
|
68
74
|
#
|
|
69
75
|
parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
|
|
76
|
+
parser.add_argument(
|
|
77
|
+
"--log-format", default=logging.BASIC_FORMAT, help="Format for log messages"
|
|
78
|
+
)
|
|
79
|
+
parser.add_argument(
|
|
80
|
+
"--version",
|
|
81
|
+
action="version",
|
|
82
|
+
version=__version__,
|
|
83
|
+
help="Print version and exit",
|
|
84
|
+
)
|
|
70
85
|
args = parser.parse_args()
|
|
71
86
|
|
|
72
87
|
if not args.download_dir:
|
|
73
88
|
# Default to first data directory
|
|
74
89
|
args.download_dir = args.data_dir[0]
|
|
75
90
|
|
|
76
|
-
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
|
|
91
|
+
logging.basicConfig(
|
|
92
|
+
level=logging.DEBUG if args.debug else logging.INFO, format=args.log_format
|
|
93
|
+
)
|
|
94
|
+
_LOGGER.debug(args)
|
|
77
95
|
|
|
78
96
|
# Load voice info
|
|
79
97
|
voices_info = get_voices(args.download_dir, update_voices=args.update_voices)
|
|
@@ -93,20 +111,21 @@ async def main() -> None:
|
|
|
93
111
|
name="rhasspy", url="https://github.com/rhasspy/piper"
|
|
94
112
|
),
|
|
95
113
|
installed=True,
|
|
114
|
+
version=None,
|
|
96
115
|
languages=[
|
|
97
116
|
voice_info.get("language", {}).get(
|
|
98
117
|
"code",
|
|
99
118
|
voice_info.get("espeak", {}).get("voice", voice_name.split("_")[0]),
|
|
100
119
|
)
|
|
101
120
|
],
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
121
|
+
speakers=(
|
|
122
|
+
[
|
|
123
|
+
TtsVoiceSpeaker(name=speaker_name)
|
|
124
|
+
for speaker_name in voice_info["speaker_id_map"]
|
|
125
|
+
]
|
|
126
|
+
if voice_info.get("speaker_id_map")
|
|
127
|
+
else None
|
|
128
|
+
),
|
|
110
129
|
)
|
|
111
130
|
for voice_name, voice_info in voices_info.items()
|
|
112
131
|
if not voice_info.get("_is_alias", False)
|
|
@@ -150,6 +169,7 @@ async def main() -> None:
|
|
|
150
169
|
TtsVoice(
|
|
151
170
|
name=custom_name,
|
|
152
171
|
description=description,
|
|
172
|
+
version=None,
|
|
153
173
|
attribution=Attribution(name="", url=""),
|
|
154
174
|
installed=True,
|
|
155
175
|
languages=[lang_code],
|
|
@@ -166,6 +186,8 @@ async def main() -> None:
|
|
|
166
186
|
),
|
|
167
187
|
installed=True,
|
|
168
188
|
voices=sorted(voices, key=lambda v: v.name),
|
|
189
|
+
version=__version__,
|
|
190
|
+
supports_synthesize_streaming=args.streaming,
|
|
169
191
|
)
|
|
170
192
|
],
|
|
171
193
|
)
|
|
@@ -204,8 +226,13 @@ def get_description(voice_info: Dict[str, Any]):
|
|
|
204
226
|
|
|
205
227
|
# -----------------------------------------------------------------------------
|
|
206
228
|
|
|
229
|
+
|
|
230
|
+
def run():
|
|
231
|
+
asyncio.run(main())
|
|
232
|
+
|
|
233
|
+
|
|
207
234
|
if __name__ == "__main__":
|
|
208
235
|
try:
|
|
209
|
-
|
|
236
|
+
run()
|
|
210
237
|
except KeyboardInterrupt:
|
|
211
238
|
pass
|
wyoming_piper/download.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Utility for downloading Piper voices."""
|
|
2
|
+
|
|
2
3
|
import json
|
|
3
4
|
import logging
|
|
4
5
|
import shutil
|
|
@@ -10,7 +11,7 @@ from urllib.request import urlopen
|
|
|
10
11
|
|
|
11
12
|
from .file_hash import get_file_hash
|
|
12
13
|
|
|
13
|
-
URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}"
|
|
14
|
+
URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/main/{file}"
|
|
14
15
|
|
|
15
16
|
_DIR = Path(__file__).parent
|
|
16
17
|
_LOGGER = logging.getLogger(__name__)
|
|
@@ -47,20 +48,21 @@ def get_voices(
|
|
|
47
48
|
except Exception:
|
|
48
49
|
_LOGGER.exception("Failed to update voices list")
|
|
49
50
|
|
|
51
|
+
voices_embedded = _DIR / "voices.json"
|
|
52
|
+
_LOGGER.debug("Loading %s", voices_embedded)
|
|
53
|
+
with open(voices_embedded, "r", encoding="utf-8") as voices_file:
|
|
54
|
+
voices = json.load(voices_file)
|
|
55
|
+
|
|
50
56
|
# Prefer downloaded file to embedded
|
|
51
57
|
if voices_download.exists():
|
|
52
58
|
try:
|
|
53
59
|
_LOGGER.debug("Loading %s", voices_download)
|
|
54
60
|
with open(voices_download, "r", encoding="utf-8") as voices_file:
|
|
55
|
-
|
|
61
|
+
voices.update(json.load(voices_file))
|
|
56
62
|
except Exception:
|
|
57
63
|
_LOGGER.exception("Failed to load %s", voices_download)
|
|
58
64
|
|
|
59
|
-
|
|
60
|
-
voices_embedded = _DIR / "voices.json"
|
|
61
|
-
_LOGGER.debug("Loading %s", voices_embedded)
|
|
62
|
-
with open(voices_embedded, "r", encoding="utf-8") as voices_file:
|
|
63
|
-
return json.load(voices_file)
|
|
65
|
+
return voices
|
|
64
66
|
|
|
65
67
|
|
|
66
68
|
def ensure_voice_exists(
|
wyoming_piper/handler.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Event handler for clients of the server."""
|
|
2
|
+
|
|
2
3
|
import argparse
|
|
3
4
|
import json
|
|
4
5
|
import logging
|
|
@@ -8,12 +9,20 @@ import wave
|
|
|
8
9
|
from typing import Any, Dict, Optional
|
|
9
10
|
|
|
10
11
|
from wyoming.audio import AudioChunk, AudioStart, AudioStop
|
|
12
|
+
from wyoming.error import Error
|
|
11
13
|
from wyoming.event import Event
|
|
12
14
|
from wyoming.info import Describe, Info
|
|
13
15
|
from wyoming.server import AsyncEventHandler
|
|
14
|
-
from wyoming.tts import Synthesize
|
|
16
|
+
from wyoming.tts import (
|
|
17
|
+
Synthesize,
|
|
18
|
+
SynthesizeChunk,
|
|
19
|
+
SynthesizeStart,
|
|
20
|
+
SynthesizeStop,
|
|
21
|
+
SynthesizeStopped,
|
|
22
|
+
)
|
|
15
23
|
|
|
16
24
|
from .process import PiperProcessManager
|
|
25
|
+
from .sentence_boundary import SentenceBoundaryDetector, remove_asterisks
|
|
17
26
|
|
|
18
27
|
_LOGGER = logging.getLogger(__name__)
|
|
19
28
|
|
|
@@ -32,6 +41,9 @@ class PiperEventHandler(AsyncEventHandler):
|
|
|
32
41
|
self.cli_args = cli_args
|
|
33
42
|
self.wyoming_info_event = wyoming_info.event()
|
|
34
43
|
self.process_manager = process_manager
|
|
44
|
+
self.sbd = SentenceBoundaryDetector()
|
|
45
|
+
self.is_streaming: Optional[bool] = None
|
|
46
|
+
self._synthesize: Optional[Synthesize] = None
|
|
35
47
|
|
|
36
48
|
async def handle_event(self, event: Event) -> bool:
|
|
37
49
|
if Describe.is_type(event.type):
|
|
@@ -39,11 +51,67 @@ class PiperEventHandler(AsyncEventHandler):
|
|
|
39
51
|
_LOGGER.debug("Sent info")
|
|
40
52
|
return True
|
|
41
53
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
54
|
+
try:
|
|
55
|
+
if Synthesize.is_type(event.type):
|
|
56
|
+
if self.is_streaming:
|
|
57
|
+
# Ignore since this is only sent for compatibility reasons.
|
|
58
|
+
# For streaming, we expect:
|
|
59
|
+
# [synthesize-start] -> [synthesize-chunk]+ -> [synthesize]? -> [synthesize-stop]
|
|
60
|
+
return True
|
|
61
|
+
|
|
62
|
+
# Sent outside a stream, so we must process it
|
|
63
|
+
synthesize = Synthesize.from_event(event)
|
|
64
|
+
synthesize.text = remove_asterisks(synthesize.text)
|
|
65
|
+
return await self._handle_synthesize(synthesize)
|
|
66
|
+
|
|
67
|
+
if not self.cli_args.streaming:
|
|
68
|
+
# Streaming is not enabled
|
|
69
|
+
return True
|
|
70
|
+
|
|
71
|
+
if SynthesizeStart.is_type(event.type):
|
|
72
|
+
# Start of a stream
|
|
73
|
+
stream_start = SynthesizeStart.from_event(event)
|
|
74
|
+
self.is_streaming = True
|
|
75
|
+
self.sbd = SentenceBoundaryDetector()
|
|
76
|
+
self._synthesize = Synthesize(text="", voice=stream_start.voice)
|
|
77
|
+
_LOGGER.debug("Text stream started: voice=%s", stream_start.voice)
|
|
78
|
+
return True
|
|
79
|
+
|
|
80
|
+
if SynthesizeChunk.is_type(event.type):
|
|
81
|
+
assert self._synthesize is not None
|
|
82
|
+
stream_chunk = SynthesizeChunk.from_event(event)
|
|
83
|
+
for sentence in self.sbd.add_chunk(stream_chunk.text):
|
|
84
|
+
_LOGGER.debug("Synthesizing stream sentence: %s", sentence)
|
|
85
|
+
self._synthesize.text = sentence
|
|
86
|
+
await self._handle_synthesize(self._synthesize)
|
|
87
|
+
|
|
88
|
+
return True
|
|
89
|
+
|
|
90
|
+
if SynthesizeStop.is_type(event.type):
|
|
91
|
+
assert self._synthesize is not None
|
|
92
|
+
self._synthesize.text = self.sbd.finish()
|
|
93
|
+
if self._synthesize.text:
|
|
94
|
+
# Final audio chunk(s)
|
|
95
|
+
await self._handle_synthesize(self._synthesize)
|
|
96
|
+
|
|
97
|
+
# End of audio
|
|
98
|
+
await self.write_event(SynthesizeStopped().event())
|
|
99
|
+
|
|
100
|
+
_LOGGER.debug("Text stream stopped")
|
|
101
|
+
return True
|
|
102
|
+
|
|
103
|
+
if not Synthesize.is_type(event.type):
|
|
104
|
+
return True
|
|
105
|
+
|
|
106
|
+
synthesize = Synthesize.from_event(event)
|
|
107
|
+
return await self._handle_synthesize(synthesize)
|
|
108
|
+
except Exception as err:
|
|
109
|
+
await self.write_event(
|
|
110
|
+
Error(text=str(err), code=err.__class__.__name__).event()
|
|
111
|
+
)
|
|
112
|
+
raise err
|
|
45
113
|
|
|
46
|
-
|
|
114
|
+
async def _handle_synthesize(self, synthesize: Synthesize) -> bool:
|
|
47
115
|
_LOGGER.debug(synthesize)
|
|
48
116
|
|
|
49
117
|
raw_text = synthesize.text
|
|
wyoming_piper/sentence_boundary.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Guess the sentence boundaries in text."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Iterable
|
|
4
|
+
|
|
5
|
+
import regex as re
|
|
6
|
+
|
|
7
|
+
SENTENCE_END = r"[.!?…]|[。!?]|[؟]|[।॥]"
|
|
8
|
+
ABBREVIATION_RE = re.compile(r"\b\p{L}{1,3}\.$", re.UNICODE)
|
|
9
|
+
|
|
10
|
+
SENTENCE_BOUNDARY_RE = re.compile(
|
|
11
|
+
rf"(.*?(?:{SENTENCE_END}+))(?=\s+[\p{{Lu}}\p{{Lt}}\p{{Lo}}]|(?:\s+\d+\.\s+))",
|
|
12
|
+
re.DOTALL,
|
|
13
|
+
)
|
|
14
|
+
WORD_ASTERISKS = re.compile(r"\*+([^\*]+)\*+")
|
|
15
|
+
LINE_ASTERICKS = re.compile(r"(?<=^|\n)\s*\*+")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class SentenceBoundaryDetector:
|
|
19
|
+
def __init__(self) -> None:
|
|
20
|
+
self.remaining_text = ""
|
|
21
|
+
self.current_sentence = ""
|
|
22
|
+
|
|
23
|
+
def add_chunk(self, chunk: str) -> Iterable[str]:
|
|
24
|
+
self.remaining_text += chunk
|
|
25
|
+
while self.remaining_text:
|
|
26
|
+
match = SENTENCE_BOUNDARY_RE.search(self.remaining_text)
|
|
27
|
+
if not match:
|
|
28
|
+
break
|
|
29
|
+
|
|
30
|
+
match_text = match.group(0)
|
|
31
|
+
|
|
32
|
+
if not self.current_sentence:
|
|
33
|
+
self.current_sentence = match_text
|
|
34
|
+
elif ABBREVIATION_RE.search(self.current_sentence[-5:]):
|
|
35
|
+
self.current_sentence += match_text
|
|
36
|
+
else:
|
|
37
|
+
yield remove_asterisks(self.current_sentence.strip())
|
|
38
|
+
self.current_sentence = match_text
|
|
39
|
+
|
|
40
|
+
if not ABBREVIATION_RE.search(self.current_sentence[-5:]):
|
|
41
|
+
yield remove_asterisks(self.current_sentence.strip())
|
|
42
|
+
self.current_sentence = ""
|
|
43
|
+
|
|
44
|
+
self.remaining_text = self.remaining_text[match.end() :]
|
|
45
|
+
|
|
46
|
+
def finish(self) -> str:
|
|
47
|
+
text = (self.current_sentence + self.remaining_text).strip()
|
|
48
|
+
self.remaining_text = ""
|
|
49
|
+
self.current_sentence = ""
|
|
50
|
+
|
|
51
|
+
return remove_asterisks(text)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def remove_asterisks(text: str) -> str:
|
|
55
|
+
"""Remove *asterisks* surrounding **words**"""
|
|
56
|
+
text = WORD_ASTERISKS.sub(r"\1", text)
|
|
57
|
+
text = LINE_ASTERICKS.sub("", text)
|
|
58
|
+
return text
|