wyoming-piper 1.4.0__py3-none-any.whl → 1.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wyoming_piper/__init__.py CHANGED
@@ -1 +1,7 @@
1
1
  """Wyoming server for piper."""
2
+
3
+ from importlib.metadata import version
4
+
5
+ __version__ = version("wyoming_piper")
6
+
7
+ __all__ = ["__version__"]
wyoming_piper/__main__.py CHANGED
@@ -7,9 +7,10 @@ from functools import partial
7
7
  from pathlib import Path
8
8
  from typing import Any, Dict, Set
9
9
 
10
- from wyoming.info import Attribution, Info, TtsProgram, TtsVoice
10
+ from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
11
11
  from wyoming.server import AsyncServer
12
12
 
13
+ from . import __version__
13
14
  from .download import find_voice, get_voices
14
15
  from .handler import PiperEventHandler
15
16
  from .process import PiperProcessManager
@@ -59,6 +60,11 @@ async def main() -> None:
59
60
  default=1,
60
61
  help="Maximum number of piper process to run simultaneously (default: 1)",
61
62
  )
63
+ parser.add_argument(
64
+ "--streaming",
65
+ action="store_true",
66
+ help="Enable audio streaming on sentence boundaries",
67
+ )
62
68
  #
63
69
  parser.add_argument(
64
70
  "--update-voices",
@@ -67,13 +73,25 @@ async def main() -> None:
67
73
  )
68
74
  #
69
75
  parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
76
+ parser.add_argument(
77
+ "--log-format", default=logging.BASIC_FORMAT, help="Format for log messages"
78
+ )
79
+ parser.add_argument(
80
+ "--version",
81
+ action="version",
82
+ version=__version__,
83
+ help="Print version and exit",
84
+ )
70
85
  args = parser.parse_args()
71
86
 
72
87
  if not args.download_dir:
73
88
  # Default to first data directory
74
89
  args.download_dir = args.data_dir[0]
75
90
 
76
- logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
91
+ logging.basicConfig(
92
+ level=logging.DEBUG if args.debug else logging.INFO, format=args.log_format
93
+ )
94
+ _LOGGER.debug(args)
77
95
 
78
96
  # Load voice info
79
97
  voices_info = get_voices(args.download_dir, update_voices=args.update_voices)
@@ -93,20 +111,21 @@ async def main() -> None:
93
111
  name="rhasspy", url="https://github.com/rhasspy/piper"
94
112
  ),
95
113
  installed=True,
114
+ version=None,
96
115
  languages=[
97
116
  voice_info.get("language", {}).get(
98
117
  "code",
99
118
  voice_info.get("espeak", {}).get("voice", voice_name.split("_")[0]),
100
119
  )
101
120
  ],
102
- #
103
- # Don't send speakers for now because it overflows StreamReader buffers
104
- # speakers=[
105
- # TtsVoiceSpeaker(name=speaker_name)
106
- # for speaker_name in voice_info["speaker_id_map"]
107
- # ]
108
- # if voice_info.get("speaker_id_map")
109
- # else None,
121
+ speakers=(
122
+ [
123
+ TtsVoiceSpeaker(name=speaker_name)
124
+ for speaker_name in voice_info["speaker_id_map"]
125
+ ]
126
+ if voice_info.get("speaker_id_map")
127
+ else None
128
+ ),
110
129
  )
111
130
  for voice_name, voice_info in voices_info.items()
112
131
  if not voice_info.get("_is_alias", False)
@@ -150,6 +169,7 @@ async def main() -> None:
150
169
  TtsVoice(
151
170
  name=custom_name,
152
171
  description=description,
172
+ version=None,
153
173
  attribution=Attribution(name="", url=""),
154
174
  installed=True,
155
175
  languages=[lang_code],
@@ -166,6 +186,8 @@ async def main() -> None:
166
186
  ),
167
187
  installed=True,
168
188
  voices=sorted(voices, key=lambda v: v.name),
189
+ version=__version__,
190
+ supports_synthesize_streaming=args.streaming,
169
191
  )
170
192
  ],
171
193
  )
@@ -204,8 +226,13 @@ def get_description(voice_info: Dict[str, Any]):
204
226
 
205
227
  # -----------------------------------------------------------------------------
206
228
 
229
+
230
def run():
    """Run the asynchronous ``main`` coroutine to completion via ``asyncio.run``."""
    entry_coroutine = main()
    asyncio.run(entry_coroutine)
232
+
233
+
207
234
  if __name__ == "__main__":
208
235
  try:
209
- asyncio.run(main())
236
+ run()
210
237
  except KeyboardInterrupt:
211
238
  pass
wyoming_piper/download.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Utility for downloading Piper voices."""
2
+
2
3
  import json
3
4
  import logging
4
5
  import shutil
@@ -10,7 +11,7 @@ from urllib.request import urlopen
10
11
 
11
12
  from .file_hash import get_file_hash
12
13
 
13
- URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}"
14
+ URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/main/{file}"
14
15
 
15
16
  _DIR = Path(__file__).parent
16
17
  _LOGGER = logging.getLogger(__name__)
@@ -47,20 +48,21 @@ def get_voices(
47
48
  except Exception:
48
49
  _LOGGER.exception("Failed to update voices list")
49
50
 
51
+ voices_embedded = _DIR / "voices.json"
52
+ _LOGGER.debug("Loading %s", voices_embedded)
53
+ with open(voices_embedded, "r", encoding="utf-8") as voices_file:
54
+ voices = json.load(voices_file)
55
+
50
56
  # Prefer downloaded file to embedded
51
57
  if voices_download.exists():
52
58
  try:
53
59
  _LOGGER.debug("Loading %s", voices_download)
54
60
  with open(voices_download, "r", encoding="utf-8") as voices_file:
55
- return json.load(voices_file)
61
+ voices.update(json.load(voices_file))
56
62
  except Exception:
57
63
  _LOGGER.exception("Failed to load %s", voices_download)
58
64
 
59
- # Fall back to embedded
60
- voices_embedded = _DIR / "voices.json"
61
- _LOGGER.debug("Loading %s", voices_embedded)
62
- with open(voices_embedded, "r", encoding="utf-8") as voices_file:
63
- return json.load(voices_file)
65
+ return voices
64
66
 
65
67
 
66
68
  def ensure_voice_exists(
wyoming_piper/handler.py CHANGED
@@ -1,4 +1,5 @@
1
1
  """Event handler for clients of the server."""
2
+
2
3
  import argparse
3
4
  import json
4
5
  import logging
@@ -8,12 +9,20 @@ import wave
8
9
  from typing import Any, Dict, Optional
9
10
 
10
11
  from wyoming.audio import AudioChunk, AudioStart, AudioStop
12
+ from wyoming.error import Error
11
13
  from wyoming.event import Event
12
14
  from wyoming.info import Describe, Info
13
15
  from wyoming.server import AsyncEventHandler
14
- from wyoming.tts import Synthesize
16
+ from wyoming.tts import (
17
+ Synthesize,
18
+ SynthesizeChunk,
19
+ SynthesizeStart,
20
+ SynthesizeStop,
21
+ SynthesizeStopped,
22
+ )
15
23
 
16
24
  from .process import PiperProcessManager
25
+ from .sentence_boundary import SentenceBoundaryDetector, remove_asterisks
17
26
 
18
27
  _LOGGER = logging.getLogger(__name__)
19
28
 
@@ -32,6 +41,9 @@ class PiperEventHandler(AsyncEventHandler):
32
41
  self.cli_args = cli_args
33
42
  self.wyoming_info_event = wyoming_info.event()
34
43
  self.process_manager = process_manager
44
+ self.sbd = SentenceBoundaryDetector()
45
+ self.is_streaming: Optional[bool] = None
46
+ self._synthesize: Optional[Synthesize] = None
35
47
 
36
48
  async def handle_event(self, event: Event) -> bool:
37
49
  if Describe.is_type(event.type):
@@ -39,11 +51,67 @@ class PiperEventHandler(AsyncEventHandler):
39
51
  _LOGGER.debug("Sent info")
40
52
  return True
41
53
 
42
- if not Synthesize.is_type(event.type):
43
- _LOGGER.warning("Unexpected event: %s", event)
44
- return True
54
+ try:
55
+ if Synthesize.is_type(event.type):
56
+ if self.is_streaming:
57
+ # Ignore since this is only sent for compatibility reasons.
58
+ # For streaming, we expect:
59
+ # [synthesize-start] -> [synthesize-chunk]+ -> [synthesize]? -> [synthesize-stop]
60
+ return True
61
+
62
+ # Sent outside a stream, so we must process it
63
+ synthesize = Synthesize.from_event(event)
64
+ synthesize.text = remove_asterisks(synthesize.text)
65
+ return await self._handle_synthesize(synthesize)
66
+
67
+ if not self.cli_args.streaming:
68
+ # Streaming is not enabled
69
+ return True
70
+
71
+ if SynthesizeStart.is_type(event.type):
72
+ # Start of a stream
73
+ stream_start = SynthesizeStart.from_event(event)
74
+ self.is_streaming = True
75
+ self.sbd = SentenceBoundaryDetector()
76
+ self._synthesize = Synthesize(text="", voice=stream_start.voice)
77
+ _LOGGER.debug("Text stream started: voice=%s", stream_start.voice)
78
+ return True
79
+
80
+ if SynthesizeChunk.is_type(event.type):
81
+ assert self._synthesize is not None
82
+ stream_chunk = SynthesizeChunk.from_event(event)
83
+ for sentence in self.sbd.add_chunk(stream_chunk.text):
84
+ _LOGGER.debug("Synthesizing stream sentence: %s", sentence)
85
+ self._synthesize.text = sentence
86
+ await self._handle_synthesize(self._synthesize)
87
+
88
+ return True
89
+
90
+ if SynthesizeStop.is_type(event.type):
91
+ assert self._synthesize is not None
92
+ self._synthesize.text = self.sbd.finish()
93
+ if self._synthesize.text:
94
+ # Final audio chunk(s)
95
+ await self._handle_synthesize(self._synthesize)
96
+
97
+ # End of audio
98
+ await self.write_event(SynthesizeStopped().event())
99
+
100
+ _LOGGER.debug("Text stream stopped")
101
+ return True
102
+
103
+ if not Synthesize.is_type(event.type):
104
+ return True
105
+
106
+ synthesize = Synthesize.from_event(event)
107
+ return await self._handle_synthesize(synthesize)
108
+ except Exception as err:
109
+ await self.write_event(
110
+ Error(text=str(err), code=err.__class__.__name__).event()
111
+ )
112
+ raise err
45
113
 
46
- synthesize = Synthesize.from_event(event)
114
+ async def _handle_synthesize(self, synthesize: Synthesize) -> bool:
47
115
  _LOGGER.debug(synthesize)
48
116
 
49
117
  raw_text = synthesize.text
@@ -0,0 +1,58 @@
1
+ """Guess the sentence boundaries in text."""
2
+
3
+ from collections.abc import Iterable
4
+
5
+ import regex as re
6
+
7
+ SENTENCE_END = r"[.!?…]|[。!?]|[؟]|[।॥]"
8
+ ABBREVIATION_RE = re.compile(r"\b\p{L}{1,3}\.$", re.UNICODE)
9
+
10
+ SENTENCE_BOUNDARY_RE = re.compile(
11
+ rf"(.*?(?:{SENTENCE_END}+))(?=\s+[\p{{Lu}}\p{{Lt}}\p{{Lo}}]|(?:\s+\d+\.\s+))",
12
+ re.DOTALL,
13
+ )
14
+ WORD_ASTERISKS = re.compile(r"\*+([^\*]+)\*+")
15
+ LINE_ASTERICKS = re.compile(r"(?<=^|\n)\s*\*+")
16
+
17
+
18
class SentenceBoundaryDetector:
    """Incrementally split streamed text into sentences.

    Text is fed in with :meth:`add_chunk`, which yields each sentence as soon
    as ``SENTENCE_BOUNDARY_RE`` finds its end in the buffered text. Whatever is
    still buffered when the stream ends is returned by :meth:`finish`.
    Every sentence handed out has been passed through ``remove_asterisks``.
    """

    def __init__(self) -> None:
        # Text received so far that has not been matched as a sentence yet.
        self.remaining_text = ""
        # Matched text that is being held back because its tail matched
        # ABBREVIATION_RE (one to three letters followed by a period).
        self.current_sentence = ""

    def add_chunk(self, chunk: str) -> Iterable[str]:
        """Buffer *chunk* and yield every sentence that is now complete.

        This is a generator: the internal buffers are updated as it is
        iterated, so it must be consumed for the chunk to be processed fully.
        """
        self.remaining_text = self.remaining_text + chunk

        while self.remaining_text:
            boundary = SENTENCE_BOUNDARY_RE.search(self.remaining_text)
            if not boundary:
                # No sentence end in the buffer yet; wait for more text.
                return

            piece = boundary.group(0)
            held = self.current_sentence

            if held and not ABBREVIATION_RE.search(held[-5:]):
                # The held text is a finished sentence; emit it before
                # starting a new one with the fresh match.
                yield remove_asterisks(held.strip())
                self.current_sentence = piece
            else:
                # Either nothing is held, or the held text ended in what
                # looks like an abbreviation: keep building on it.
                self.current_sentence = held + piece

            # Only the last five characters are inspected for the
            # abbreviation check.
            if not ABBREVIATION_RE.search(self.current_sentence[-5:]):
                yield remove_asterisks(self.current_sentence.strip())
                self.current_sentence = ""

            # Drop the consumed match from the front of the buffer.
            self.remaining_text = self.remaining_text[boundary.end() :]

    def finish(self) -> str:
        """Return all text still buffered (asterisks removed) and reset state."""
        leftover = (self.current_sentence + self.remaining_text).strip()

        self.current_sentence = ""
        self.remaining_text = ""

        return remove_asterisks(leftover)
52
+
53
+
54
def remove_asterisks(text: str) -> str:
    """Return *text* with asterisk markup removed.

    Two substitutions are applied in order: ``WORD_ASTERISKS`` replaces each
    asterisk-wrapped span with its inner text, then ``LINE_ASTERICKS`` deletes
    asterisks (and the whitespace before them) at the start of the text or of
    a line.
    """
    unwrapped = WORD_ASTERISKS.sub(r"\1", text)
    return LINE_ASTERICKS.sub("", unwrapped)