wyoming-piper 1.6.3__py3-none-any.whl → 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wyoming_piper/__main__.py CHANGED
@@ -8,12 +8,11 @@ from pathlib import Path
8
8
  from typing import Any, Dict, Set
9
9
 
10
10
  from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
11
- from wyoming.server import AsyncServer
11
+ from wyoming.server import AsyncServer, AsyncTcpServer
12
12
 
13
13
  from . import __version__
14
- from .download import find_voice, get_voices
14
+ from .download import ensure_voice_exists, find_voice, get_voices
15
15
  from .handler import PiperEventHandler
16
- from .process import PiperProcessManager
17
16
 
18
17
  _LOGGER = logging.getLogger(__name__)
19
18
 
@@ -21,17 +20,20 @@ _LOGGER = logging.getLogger(__name__)
21
20
  async def main() -> None:
22
21
  """Main entry point."""
23
22
  parser = argparse.ArgumentParser()
24
- parser.add_argument(
25
- "--piper",
26
- required=True,
27
- help="Path to piper executable",
28
- )
29
23
  parser.add_argument(
30
24
  "--voice",
31
25
  required=True,
32
26
  help="Default Piper voice to use (e.g., en_US-lessac-medium)",
33
27
  )
34
28
  parser.add_argument("--uri", default="stdio://", help="unix:// or tcp://")
29
+ #
30
+ parser.add_argument(
31
+ "--zeroconf",
32
+ nargs="?",
33
+ const="piper",
34
+ help="Enable discovery over zeroconf with optional name (default: piper)",
35
+ )
36
+ #
35
37
  parser.add_argument(
36
38
  "--data-dir",
37
39
  required=True,
@@ -48,22 +50,18 @@ async def main() -> None:
48
50
  )
49
51
  parser.add_argument("--noise-scale", type=float, help="Generator noise")
50
52
  parser.add_argument("--length-scale", type=float, help="Phoneme length")
51
- parser.add_argument("--noise-w", type=float, help="Phoneme width noise")
53
+ parser.add_argument(
54
+ "--noise-w-scale", "--noise-w", type=float, help="Phoneme width noise"
55
+ )
52
56
  #
53
57
  parser.add_argument(
54
58
  "--auto-punctuation", default=".?!", help="Automatically add punctuation"
55
59
  )
56
60
  parser.add_argument("--samples-per-chunk", type=int, default=1024)
57
61
  parser.add_argument(
58
- "--max-piper-procs",
59
- type=int,
60
- default=1,
61
- help="Maximum number of piper process to run simultaneously (default: 1)",
62
- )
63
- parser.add_argument(
64
- "--streaming",
62
+ "--no-streaming",
65
63
  action="store_true",
66
- help="Enable audio streaming on sentence boundaries",
64
+ help="Disable audio streaming on sentence boundaries",
67
65
  )
68
66
  #
69
67
  parser.add_argument(
@@ -72,6 +70,12 @@ async def main() -> None:
72
70
  help="Download latest voices.json during startup",
73
71
  )
74
72
  #
73
+ parser.add_argument(
74
+ "--use-cuda",
75
+ action="store_true",
76
+ help="Use CUDA if available (requires onnxruntime-gpu)",
77
+ )
78
+ #
75
79
  parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
76
80
  parser.add_argument(
77
81
  "--log-format", default=logging.BASIC_FORMAT, help="Format for log messages"
@@ -187,27 +191,41 @@ async def main() -> None:
187
191
  installed=True,
188
192
  voices=sorted(voices, key=lambda v: v.name),
189
193
  version=__version__,
190
- supports_synthesize_streaming=args.streaming,
194
+ supports_synthesize_streaming=(not args.no_streaming),
191
195
  )
192
196
  ],
193
197
  )
194
198
 
195
- process_manager = PiperProcessManager(args, voices_info)
199
+ # Ensure default voice is downloaded
200
+ voice_info = voices_info.get(args.voice, {})
201
+ voice_name = voice_info.get("key", args.voice)
202
+ assert voice_name is not None
196
203
 
197
- # Make sure default voice is loaded.
198
- # Other voices will be loaded on-demand.
199
- await process_manager.get_process()
204
+ ensure_voice_exists(voice_name, args.data_dir, args.download_dir, voices_info)
200
205
 
201
206
  # Start server
202
207
  server = AsyncServer.from_uri(args.uri)
203
208
 
209
+ if args.zeroconf:
210
+ if not isinstance(server, AsyncTcpServer):
211
+ raise ValueError("Zeroconf requires tcp:// uri")
212
+
213
+ from wyoming.zeroconf import HomeAssistantZeroconf
214
+
215
+ tcp_server: AsyncTcpServer = server
216
+ hass_zeroconf = HomeAssistantZeroconf(
217
+ name=args.zeroconf, port=tcp_server.port, host=tcp_server.host
218
+ )
219
+ await hass_zeroconf.register_server()
220
+ _LOGGER.debug("Zeroconf discovery enabled")
221
+
204
222
  _LOGGER.info("Ready")
205
223
  await server.run(
206
224
  partial(
207
225
  PiperEventHandler,
208
226
  wyoming_info,
209
227
  args,
210
- process_manager,
228
+ voices_info,
211
229
  )
212
230
  )
213
231
 
wyoming_piper/download.py CHANGED
@@ -9,8 +9,6 @@ from urllib.error import URLError
9
9
  from urllib.parse import quote, urlsplit, urlunsplit
10
10
  from urllib.request import urlopen
11
11
 
12
- from .file_hash import get_file_hash
13
-
14
12
  URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/main/{file}"
15
13
 
16
14
  _DIR = Path(__file__).parent
@@ -89,8 +87,7 @@ def ensure_voice_exists(
89
87
  for data_dir in data_dirs:
90
88
  data_dir = Path(data_dir)
91
89
 
92
- # Check sizes/hashes
93
- for file_path, file_info in voice_files.items():
90
+ for file_path, _file_info in voice_files.items():
94
91
  if file_path in verified_files:
95
92
  # Already verified this file in a different data directory
96
93
  continue
@@ -101,34 +98,37 @@ def ensure_voice_exists(
101
98
 
102
99
  data_file_path = data_dir / file_name
103
100
  _LOGGER.debug("Checking %s", data_file_path)
104
- if not data_file_path.exists():
101
+ if (not data_file_path.exists()) or (data_file_path.stat().st_size == 0):
105
102
  _LOGGER.debug("Missing %s", data_file_path)
106
103
  files_to_download.add(file_path)
107
104
  continue
108
105
 
109
- expected_size = file_info["size_bytes"]
110
- actual_size = data_file_path.stat().st_size
111
- if expected_size != actual_size:
112
- _LOGGER.warning(
113
- "Wrong size (expected=%s, actual=%s) for %s",
114
- expected_size,
115
- actual_size,
116
- data_file_path,
117
- )
118
- files_to_download.add(file_path)
119
- continue
120
-
121
- expected_hash = file_info["md5_digest"]
122
- actual_hash = get_file_hash(data_file_path)
123
- if expected_hash != actual_hash:
124
- _LOGGER.warning(
125
- "Wrong hash (expected=%s, actual=%s) for %s",
126
- expected_hash,
127
- actual_hash,
128
- data_file_path,
129
- )
130
- files_to_download.add(file_path)
131
- continue
106
+ # Don't bother validating sizes or hashes.
107
+ # This causes more problems than its worth.
108
+ #
109
+ # expected_size = file_info["size_bytes"]
110
+ # actual_size = data_file_path.stat().st_size
111
+ # if expected_size != actual_size:
112
+ # _LOGGER.warning(
113
+ # "Wrong size (expected=%s, actual=%s) for %s",
114
+ # expected_size,
115
+ # actual_size,
116
+ # data_file_path,
117
+ # )
118
+ # files_to_download.add(file_path)
119
+ # continue
120
+
121
+ # expected_hash = file_info["md5_digest"]
122
+ # actual_hash = get_file_hash(data_file_path)
123
+ # if expected_hash != actual_hash:
124
+ # _LOGGER.warning(
125
+ # "Wrong hash (expected=%s, actual=%s) for %s",
126
+ # expected_hash,
127
+ # actual_hash,
128
+ # data_file_path,
129
+ # )
130
+ # files_to_download.add(file_path)
131
+ # continue
132
132
 
133
133
  # File exists and has been verified
134
134
  verified_files.add(file_path)
@@ -151,9 +151,10 @@ def ensure_voice_exists(
151
151
  download_file_path.parent.mkdir(parents=True, exist_ok=True)
152
152
 
153
153
  _LOGGER.debug("Downloading %s to %s", file_url, download_file_path)
154
- with urlopen(_quote_url(file_url)) as response, open(
155
- download_file_path, "wb"
156
- ) as download_file:
154
+ with (
155
+ urlopen(_quote_url(file_url)) as response,
156
+ open(download_file_path, "wb") as download_file,
157
+ ):
157
158
  shutil.copyfileobj(response, download_file)
158
159
 
159
160
  _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url)
wyoming_piper/handler.py CHANGED
@@ -1,13 +1,15 @@
1
1
  """Event handler for clients of the server."""
2
2
 
3
3
  import argparse
4
- import json
4
+ import asyncio
5
5
  import logging
6
6
  import math
7
- import os
7
+ import tempfile
8
8
  import wave
9
9
  from typing import Any, Dict, Optional
10
10
 
11
+ from piper import PiperVoice, SynthesisConfig
12
+ from sentence_stream import SentenceBoundaryDetector
11
13
  from wyoming.audio import AudioChunk, AudioStart, AudioStop
12
14
  from wyoming.error import Error
13
15
  from wyoming.event import Event
@@ -21,18 +23,22 @@ from wyoming.tts import (
21
23
  SynthesizeStopped,
22
24
  )
23
25
 
24
- from .process import PiperProcessManager
25
- from .sentence_boundary import SentenceBoundaryDetector, remove_asterisks
26
+ from .download import ensure_voice_exists, find_voice
26
27
 
27
28
  _LOGGER = logging.getLogger(__name__)
28
29
 
30
+ # Keep the most recently used voice loaded
31
+ _VOICE: Optional[PiperVoice] = None
32
+ _VOICE_NAME: Optional[str] = None
33
+ _VOICE_LOCK = asyncio.Lock()
34
+
29
35
 
30
36
  class PiperEventHandler(AsyncEventHandler):
31
37
  def __init__(
32
38
  self,
33
39
  wyoming_info: Info,
34
40
  cli_args: argparse.Namespace,
35
- process_manager: PiperProcessManager,
41
+ voices_info: Dict[str, Any],
36
42
  *args,
37
43
  **kwargs,
38
44
  ) -> None:
@@ -40,9 +46,9 @@ class PiperEventHandler(AsyncEventHandler):
40
46
 
41
47
  self.cli_args = cli_args
42
48
  self.wyoming_info_event = wyoming_info.event()
43
- self.process_manager = process_manager
44
- self.sbd = SentenceBoundaryDetector()
49
+ self.voices_info = voices_info
45
50
  self.is_streaming: Optional[bool] = None
51
+ self.sbd = SentenceBoundaryDetector()
46
52
  self._synthesize: Optional[Synthesize] = None
47
53
 
48
54
  async def handle_event(self, event: Event) -> bool:
@@ -61,10 +67,29 @@ class PiperEventHandler(AsyncEventHandler):
61
67
 
62
68
  # Sent outside a stream, so we must process it
63
69
  synthesize = Synthesize.from_event(event)
64
- synthesize.text = remove_asterisks(synthesize.text)
65
- return await self._handle_synthesize(synthesize)
70
+ self._synthesize = Synthesize(text="", voice=synthesize.voice)
71
+ self.sbd = SentenceBoundaryDetector()
72
+ start_sent = False
73
+ for i, sentence in enumerate(self.sbd.add_chunk(synthesize.text)):
74
+ self._synthesize.text = sentence
75
+ await self._handle_synthesize(
76
+ self._synthesize, send_start=(i == 0), send_stop=False
77
+ )
78
+ start_sent = True
79
+
80
+ self._synthesize.text = self.sbd.finish()
81
+ if self._synthesize.text:
82
+ # Last sentence
83
+ await self._handle_synthesize(
84
+ self._synthesize, send_start=(not start_sent), send_stop=True
85
+ )
86
+ else:
87
+ # No final sentence
88
+ await self.write_event(AudioStop().event())
89
+
90
+ return True
66
91
 
67
- if not self.cli_args.streaming:
92
+ if self.cli_args.no_streaming:
68
93
  # Streaming is not enabled
69
94
  return True
70
95
 
@@ -111,7 +136,11 @@ class PiperEventHandler(AsyncEventHandler):
111
136
  )
112
137
  raise err
113
138
 
114
- async def _handle_synthesize(self, synthesize: Synthesize) -> bool:
139
+ async def _handle_synthesize(
140
+ self, synthesize: Synthesize, send_start: bool = True, send_stop: bool = True
141
+ ) -> bool:
142
+ global _VOICE, _VOICE_NAME
143
+
115
144
  _LOGGER.debug(synthesize)
116
145
 
117
146
  raw_text = synthesize.text
@@ -130,75 +159,117 @@ class PiperEventHandler(AsyncEventHandler):
130
159
  if not has_punctuation:
131
160
  text = text + self.cli_args.auto_punctuation[0]
132
161
 
133
- async with self.process_manager.processes_lock:
134
- _LOGGER.debug("synthesize: raw_text=%s, text='%s'", raw_text, text)
135
- voice_name: Optional[str] = None
136
- voice_speaker: Optional[str] = None
137
- if synthesize.voice is not None:
138
- voice_name = synthesize.voice.name
139
- voice_speaker = synthesize.voice.speaker
140
-
141
- piper_proc = await self.process_manager.get_process(voice_name=voice_name)
142
-
143
- assert piper_proc.proc.stdin is not None
144
- assert piper_proc.proc.stdout is not None
145
-
146
- # JSON in, file path out
147
- input_obj: Dict[str, Any] = {"text": text}
148
- if voice_speaker is not None:
149
- speaker_id = piper_proc.get_speaker_id(voice_speaker)
150
- if speaker_id is not None:
151
- input_obj["speaker_id"] = speaker_id
152
- else:
153
- _LOGGER.warning(
154
- "No speaker '%s' for voice '%s'", voice_speaker, voice_name
162
+ # Resolve voice
163
+ _LOGGER.debug("synthesize: raw_text=%s, text='%s'", raw_text, text)
164
+ voice_name: Optional[str] = None
165
+ voice_speaker: Optional[str] = None
166
+ if synthesize.voice is not None:
167
+ voice_name = synthesize.voice.name
168
+ voice_speaker = synthesize.voice.speaker
169
+
170
+ if voice_name is None:
171
+ # Default voice
172
+ voice_name = self.cli_args.voice
173
+
174
+ if voice_name == self.cli_args.voice:
175
+ # Default speaker
176
+ voice_speaker = voice_speaker or self.cli_args.speaker
177
+
178
+ assert voice_name is not None
179
+
180
+ # Resolve alias
181
+ voice_info = self.voices_info.get(voice_name, {})
182
+ voice_name = voice_info.get("key", voice_name)
183
+ assert voice_name is not None
184
+
185
+ with tempfile.NamedTemporaryFile(mode="wb+", suffix=".wav") as output_file:
186
+ async with _VOICE_LOCK:
187
+ if voice_name != _VOICE_NAME:
188
+ # Load new voice
189
+ _LOGGER.debug("Loading voice: %s", _VOICE_NAME)
190
+ ensure_voice_exists(
191
+ voice_name,
192
+ self.cli_args.data_dir,
193
+ self.cli_args.download_dir,
194
+ self.voices_info,
155
195
  )
196
+ model_path, config_path = find_voice(
197
+ voice_name, self.cli_args.data_dir
198
+ )
199
+ _VOICE = PiperVoice.load(
200
+ model_path, config_path, use_cuda=self.cli_args.use_cuda
201
+ )
202
+ _VOICE_NAME = voice_name
156
203
 
157
- _LOGGER.debug("input: %s", input_obj)
158
- piper_proc.proc.stdin.write(
159
- (json.dumps(input_obj, ensure_ascii=False) + "\n").encode()
160
- )
161
- await piper_proc.proc.stdin.drain()
162
-
163
- output_path = (await piper_proc.proc.stdout.readline()).decode().strip()
164
- _LOGGER.debug(output_path)
204
+ assert _VOICE is not None
165
205
 
166
- wav_file: wave.Wave_read = wave.open(output_path, "rb")
167
- with wav_file:
168
- rate = wav_file.getframerate()
169
- width = wav_file.getsampwidth()
170
- channels = wav_file.getnchannels()
206
+ syn_config = SynthesisConfig()
207
+ if voice_speaker is not None:
208
+ syn_config.speaker_id = _VOICE.config.speaker_id_map.get(
209
+ voice_speaker
210
+ )
211
+ if syn_config.speaker_id is None:
212
+ try:
213
+ # Try to interpret as an id
214
+ syn_config.speaker_id = int(voice_speaker)
215
+ except ValueError:
216
+ pass
217
+
218
+ if syn_config.speaker_id is None:
219
+ _LOGGER.warning(
220
+ "No speaker '%s' for voice '%s'", voice_speaker, voice_name
221
+ )
222
+
223
+ if self.cli_args.length_scale is not None:
224
+ syn_config.length_scale = self.cli_args.length_scale
225
+
226
+ if self.cli_args.noise_scale is not None:
227
+ syn_config.noise_scale = self.cli_args.noise_scale
228
+
229
+ if self.cli_args.noise_w_scale is not None:
230
+ syn_config.noise_w_scale = self.cli_args.noise_w_scale
231
+
232
+ wav_writer: wave.Wave_write = wave.open(output_file, "wb")
233
+ with wav_writer:
234
+ _VOICE.synthesize_wav(text, wav_writer, syn_config)
235
+
236
+ output_file.seek(0)
237
+
238
+ wav_file: wave.Wave_read = wave.open(output_file, "rb")
239
+ with wav_file:
240
+ rate = wav_file.getframerate()
241
+ width = wav_file.getsampwidth()
242
+ channels = wav_file.getnchannels()
243
+
244
+ if send_start:
245
+ await self.write_event(
246
+ AudioStart(
247
+ rate=rate,
248
+ width=width,
249
+ channels=channels,
250
+ ).event(),
251
+ )
171
252
 
172
- await self.write_event(
173
- AudioStart(
174
- rate=rate,
175
- width=width,
176
- channels=channels,
177
- ).event(),
178
- )
253
+ # Audio
254
+ audio_bytes = wav_file.readframes(wav_file.getnframes())
255
+ bytes_per_sample = width * channels
256
+ bytes_per_chunk = bytes_per_sample * self.cli_args.samples_per_chunk
257
+ num_chunks = int(math.ceil(len(audio_bytes) / bytes_per_chunk))
258
+
259
+ # Split into chunks
260
+ for i in range(num_chunks):
261
+ offset = i * bytes_per_chunk
262
+ chunk = audio_bytes[offset : offset + bytes_per_chunk]
263
+ await self.write_event(
264
+ AudioChunk(
265
+ audio=chunk,
266
+ rate=rate,
267
+ width=width,
268
+ channels=channels,
269
+ ).event(),
270
+ )
179
271
 
180
- # Audio
181
- audio_bytes = wav_file.readframes(wav_file.getnframes())
182
- bytes_per_sample = width * channels
183
- bytes_per_chunk = bytes_per_sample * self.cli_args.samples_per_chunk
184
- num_chunks = int(math.ceil(len(audio_bytes) / bytes_per_chunk))
185
-
186
- # Split into chunks
187
- for i in range(num_chunks):
188
- offset = i * bytes_per_chunk
189
- chunk = audio_bytes[offset : offset + bytes_per_chunk]
190
- await self.write_event(
191
- AudioChunk(
192
- audio=chunk,
193
- rate=rate,
194
- width=width,
195
- channels=channels,
196
- ).event(),
197
- )
198
-
199
- await self.write_event(AudioStop().event())
200
- _LOGGER.debug("Completed request")
201
-
202
- os.unlink(output_path)
272
+ if send_stop:
273
+ await self.write_event(AudioStop().event())
203
274
 
204
275
  return True
wyoming_piper/voices.json CHANGED
@@ -59,6 +59,36 @@
59
59
  },
60
60
  "aliases": []
61
61
  },
62
+ "bg_BG-dimitar-medium": {
63
+ "key": "bg_BG-dimitar-medium",
64
+ "name": "dimitar",
65
+ "language": {
66
+ "code": "bg_BG",
67
+ "family": "bg",
68
+ "region": "BG",
69
+ "name_native": "български",
70
+ "name_english": "Bulgarian",
71
+ "country_english": "Bulgaria"
72
+ },
73
+ "quality": "medium",
74
+ "num_speakers": 1,
75
+ "speaker_id_map": {},
76
+ "files": {
77
+ "bg/bg_BG/dimitar/medium/bg_BG-dimitar-medium.onnx": {
78
+ "size_bytes": 63221984,
79
+ "md5_digest": "fc1ce62a4f04f089e22b8c3a13bde28a"
80
+ },
81
+ "bg/bg_BG/dimitar/medium/bg_BG-dimitar-medium.onnx.json": {
82
+ "size_bytes": 5123,
83
+ "md5_digest": "5fb4aa5e5b20d4ed515d40f0b4ce16f0"
84
+ },
85
+ "bg/bg_BG/dimitar/medium/MODEL_CARD": {
86
+ "size_bytes": 282,
87
+ "md5_digest": "6056805758b0136214309e799209d9c9"
88
+ }
89
+ },
90
+ "aliases": []
91
+ },
62
92
  "ca_ES-upc_ona-medium": {
63
93
  "key": "ca_ES-upc_ona-medium",
64
94
  "name": "upc_ona",
@@ -901,6 +931,36 @@
901
931
  },
902
932
  "aliases": []
903
933
  },
934
+ "el_GR-rapunzelina-medium": {
935
+ "key": "el_GR-rapunzelina-medium",
936
+ "name": "rapunzelina",
937
+ "language": {
938
+ "code": "el_GR",
939
+ "family": "el",
940
+ "region": "GR",
941
+ "name_native": "Ελληνικά",
942
+ "name_english": "Greek",
943
+ "country_english": "Greece"
944
+ },
945
+ "quality": "medium",
946
+ "num_speakers": 1,
947
+ "speaker_id_map": {},
948
+ "files": {
949
+ "el/el_GR/rapunzelina/medium/el_GR-rapunzelina-medium.onnx": {
950
+ "size_bytes": 62950044,
951
+ "md5_digest": "265f2f9be00aa5ce81abc1f022145e42"
952
+ },
953
+ "el/el_GR/rapunzelina/medium/el_GR-rapunzelina-medium.onnx.json": {
954
+ "size_bytes": 4973,
955
+ "md5_digest": "ba83ba13667e14e2f97fbc9d950f9583"
956
+ },
957
+ "el/el_GR/rapunzelina/medium/MODEL_CARD": {
958
+ "size_bytes": 276,
959
+ "md5_digest": "8a9392d82c8a5631bad7d12fc4aa66a7"
960
+ }
961
+ },
962
+ "aliases": []
963
+ },
904
964
  "en_GB-alan-low": {
905
965
  "key": "en_GB-alan-low",
906
966
  "name": "alan",
@@ -4821,6 +4881,36 @@
4821
4881
  },
4822
4882
  "aliases": []
4823
4883
  },
4884
+ "he_IL-motek-medium": {
4885
+ "key": "he_IL-motek-medium",
4886
+ "name": "motek",
4887
+ "language": {
4888
+ "code": "he_IL",
4889
+ "family": "he",
4890
+ "region": "IL",
4891
+ "name_native": "עברית",
4892
+ "name_english": "Hebrew",
4893
+ "country_english": "Israel"
4894
+ },
4895
+ "quality": "medium",
4896
+ "num_speakers": 1,
4897
+ "speaker_id_map": {},
4898
+ "files": {
4899
+ "he/he_IL/motek/medium/he_IL-motek-medium.onnx": {
4900
+ "size_bytes": 62950044,
4901
+ "md5_digest": "b1c8ac594b3627149d043ebf0f5cc905"
4902
+ },
4903
+ "he/he_IL/motek/medium/he_IL-motek-medium.onnx.json": {
4904
+ "size_bytes": 5108,
4905
+ "md5_digest": "302a1caaad72b62b4dee04548cd19642"
4906
+ },
4907
+ "he/he_IL/motek/medium/MODEL_CARD": {
4908
+ "size_bytes": 290,
4909
+ "md5_digest": "c2a5c0e238ae9d2a5aec7d212a44a4ce"
4910
+ }
4911
+ },
4912
+ "aliases": []
4913
+ },
4824
4914
  "hi_IN-pratham-medium": {
4825
4915
  "key": "hi_IN-pratham-medium",
4826
4916
  "name": "pratham",
@@ -4881,6 +4971,36 @@
4881
4971
  },
4882
4972
  "aliases": []
4883
4973
  },
4974
+ "hi_IN-rohan-medium": {
4975
+ "key": "hi_IN-rohan-medium",
4976
+ "name": "rohan",
4977
+ "language": {
4978
+ "code": "hi_IN",
4979
+ "family": "hi",
4980
+ "region": "IN",
4981
+ "name_native": "हिन्दी",
4982
+ "name_english": "Hindi",
4983
+ "country_english": "India"
4984
+ },
4985
+ "quality": "medium",
4986
+ "num_speakers": 1,
4987
+ "speaker_id_map": {},
4988
+ "files": {
4989
+ "hi/hi_IN/rohan/medium/hi_IN-rohan-medium.onnx": {
4990
+ "size_bytes": 62950044,
4991
+ "md5_digest": "d63d31559a4ccce62be938ab252a4804"
4992
+ },
4993
+ "hi/hi_IN/rohan/medium/hi_IN-rohan-medium.onnx.json": {
4994
+ "size_bytes": 5041,
4995
+ "md5_digest": "b4aeeef53e2c469def82769aa4ce19eb"
4996
+ },
4997
+ "hi/hi_IN/rohan/medium/MODEL_CARD": {
4998
+ "size_bytes": 359,
4999
+ "md5_digest": "03084fa6c2367cf7d6aaba2a0bd79b71"
5000
+ }
5001
+ },
5002
+ "aliases": []
5003
+ },
4884
5004
  "hu_HU-anna-medium": {
4885
5005
  "key": "hu_HU-anna-medium",
4886
5006
  "name": "anna",
@@ -4971,6 +5091,36 @@
4971
5091
  },
4972
5092
  "aliases": []
4973
5093
  },
5094
+ "id_ID-news_tts-medium": {
5095
+ "key": "id_ID-news_tts-medium",
5096
+ "name": "news_tts",
5097
+ "language": {
5098
+ "code": "id_ID",
5099
+ "family": "id",
5100
+ "region": "ID",
5101
+ "name_native": "Bahasa Indonesia",
5102
+ "name_english": "Indonesian",
5103
+ "country_english": "Indonesia"
5104
+ },
5105
+ "quality": "medium",
5106
+ "num_speakers": 1,
5107
+ "speaker_id_map": {},
5108
+ "files": {
5109
+ "id/id_ID/news_tts/medium/id_ID-news_tts-medium.onnx": {
5110
+ "size_bytes": 62950044,
5111
+ "md5_digest": "17de01db7ac654655436b6e509893c72"
5112
+ },
5113
+ "id/id_ID/news_tts/medium/id_ID-news_tts-medium.onnx.json": {
5114
+ "size_bytes": 5050,
5115
+ "md5_digest": "c023cfa031cc9ed4cf5dd8d2ef1e48ed"
5116
+ },
5117
+ "id/id_ID/news_tts/medium/MODEL_CARD": {
5118
+ "size_bytes": 308,
5119
+ "md5_digest": "eb0037396b93f38ffbc093dba42cf8be"
5120
+ }
5121
+ },
5122
+ "aliases": []
5123
+ },
4974
5124
  "is_IS-bui-medium": {
4975
5125
  "key": "is_IS-bui-medium",
4976
5126
  "name": "bui",
@@ -6520,6 +6670,96 @@
6520
6670
  },
6521
6671
  "aliases": []
6522
6672
  },
6673
+ "te_IN-maya-medium": {
6674
+ "key": "te_IN-maya-medium",
6675
+ "name": "maya",
6676
+ "language": {
6677
+ "code": "te_IN",
6678
+ "family": "te",
6679
+ "region": "IN",
6680
+ "name_native": "తెలుగు",
6681
+ "name_english": "Telugu",
6682
+ "country_english": "India"
6683
+ },
6684
+ "quality": "medium",
6685
+ "num_speakers": 1,
6686
+ "speaker_id_map": {},
6687
+ "files": {
6688
+ "te/te_IN/maya/medium/te_IN-maya-medium.onnx": {
6689
+ "size_bytes": 62950044,
6690
+ "md5_digest": "604fa4083118495c0fff55826ffccefe"
6691
+ },
6692
+ "te/te_IN/maya/medium/te_IN-maya-medium.onnx.json": {
6693
+ "size_bytes": 5040,
6694
+ "md5_digest": "58a134cd3f762e9da9d6bdf72bf3d93c"
6695
+ },
6696
+ "te/te_IN/maya/medium/MODEL_CARD": {
6697
+ "size_bytes": 318,
6698
+ "md5_digest": "c01b34f46af0b9adc394ec01c1db618a"
6699
+ }
6700
+ },
6701
+ "aliases": []
6702
+ },
6703
+ "te_IN-padmavathi-medium": {
6704
+ "key": "te_IN-padmavathi-medium",
6705
+ "name": "padmavathi",
6706
+ "language": {
6707
+ "code": "te_IN",
6708
+ "family": "te",
6709
+ "region": "IN",
6710
+ "name_native": "తెలుగు",
6711
+ "name_english": "Telugu",
6712
+ "country_english": "India"
6713
+ },
6714
+ "quality": "medium",
6715
+ "num_speakers": 1,
6716
+ "speaker_id_map": {},
6717
+ "files": {
6718
+ "te/te_IN/padmavathi/medium/te_IN-padmavathi-medium.onnx": {
6719
+ "size_bytes": 63516050,
6720
+ "md5_digest": "1a7fb140ecc8b5e8b3e80e460b719319"
6721
+ },
6722
+ "te/te_IN/padmavathi/medium/te_IN-padmavathi-medium.onnx.json": {
6723
+ "size_bytes": 4974,
6724
+ "md5_digest": "3f07441340aecc2a8b89987361e8078e"
6725
+ },
6726
+ "te/te_IN/padmavathi/medium/MODEL_CARD": {
6727
+ "size_bytes": 278,
6728
+ "md5_digest": "14ff83b180d87a5fbea40554c99f3b20"
6729
+ }
6730
+ },
6731
+ "aliases": []
6732
+ },
6733
+ "te_IN-venkatesh-medium": {
6734
+ "key": "te_IN-venkatesh-medium",
6735
+ "name": "venkatesh",
6736
+ "language": {
6737
+ "code": "te_IN",
6738
+ "family": "te",
6739
+ "region": "IN",
6740
+ "name_native": "తెలుగు",
6741
+ "name_english": "Telugu",
6742
+ "country_english": "India"
6743
+ },
6744
+ "quality": "medium",
6745
+ "num_speakers": 1,
6746
+ "speaker_id_map": {},
6747
+ "files": {
6748
+ "te/te_IN/venkatesh/medium/te_IN-venkatesh-medium.onnx": {
6749
+ "size_bytes": 63516050,
6750
+ "md5_digest": "145092d2d110c4df0fa385dc606fe103"
6751
+ },
6752
+ "te/te_IN/venkatesh/medium/te_IN-venkatesh-medium.onnx.json": {
6753
+ "size_bytes": 4973,
6754
+ "md5_digest": "eaa45c58d43c518c7b2638058f6f1866"
6755
+ },
6756
+ "te/te_IN/venkatesh/medium/MODEL_CARD": {
6757
+ "size_bytes": 277,
6758
+ "md5_digest": "5cf7929a926db4a08d3934ff663f4e92"
6759
+ }
6760
+ },
6761
+ "aliases": []
6762
+ },
6523
6763
  "tr_TR-dfki-medium": {
6524
6764
  "key": "tr_TR-dfki-medium",
6525
6765
  "name": "dfki",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wyoming-piper
3
- Version: 1.6.3
3
+ Version: 2.1.2
4
4
  Summary: Wyoming Server for Piper
5
5
  Author-email: Michael Hansen <mike@rhasspy.org>
6
6
  License: MIT
@@ -8,30 +8,30 @@ Project-URL: Homepage, http://github.com/rhasspy/wyoming-piper
8
8
  Keywords: rhasspy,wyoming,piper,tts
9
9
  Classifier: Development Status :: 3 - Alpha
10
10
  Classifier: Intended Audience :: Developers
11
- Classifier: Topic :: Text Processing :: Linguistic
12
- Classifier: Programming Language :: Python :: 3.8
11
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
13
12
  Classifier: Programming Language :: Python :: 3.9
14
13
  Classifier: Programming Language :: Python :: 3.10
15
14
  Classifier: Programming Language :: Python :: 3.11
16
15
  Classifier: Programming Language :: Python :: 3.12
17
16
  Classifier: Programming Language :: Python :: 3.13
18
- Requires-Python: >=3.8
17
+ Requires-Python: >=3.9
19
18
  Description-Content-Type: text/markdown
20
19
  License-File: LICENSE.md
21
- Requires-Dist: wyoming<1.8,>=1.7.2
22
- Requires-Dist: regex==2024.11.6
20
+ Requires-Dist: wyoming<2,>=1.8
21
+ Requires-Dist: regex>=2024.11.6
22
+ Requires-Dist: piper-tts<2,>=1.3.0
23
+ Requires-Dist: sentence-stream<2,>=1.2.0
23
24
  Provides-Extra: dev
24
- Requires-Dist: black==22.12.0; extra == "dev"
25
- Requires-Dist: flake8==6.0.0; extra == "dev"
26
- Requires-Dist: isort==5.11.3; extra == "dev"
27
- Requires-Dist: mypy==0.991; extra == "dev"
28
- Requires-Dist: pylint==2.15.9; extra == "dev"
29
- Requires-Dist: pytest==7.4.4; extra == "dev"
30
- Requires-Dist: pytest-asyncio==0.23.3; extra == "dev"
31
- Requires-Dist: build==1.2.2.post1; extra == "dev"
25
+ Requires-Dist: black; extra == "dev"
26
+ Requires-Dist: flake8; extra == "dev"
27
+ Requires-Dist: mypy; extra == "dev"
28
+ Requires-Dist: pylint; extra == "dev"
29
+ Requires-Dist: pytest; extra == "dev"
30
+ Requires-Dist: pytest-asyncio; extra == "dev"
31
+ Requires-Dist: build; extra == "dev"
32
32
  Requires-Dist: scipy<2,>=1.10; extra == "dev"
33
33
  Requires-Dist: numpy<2,>=1.20; extra == "dev"
34
- Requires-Dist: python-speech-features==0.6; extra == "dev"
34
+ Requires-Dist: python-speech-features<1,>=0.6; extra == "dev"
35
35
  Dynamic: license-file
36
36
 
37
37
  # Wyoming Piper
@@ -0,0 +1,13 @@
1
+ wyoming_piper/__init__.py,sha256=z1dsCtGazHHufHQpoVgNtMObt25qYBSOM85o7xgbIJA,139
2
+ wyoming_piper/__main__.py,sha256=1LtUJH7f8cwnRHEymq7fd4uxkWdQnGyurWyXbuUm87U,8073
3
+ wyoming_piper/const.py,sha256=04sCdtJ2QGuF1BQGkOuQW10og61PgH3fCnPhaYu-YoU,1015
4
+ wyoming_piper/download.py,sha256=At1RBaVKsTTAO71LAJn8bDeJnvJXBS0vA6iCIo6BqZs,6349
5
+ wyoming_piper/file_hash.py,sha256=HMuwrgEIg-bCOXHG0wE3vtjrqGD7QaA_UNfvBMXeUcY,1107
6
+ wyoming_piper/handler.py,sha256=dW15RZliTB8KDA-2CRaWL8HBK6ojZTTbvqW6EZef8r8,10194
7
+ wyoming_piper/voices.json,sha256=MgP9i3XIMAgHvxxsgPVFFpu-lmLClb0rIt0bCHPv5HA,217342
8
+ wyoming_piper-2.1.2.dist-info/licenses/LICENSE.md,sha256=E3RtUJ105V6iJl--8gS7fNv4SoMVsCB-mIMmy1Q4cCg,1071
9
+ wyoming_piper-2.1.2.dist-info/METADATA,sha256=YwB7TPBRdAS6WPcgkUpb24jejh6q3ubufqeTd9pDZuQ,2470
10
+ wyoming_piper-2.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ wyoming_piper-2.1.2.dist-info/entry_points.txt,sha256=n2UgsOCQitQ5Itr20aITTWZLL2dAtaVKn5pdecXdDHE,61
12
+ wyoming_piper-2.1.2.dist-info/top_level.txt,sha256=t7U7-u1sK_4xy_qbTJhxQRbxle3cLQfPq2oVLezHVNU,14
13
+ wyoming_piper-2.1.2.dist-info/RECORD,,
wyoming_piper/process.py DELETED
@@ -1,171 +0,0 @@
1
- #!/usr/bin/env python3
2
- import argparse
3
- import asyncio
4
- import json
5
- import logging
6
- import tempfile
7
- import time
8
- from dataclasses import dataclass
9
- from typing import Any, Dict, Optional
10
-
11
- from .download import ensure_voice_exists, find_voice
12
-
13
- _LOGGER = logging.getLogger(__name__)
14
-
15
-
16
- @dataclass
17
- class PiperProcess:
18
- """Info for a running Piper process (one voice)."""
19
-
20
- name: str
21
- proc: "asyncio.subprocess.Process"
22
- config: Dict[str, Any]
23
- wav_dir: tempfile.TemporaryDirectory
24
- last_used: int = 0
25
-
26
- def get_speaker_id(self, speaker: str) -> Optional[int]:
27
- """Get speaker by name or id."""
28
- return _get_speaker_id(self.config, speaker)
29
-
30
- @property
31
- def is_multispeaker(self) -> bool:
32
- """True if model has more than one speaker."""
33
- return _is_multispeaker(self.config)
34
-
35
-
36
- def _get_speaker_id(config: Dict[str, Any], speaker: str) -> Optional[int]:
37
- """Get speaker by name or id."""
38
- speaker_id_map = config.get("speaker_id_map", {})
39
- speaker_id = speaker_id_map.get(speaker)
40
- if speaker_id is None:
41
- try:
42
- # Try to interpret as an id
43
- speaker_id = int(speaker)
44
- except ValueError:
45
- pass
46
-
47
- return speaker_id
48
-
49
-
50
- def _is_multispeaker(config: Dict[str, Any]) -> bool:
51
- """True if model has more than one speaker."""
52
- return config.get("num_speakers", 1) > 1
53
-
54
-
55
- # -----------------------------------------------------------------------------
56
-
57
-
58
- class PiperProcessManager:
59
- """Manager of running Piper processes."""
60
-
61
- def __init__(self, args: argparse.Namespace, voices_info: Dict[str, Any]):
62
- self.voices_info = voices_info
63
- self.args = args
64
- self.processes: Dict[str, PiperProcess] = {}
65
- self.processes_lock = asyncio.Lock()
66
-
67
- async def get_process(self, voice_name: Optional[str] = None) -> PiperProcess:
68
- """Get a running Piper process or start a new one if necessary."""
69
- voice_speaker: Optional[str] = None
70
- if voice_name is None:
71
- # Default voice
72
- voice_name = self.args.voice
73
-
74
- if voice_name == self.args.voice:
75
- # Default speaker
76
- voice_speaker = self.args.speaker
77
-
78
- assert voice_name is not None
79
-
80
- # Resolve alias
81
- voice_info = self.voices_info.get(voice_name, {})
82
- voice_name = voice_info.get("key", voice_name)
83
- assert voice_name is not None
84
-
85
- piper_proc = self.processes.get(voice_name)
86
- if (piper_proc is None) or (piper_proc.proc.returncode is not None):
87
- # Remove if stopped
88
- self.processes.pop(voice_name, None)
89
-
90
- # Start new Piper process
91
- if self.args.max_piper_procs > 0:
92
- # Restrict number of running processes
93
- while len(self.processes) >= self.args.max_piper_procs:
94
- # Stop least recently used process
95
- lru_proc_name, lru_proc = sorted(
96
- self.processes.items(), key=lambda kv: kv[1].last_used
97
- )[0]
98
- _LOGGER.debug("Stopping process for: %s", lru_proc_name)
99
- self.processes.pop(lru_proc_name, None)
100
- if lru_proc.proc.returncode is None:
101
- try:
102
- lru_proc.proc.terminate()
103
- await lru_proc.proc.wait()
104
- except Exception:
105
- _LOGGER.exception("Unexpected error stopping piper process")
106
-
107
- _LOGGER.debug(
108
- "Starting process for: %s (%s/%s)",
109
- voice_name,
110
- len(self.processes) + 1,
111
- self.args.max_piper_procs,
112
- )
113
-
114
- ensure_voice_exists(
115
- voice_name,
116
- self.args.data_dir,
117
- self.args.download_dir,
118
- self.voices_info,
119
- )
120
-
121
- onnx_path, config_path = find_voice(voice_name, self.args.data_dir)
122
- with open(config_path, "r", encoding="utf-8") as config_file:
123
- config = json.load(config_file)
124
-
125
- wav_dir = tempfile.TemporaryDirectory()
126
- piper_args = [
127
- "--model",
128
- str(onnx_path),
129
- "--config",
130
- str(config_path),
131
- "--output_dir",
132
- str(wav_dir.name),
133
- "--json-input", # piper 1.1+
134
- ]
135
-
136
- if voice_speaker is not None:
137
- if _is_multispeaker(config):
138
- speaker_id = _get_speaker_id(config, voice_speaker)
139
- if speaker_id is not None:
140
- piper_args.extend(["--speaker", str(speaker_id)])
141
-
142
- if self.args.noise_scale:
143
- piper_args.extend(["--noise-scale", str(self.args.noise_scale)])
144
-
145
- if self.args.length_scale:
146
- piper_args.extend(["--length-scale", str(self.args.length_scale)])
147
-
148
- if self.args.noise_w:
149
- piper_args.extend(["--noise-w", str(self.args.noise_w)])
150
-
151
- _LOGGER.debug(
152
- "Starting piper process: %s args=%s", self.args.piper, piper_args
153
- )
154
- piper_proc = PiperProcess(
155
- name=voice_name,
156
- proc=await asyncio.create_subprocess_exec(
157
- self.args.piper,
158
- *piper_args,
159
- stdin=asyncio.subprocess.PIPE,
160
- stdout=asyncio.subprocess.PIPE,
161
- stderr=asyncio.subprocess.DEVNULL,
162
- ),
163
- config=config,
164
- wav_dir=wav_dir,
165
- )
166
- self.processes[voice_name] = piper_proc
167
-
168
- # Update used
169
- piper_proc.last_used = time.monotonic_ns()
170
-
171
- return piper_proc
@@ -1,58 +0,0 @@
1
- """Guess the sentence boundaries in text."""
2
-
3
- from collections.abc import Iterable
4
-
5
- import regex as re
6
-
7
- SENTENCE_END = r"[.!?…]|[。!?]|[؟]|[।॥]"
8
- ABBREVIATION_RE = re.compile(r"\b\p{L}{1,3}\.$", re.UNICODE)
9
-
10
- SENTENCE_BOUNDARY_RE = re.compile(
11
- rf"(.*?(?:{SENTENCE_END}+))(?=\s+[\p{{Lu}}\p{{Lt}}\p{{Lo}}]|(?:\s+\d+\.\s+))",
12
- re.DOTALL,
13
- )
14
- WORD_ASTERISKS = re.compile(r"\*+([^\*]+)\*+")
15
- LINE_ASTERICKS = re.compile(r"(?<=^|\n)\s*\*+")
16
-
17
-
18
- class SentenceBoundaryDetector:
19
- def __init__(self) -> None:
20
- self.remaining_text = ""
21
- self.current_sentence = ""
22
-
23
- def add_chunk(self, chunk: str) -> Iterable[str]:
24
- self.remaining_text += chunk
25
- while self.remaining_text:
26
- match = SENTENCE_BOUNDARY_RE.search(self.remaining_text)
27
- if not match:
28
- break
29
-
30
- match_text = match.group(0)
31
-
32
- if not self.current_sentence:
33
- self.current_sentence = match_text
34
- elif ABBREVIATION_RE.search(self.current_sentence[-5:]):
35
- self.current_sentence += match_text
36
- else:
37
- yield remove_asterisks(self.current_sentence.strip())
38
- self.current_sentence = match_text
39
-
40
- if not ABBREVIATION_RE.search(self.current_sentence[-5:]):
41
- yield remove_asterisks(self.current_sentence.strip())
42
- self.current_sentence = ""
43
-
44
- self.remaining_text = self.remaining_text[match.end() :]
45
-
46
- def finish(self) -> str:
47
- text = (self.current_sentence + self.remaining_text).strip()
48
- self.remaining_text = ""
49
- self.current_sentence = ""
50
-
51
- return remove_asterisks(text)
52
-
53
-
54
- def remove_asterisks(text: str) -> str:
55
- """Remove *asterisks* surrounding **words**"""
56
- text = WORD_ASTERISKS.sub(r"\1", text)
57
- text = LINE_ASTERICKS.sub("", text)
58
- return text
@@ -1,15 +0,0 @@
1
- wyoming_piper/__init__.py,sha256=z1dsCtGazHHufHQpoVgNtMObt25qYBSOM85o7xgbIJA,139
2
- wyoming_piper/__main__.py,sha256=SuJ6XY6zy68N8L-N_n_EIWK0vpZwbYQXW5vlSC8BpW8,7445
3
- wyoming_piper/const.py,sha256=04sCdtJ2QGuF1BQGkOuQW10og61PgH3fCnPhaYu-YoU,1015
4
- wyoming_piper/download.py,sha256=UpczxHWqLkcOblHmrwgBHSR6wG1LR-hZ4V6QSsrghns,6185
5
- wyoming_piper/file_hash.py,sha256=HMuwrgEIg-bCOXHG0wE3vtjrqGD7QaA_UNfvBMXeUcY,1107
6
- wyoming_piper/handler.py,sha256=WVpmnRVYmsd3DrLcMfBOsuo_J1HJ0h00-HwQ1iZEToo,7360
7
- wyoming_piper/process.py,sha256=L_qqxQcQawrC940fwlv4u6KM9KjCq6N6ym-OADSZcrM,5794
8
- wyoming_piper/sentence_boundary.py,sha256=pHVo92_weusnVLRVicnS0-Tst_eR-pMrnRrGL96HxC8,1875
9
- wyoming_piper/voices.json,sha256=elUT3cM0Wlgo8N8E5nhMbMSCPB8zU4SY2XGKwe-T2ys,209108
10
- wyoming_piper-1.6.3.dist-info/licenses/LICENSE.md,sha256=E3RtUJ105V6iJl--8gS7fNv4SoMVsCB-mIMmy1Q4cCg,1071
11
- wyoming_piper-1.6.3.dist-info/METADATA,sha256=sNP4bue0pO2mFBb3xUnXfgibofOvaCFuJqN7Hik3fmQ,2543
12
- wyoming_piper-1.6.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
- wyoming_piper-1.6.3.dist-info/entry_points.txt,sha256=n2UgsOCQitQ5Itr20aITTWZLL2dAtaVKn5pdecXdDHE,61
14
- wyoming_piper-1.6.3.dist-info/top_level.txt,sha256=t7U7-u1sK_4xy_qbTJhxQRbxle3cLQfPq2oVLezHVNU,14
15
- wyoming_piper-1.6.3.dist-info/RECORD,,