wyoming-piper 1.6.3__tar.gz → 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {wyoming_piper-1.6.3/wyoming_piper.egg-info → wyoming_piper-2.1.2}/PKG-INFO +15 -15
  2. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/pyproject.toml +15 -15
  3. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/tests/test_piper.py +0 -21
  4. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/wyoming_piper/__main__.py +41 -23
  5. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/wyoming_piper/download.py +32 -31
  6. wyoming_piper-2.1.2/wyoming_piper/handler.py +275 -0
  7. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/wyoming_piper/voices.json +240 -0
  8. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2/wyoming_piper.egg-info}/PKG-INFO +15 -15
  9. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/wyoming_piper.egg-info/SOURCES.txt +0 -3
  10. wyoming_piper-2.1.2/wyoming_piper.egg-info/requires.txt +16 -0
  11. wyoming_piper-1.6.3/tests/test_sentence_boundary.py +0 -61
  12. wyoming_piper-1.6.3/wyoming_piper/handler.py +0 -204
  13. wyoming_piper-1.6.3/wyoming_piper/process.py +0 -171
  14. wyoming_piper-1.6.3/wyoming_piper/sentence_boundary.py +0 -58
  15. wyoming_piper-1.6.3/wyoming_piper.egg-info/requires.txt +0 -15
  16. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/LICENSE.md +0 -0
  17. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/README.md +0 -0
  18. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/setup.cfg +0 -0
  19. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/wyoming_piper/__init__.py +0 -0
  20. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/wyoming_piper/const.py +0 -0
  21. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/wyoming_piper/file_hash.py +0 -0
  22. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/wyoming_piper.egg-info/dependency_links.txt +0 -0
  23. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/wyoming_piper.egg-info/entry_points.txt +0 -0
  24. {wyoming_piper-1.6.3 → wyoming_piper-2.1.2}/wyoming_piper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wyoming-piper
3
- Version: 1.6.3
3
+ Version: 2.1.2
4
4
  Summary: Wyoming Server for Piper
5
5
  Author-email: Michael Hansen <mike@rhasspy.org>
6
6
  License: MIT
@@ -8,30 +8,30 @@ Project-URL: Homepage, http://github.com/rhasspy/wyoming-piper
8
8
  Keywords: rhasspy,wyoming,piper,tts
9
9
  Classifier: Development Status :: 3 - Alpha
10
10
  Classifier: Intended Audience :: Developers
11
- Classifier: Topic :: Text Processing :: Linguistic
12
- Classifier: Programming Language :: Python :: 3.8
11
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
13
12
  Classifier: Programming Language :: Python :: 3.9
14
13
  Classifier: Programming Language :: Python :: 3.10
15
14
  Classifier: Programming Language :: Python :: 3.11
16
15
  Classifier: Programming Language :: Python :: 3.12
17
16
  Classifier: Programming Language :: Python :: 3.13
18
- Requires-Python: >=3.8
17
+ Requires-Python: >=3.9
19
18
  Description-Content-Type: text/markdown
20
19
  License-File: LICENSE.md
21
- Requires-Dist: wyoming<1.8,>=1.7.2
22
- Requires-Dist: regex==2024.11.6
20
+ Requires-Dist: wyoming<2,>=1.8
21
+ Requires-Dist: regex>=2024.11.6
22
+ Requires-Dist: piper-tts<2,>=1.3.0
23
+ Requires-Dist: sentence-stream<2,>=1.2.0
23
24
  Provides-Extra: dev
24
- Requires-Dist: black==22.12.0; extra == "dev"
25
- Requires-Dist: flake8==6.0.0; extra == "dev"
26
- Requires-Dist: isort==5.11.3; extra == "dev"
27
- Requires-Dist: mypy==0.991; extra == "dev"
28
- Requires-Dist: pylint==2.15.9; extra == "dev"
29
- Requires-Dist: pytest==7.4.4; extra == "dev"
30
- Requires-Dist: pytest-asyncio==0.23.3; extra == "dev"
31
- Requires-Dist: build==1.2.2.post1; extra == "dev"
25
+ Requires-Dist: black; extra == "dev"
26
+ Requires-Dist: flake8; extra == "dev"
27
+ Requires-Dist: mypy; extra == "dev"
28
+ Requires-Dist: pylint; extra == "dev"
29
+ Requires-Dist: pytest; extra == "dev"
30
+ Requires-Dist: pytest-asyncio; extra == "dev"
31
+ Requires-Dist: build; extra == "dev"
32
32
  Requires-Dist: scipy<2,>=1.10; extra == "dev"
33
33
  Requires-Dist: numpy<2,>=1.20; extra == "dev"
34
- Requires-Dist: python-speech-features==0.6; extra == "dev"
34
+ Requires-Dist: python-speech-features<1,>=0.6; extra == "dev"
35
35
  Dynamic: license-file
36
36
 
37
37
  # Wyoming Piper
@@ -1,9 +1,9 @@
1
1
  [project]
2
2
  name = "wyoming-piper"
3
- version = "1.6.3"
3
+ version = "2.1.2"
4
4
  description = "Wyoming Server for Piper"
5
5
  readme = "README.md"
6
- requires-python = ">=3.8"
6
+ requires-python = ">=3.9"
7
7
  license = {text = "MIT"}
8
8
  authors = [
9
9
  {name = "Michael Hansen", email = "mike@rhasspy.org"}
@@ -12,8 +12,7 @@ keywords = ["rhasspy", "wyoming", "piper", "tts"]
12
12
  classifiers = [
13
13
  "Development Status :: 3 - Alpha",
14
14
  "Intended Audience :: Developers",
15
- "Topic :: Text Processing :: Linguistic",
16
- "Programming Language :: Python :: 3.8",
15
+ "Topic :: Multimedia :: Sound/Audio :: Speech",
17
16
  "Programming Language :: Python :: 3.9",
18
17
  "Programming Language :: Python :: 3.10",
19
18
  "Programming Language :: Python :: 3.11",
@@ -21,8 +20,10 @@ classifiers = [
21
20
  "Programming Language :: Python :: 3.13",
22
21
  ]
23
22
  dependencies = [
24
- "wyoming>=1.7.2,<1.8",
25
- "regex==2024.11.6",
23
+ "wyoming>=1.8,<2",
24
+ "regex>=2024.11.6",
25
+ "piper-tts>=1.3.0,<2",
26
+ "sentence-stream>=1.2.0,<2",
26
27
  ]
27
28
 
28
29
  [project.urls]
@@ -57,15 +58,14 @@ disallow_untyped_defs = true
57
58
 
58
59
  [project.optional-dependencies]
59
60
  dev = [
60
- "black==22.12.0",
61
- "flake8==6.0.0",
62
- "isort==5.11.3",
63
- "mypy==0.991",
64
- "pylint==2.15.9",
65
- "pytest==7.4.4",
66
- "pytest-asyncio==0.23.3",
67
- "build==1.2.2.post1",
61
+ "black",
62
+ "flake8",
63
+ "mypy",
64
+ "pylint",
65
+ "pytest",
66
+ "pytest-asyncio",
67
+ "build",
68
68
  "scipy>=1.10,<2",
69
69
  "numpy>=1.20,<2",
70
- "python-speech-features==0.6",
70
+ "python-speech-features>=0.6,<1",
71
71
  ]
@@ -2,11 +2,9 @@
2
2
 
3
3
  import asyncio
4
4
  import sys
5
- import tarfile
6
5
  import wave
7
6
  from asyncio.subprocess import PIPE
8
7
  from pathlib import Path
9
- from urllib.request import urlopen
10
8
 
11
9
  import numpy as np
12
10
  import pytest
@@ -20,36 +18,17 @@ from .dtw import compute_optimal_path
20
18
 
21
19
  _DIR = Path(__file__).parent
22
20
  _LOCAL_DIR = _DIR.parent / "local"
23
- _PIPER_URL = (
24
- "https://github.com/rhasspy/piper/releases/download/v1.2.0/piper_amd64.tar.gz"
25
- )
26
21
  _TIMEOUT = 60
27
22
 
28
23
 
29
- def download_piper() -> None:
30
- """Downloads a binary version of Piper."""
31
- piper_path = _LOCAL_DIR / "piper"
32
- if piper_path.exists():
33
- return
34
-
35
- _LOCAL_DIR.mkdir(parents=True, exist_ok=True)
36
- with urlopen(_PIPER_URL) as response:
37
- with tarfile.open(fileobj=response, mode="r|*") as piper_file:
38
- piper_file.extractall(_LOCAL_DIR)
39
-
40
-
41
24
  @pytest.mark.asyncio
42
25
  async def test_piper() -> None:
43
- download_piper()
44
-
45
26
  proc = await asyncio.create_subprocess_exec(
46
27
  sys.executable,
47
28
  "-m",
48
29
  "wyoming_piper",
49
30
  "--uri",
50
31
  "stdio://",
51
- "--piper",
52
- str(_LOCAL_DIR / "piper" / "piper"),
53
32
  "--voice",
54
33
  "en_US-ryan-low",
55
34
  "--data-dir",
@@ -8,12 +8,11 @@ from pathlib import Path
8
8
  from typing import Any, Dict, Set
9
9
 
10
10
  from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
11
- from wyoming.server import AsyncServer
11
+ from wyoming.server import AsyncServer, AsyncTcpServer
12
12
 
13
13
  from . import __version__
14
- from .download import find_voice, get_voices
14
+ from .download import ensure_voice_exists, find_voice, get_voices
15
15
  from .handler import PiperEventHandler
16
- from .process import PiperProcessManager
17
16
 
18
17
  _LOGGER = logging.getLogger(__name__)
19
18
 
@@ -21,17 +20,20 @@ _LOGGER = logging.getLogger(__name__)
21
20
  async def main() -> None:
22
21
  """Main entry point."""
23
22
  parser = argparse.ArgumentParser()
24
- parser.add_argument(
25
- "--piper",
26
- required=True,
27
- help="Path to piper executable",
28
- )
29
23
  parser.add_argument(
30
24
  "--voice",
31
25
  required=True,
32
26
  help="Default Piper voice to use (e.g., en_US-lessac-medium)",
33
27
  )
34
28
  parser.add_argument("--uri", default="stdio://", help="unix:// or tcp://")
29
+ #
30
+ parser.add_argument(
31
+ "--zeroconf",
32
+ nargs="?",
33
+ const="piper",
34
+ help="Enable discovery over zeroconf with optional name (default: piper)",
35
+ )
36
+ #
35
37
  parser.add_argument(
36
38
  "--data-dir",
37
39
  required=True,
@@ -48,22 +50,18 @@ async def main() -> None:
48
50
  )
49
51
  parser.add_argument("--noise-scale", type=float, help="Generator noise")
50
52
  parser.add_argument("--length-scale", type=float, help="Phoneme length")
51
- parser.add_argument("--noise-w", type=float, help="Phoneme width noise")
53
+ parser.add_argument(
54
+ "--noise-w-scale", "--noise-w", type=float, help="Phoneme width noise"
55
+ )
52
56
  #
53
57
  parser.add_argument(
54
58
  "--auto-punctuation", default=".?!", help="Automatically add punctuation"
55
59
  )
56
60
  parser.add_argument("--samples-per-chunk", type=int, default=1024)
57
61
  parser.add_argument(
58
- "--max-piper-procs",
59
- type=int,
60
- default=1,
61
- help="Maximum number of piper process to run simultaneously (default: 1)",
62
- )
63
- parser.add_argument(
64
- "--streaming",
62
+ "--no-streaming",
65
63
  action="store_true",
66
- help="Enable audio streaming on sentence boundaries",
64
+ help="Disable audio streaming on sentence boundaries",
67
65
  )
68
66
  #
69
67
  parser.add_argument(
@@ -72,6 +70,12 @@ async def main() -> None:
72
70
  help="Download latest voices.json during startup",
73
71
  )
74
72
  #
73
+ parser.add_argument(
74
+ "--use-cuda",
75
+ action="store_true",
76
+ help="Use CUDA if available (requires onnxruntime-gpu)",
77
+ )
78
+ #
75
79
  parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
76
80
  parser.add_argument(
77
81
  "--log-format", default=logging.BASIC_FORMAT, help="Format for log messages"
@@ -187,27 +191,41 @@ async def main() -> None:
187
191
  installed=True,
188
192
  voices=sorted(voices, key=lambda v: v.name),
189
193
  version=__version__,
190
- supports_synthesize_streaming=args.streaming,
194
+ supports_synthesize_streaming=(not args.no_streaming),
191
195
  )
192
196
  ],
193
197
  )
194
198
 
195
- process_manager = PiperProcessManager(args, voices_info)
199
+ # Ensure default voice is downloaded
200
+ voice_info = voices_info.get(args.voice, {})
201
+ voice_name = voice_info.get("key", args.voice)
202
+ assert voice_name is not None
196
203
 
197
- # Make sure default voice is loaded.
198
- # Other voices will be loaded on-demand.
199
- await process_manager.get_process()
204
+ ensure_voice_exists(voice_name, args.data_dir, args.download_dir, voices_info)
200
205
 
201
206
  # Start server
202
207
  server = AsyncServer.from_uri(args.uri)
203
208
 
209
+ if args.zeroconf:
210
+ if not isinstance(server, AsyncTcpServer):
211
+ raise ValueError("Zeroconf requires tcp:// uri")
212
+
213
+ from wyoming.zeroconf import HomeAssistantZeroconf
214
+
215
+ tcp_server: AsyncTcpServer = server
216
+ hass_zeroconf = HomeAssistantZeroconf(
217
+ name=args.zeroconf, port=tcp_server.port, host=tcp_server.host
218
+ )
219
+ await hass_zeroconf.register_server()
220
+ _LOGGER.debug("Zeroconf discovery enabled")
221
+
204
222
  _LOGGER.info("Ready")
205
223
  await server.run(
206
224
  partial(
207
225
  PiperEventHandler,
208
226
  wyoming_info,
209
227
  args,
210
- process_manager,
228
+ voices_info,
211
229
  )
212
230
  )
213
231
 
@@ -9,8 +9,6 @@ from urllib.error import URLError
9
9
  from urllib.parse import quote, urlsplit, urlunsplit
10
10
  from urllib.request import urlopen
11
11
 
12
- from .file_hash import get_file_hash
13
-
14
12
  URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/main/{file}"
15
13
 
16
14
  _DIR = Path(__file__).parent
@@ -89,8 +87,7 @@ def ensure_voice_exists(
89
87
  for data_dir in data_dirs:
90
88
  data_dir = Path(data_dir)
91
89
 
92
- # Check sizes/hashes
93
- for file_path, file_info in voice_files.items():
90
+ for file_path, _file_info in voice_files.items():
94
91
  if file_path in verified_files:
95
92
  # Already verified this file in a different data directory
96
93
  continue
@@ -101,34 +98,37 @@ def ensure_voice_exists(
101
98
 
102
99
  data_file_path = data_dir / file_name
103
100
  _LOGGER.debug("Checking %s", data_file_path)
104
- if not data_file_path.exists():
101
+ if (not data_file_path.exists()) or (data_file_path.stat().st_size == 0):
105
102
  _LOGGER.debug("Missing %s", data_file_path)
106
103
  files_to_download.add(file_path)
107
104
  continue
108
105
 
109
- expected_size = file_info["size_bytes"]
110
- actual_size = data_file_path.stat().st_size
111
- if expected_size != actual_size:
112
- _LOGGER.warning(
113
- "Wrong size (expected=%s, actual=%s) for %s",
114
- expected_size,
115
- actual_size,
116
- data_file_path,
117
- )
118
- files_to_download.add(file_path)
119
- continue
120
-
121
- expected_hash = file_info["md5_digest"]
122
- actual_hash = get_file_hash(data_file_path)
123
- if expected_hash != actual_hash:
124
- _LOGGER.warning(
125
- "Wrong hash (expected=%s, actual=%s) for %s",
126
- expected_hash,
127
- actual_hash,
128
- data_file_path,
129
- )
130
- files_to_download.add(file_path)
131
- continue
106
+ # Don't bother validating sizes or hashes.
107
+ # This causes more problems than its worth.
108
+ #
109
+ # expected_size = file_info["size_bytes"]
110
+ # actual_size = data_file_path.stat().st_size
111
+ # if expected_size != actual_size:
112
+ # _LOGGER.warning(
113
+ # "Wrong size (expected=%s, actual=%s) for %s",
114
+ # expected_size,
115
+ # actual_size,
116
+ # data_file_path,
117
+ # )
118
+ # files_to_download.add(file_path)
119
+ # continue
120
+
121
+ # expected_hash = file_info["md5_digest"]
122
+ # actual_hash = get_file_hash(data_file_path)
123
+ # if expected_hash != actual_hash:
124
+ # _LOGGER.warning(
125
+ # "Wrong hash (expected=%s, actual=%s) for %s",
126
+ # expected_hash,
127
+ # actual_hash,
128
+ # data_file_path,
129
+ # )
130
+ # files_to_download.add(file_path)
131
+ # continue
132
132
 
133
133
  # File exists and has been verified
134
134
  verified_files.add(file_path)
@@ -151,9 +151,10 @@ def ensure_voice_exists(
151
151
  download_file_path.parent.mkdir(parents=True, exist_ok=True)
152
152
 
153
153
  _LOGGER.debug("Downloading %s to %s", file_url, download_file_path)
154
- with urlopen(_quote_url(file_url)) as response, open(
155
- download_file_path, "wb"
156
- ) as download_file:
154
+ with (
155
+ urlopen(_quote_url(file_url)) as response,
156
+ open(download_file_path, "wb") as download_file,
157
+ ):
157
158
  shutil.copyfileobj(response, download_file)
158
159
 
159
160
  _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url)
@@ -0,0 +1,275 @@
1
+ """Event handler for clients of the server."""
2
+
3
+ import argparse
4
+ import asyncio
5
+ import logging
6
+ import math
7
+ import tempfile
8
+ import wave
9
+ from typing import Any, Dict, Optional
10
+
11
+ from piper import PiperVoice, SynthesisConfig
12
+ from sentence_stream import SentenceBoundaryDetector
13
+ from wyoming.audio import AudioChunk, AudioStart, AudioStop
14
+ from wyoming.error import Error
15
+ from wyoming.event import Event
16
+ from wyoming.info import Describe, Info
17
+ from wyoming.server import AsyncEventHandler
18
+ from wyoming.tts import (
19
+ Synthesize,
20
+ SynthesizeChunk,
21
+ SynthesizeStart,
22
+ SynthesizeStop,
23
+ SynthesizeStopped,
24
+ )
25
+
26
+ from .download import ensure_voice_exists, find_voice
27
+
28
+ _LOGGER = logging.getLogger(__name__)
29
+
30
+ # Keep the most recently used voice loaded
31
+ _VOICE: Optional[PiperVoice] = None
32
+ _VOICE_NAME: Optional[str] = None
33
+ _VOICE_LOCK = asyncio.Lock()
34
+
35
+
36
+ class PiperEventHandler(AsyncEventHandler):
37
+ def __init__(
38
+ self,
39
+ wyoming_info: Info,
40
+ cli_args: argparse.Namespace,
41
+ voices_info: Dict[str, Any],
42
+ *args,
43
+ **kwargs,
44
+ ) -> None:
45
+ super().__init__(*args, **kwargs)
46
+
47
+ self.cli_args = cli_args
48
+ self.wyoming_info_event = wyoming_info.event()
49
+ self.voices_info = voices_info
50
+ self.is_streaming: Optional[bool] = None
51
+ self.sbd = SentenceBoundaryDetector()
52
+ self._synthesize: Optional[Synthesize] = None
53
+
54
+ async def handle_event(self, event: Event) -> bool:
55
+ if Describe.is_type(event.type):
56
+ await self.write_event(self.wyoming_info_event)
57
+ _LOGGER.debug("Sent info")
58
+ return True
59
+
60
+ try:
61
+ if Synthesize.is_type(event.type):
62
+ if self.is_streaming:
63
+ # Ignore since this is only sent for compatibility reasons.
64
+ # For streaming, we expect:
65
+ # [synthesize-start] -> [synthesize-chunk]+ -> [synthesize]? -> [synthesize-stop]
66
+ return True
67
+
68
+ # Sent outside a stream, so we must process it
69
+ synthesize = Synthesize.from_event(event)
70
+ self._synthesize = Synthesize(text="", voice=synthesize.voice)
71
+ self.sbd = SentenceBoundaryDetector()
72
+ start_sent = False
73
+ for i, sentence in enumerate(self.sbd.add_chunk(synthesize.text)):
74
+ self._synthesize.text = sentence
75
+ await self._handle_synthesize(
76
+ self._synthesize, send_start=(i == 0), send_stop=False
77
+ )
78
+ start_sent = True
79
+
80
+ self._synthesize.text = self.sbd.finish()
81
+ if self._synthesize.text:
82
+ # Last sentence
83
+ await self._handle_synthesize(
84
+ self._synthesize, send_start=(not start_sent), send_stop=True
85
+ )
86
+ else:
87
+ # No final sentence
88
+ await self.write_event(AudioStop().event())
89
+
90
+ return True
91
+
92
+ if self.cli_args.no_streaming:
93
+ # Streaming is not enabled
94
+ return True
95
+
96
+ if SynthesizeStart.is_type(event.type):
97
+ # Start of a stream
98
+ stream_start = SynthesizeStart.from_event(event)
99
+ self.is_streaming = True
100
+ self.sbd = SentenceBoundaryDetector()
101
+ self._synthesize = Synthesize(text="", voice=stream_start.voice)
102
+ _LOGGER.debug("Text stream started: voice=%s", stream_start.voice)
103
+ return True
104
+
105
+ if SynthesizeChunk.is_type(event.type):
106
+ assert self._synthesize is not None
107
+ stream_chunk = SynthesizeChunk.from_event(event)
108
+ for sentence in self.sbd.add_chunk(stream_chunk.text):
109
+ _LOGGER.debug("Synthesizing stream sentence: %s", sentence)
110
+ self._synthesize.text = sentence
111
+ await self._handle_synthesize(self._synthesize)
112
+
113
+ return True
114
+
115
+ if SynthesizeStop.is_type(event.type):
116
+ assert self._synthesize is not None
117
+ self._synthesize.text = self.sbd.finish()
118
+ if self._synthesize.text:
119
+ # Final audio chunk(s)
120
+ await self._handle_synthesize(self._synthesize)
121
+
122
+ # End of audio
123
+ await self.write_event(SynthesizeStopped().event())
124
+
125
+ _LOGGER.debug("Text stream stopped")
126
+ return True
127
+
128
+ if not Synthesize.is_type(event.type):
129
+ return True
130
+
131
+ synthesize = Synthesize.from_event(event)
132
+ return await self._handle_synthesize(synthesize)
133
+ except Exception as err:
134
+ await self.write_event(
135
+ Error(text=str(err), code=err.__class__.__name__).event()
136
+ )
137
+ raise err
138
+
139
+ async def _handle_synthesize(
140
+ self, synthesize: Synthesize, send_start: bool = True, send_stop: bool = True
141
+ ) -> bool:
142
+ global _VOICE, _VOICE_NAME
143
+
144
+ _LOGGER.debug(synthesize)
145
+
146
+ raw_text = synthesize.text
147
+
148
+ # Join multiple lines
149
+ text = " ".join(raw_text.strip().splitlines())
150
+
151
+ if self.cli_args.auto_punctuation and text:
152
+ # Add automatic punctuation (important for some voices)
153
+ has_punctuation = False
154
+ for punc_char in self.cli_args.auto_punctuation:
155
+ if text[-1] == punc_char:
156
+ has_punctuation = True
157
+ break
158
+
159
+ if not has_punctuation:
160
+ text = text + self.cli_args.auto_punctuation[0]
161
+
162
+ # Resolve voice
163
+ _LOGGER.debug("synthesize: raw_text=%s, text='%s'", raw_text, text)
164
+ voice_name: Optional[str] = None
165
+ voice_speaker: Optional[str] = None
166
+ if synthesize.voice is not None:
167
+ voice_name = synthesize.voice.name
168
+ voice_speaker = synthesize.voice.speaker
169
+
170
+ if voice_name is None:
171
+ # Default voice
172
+ voice_name = self.cli_args.voice
173
+
174
+ if voice_name == self.cli_args.voice:
175
+ # Default speaker
176
+ voice_speaker = voice_speaker or self.cli_args.speaker
177
+
178
+ assert voice_name is not None
179
+
180
+ # Resolve alias
181
+ voice_info = self.voices_info.get(voice_name, {})
182
+ voice_name = voice_info.get("key", voice_name)
183
+ assert voice_name is not None
184
+
185
+ with tempfile.NamedTemporaryFile(mode="wb+", suffix=".wav") as output_file:
186
+ async with _VOICE_LOCK:
187
+ if voice_name != _VOICE_NAME:
188
+ # Load new voice
189
+ _LOGGER.debug("Loading voice: %s", _VOICE_NAME)
190
+ ensure_voice_exists(
191
+ voice_name,
192
+ self.cli_args.data_dir,
193
+ self.cli_args.download_dir,
194
+ self.voices_info,
195
+ )
196
+ model_path, config_path = find_voice(
197
+ voice_name, self.cli_args.data_dir
198
+ )
199
+ _VOICE = PiperVoice.load(
200
+ model_path, config_path, use_cuda=self.cli_args.use_cuda
201
+ )
202
+ _VOICE_NAME = voice_name
203
+
204
+ assert _VOICE is not None
205
+
206
+ syn_config = SynthesisConfig()
207
+ if voice_speaker is not None:
208
+ syn_config.speaker_id = _VOICE.config.speaker_id_map.get(
209
+ voice_speaker
210
+ )
211
+ if syn_config.speaker_id is None:
212
+ try:
213
+ # Try to interpret as an id
214
+ syn_config.speaker_id = int(voice_speaker)
215
+ except ValueError:
216
+ pass
217
+
218
+ if syn_config.speaker_id is None:
219
+ _LOGGER.warning(
220
+ "No speaker '%s' for voice '%s'", voice_speaker, voice_name
221
+ )
222
+
223
+ if self.cli_args.length_scale is not None:
224
+ syn_config.length_scale = self.cli_args.length_scale
225
+
226
+ if self.cli_args.noise_scale is not None:
227
+ syn_config.noise_scale = self.cli_args.noise_scale
228
+
229
+ if self.cli_args.noise_w_scale is not None:
230
+ syn_config.noise_w_scale = self.cli_args.noise_w_scale
231
+
232
+ wav_writer: wave.Wave_write = wave.open(output_file, "wb")
233
+ with wav_writer:
234
+ _VOICE.synthesize_wav(text, wav_writer, syn_config)
235
+
236
+ output_file.seek(0)
237
+
238
+ wav_file: wave.Wave_read = wave.open(output_file, "rb")
239
+ with wav_file:
240
+ rate = wav_file.getframerate()
241
+ width = wav_file.getsampwidth()
242
+ channels = wav_file.getnchannels()
243
+
244
+ if send_start:
245
+ await self.write_event(
246
+ AudioStart(
247
+ rate=rate,
248
+ width=width,
249
+ channels=channels,
250
+ ).event(),
251
+ )
252
+
253
+ # Audio
254
+ audio_bytes = wav_file.readframes(wav_file.getnframes())
255
+ bytes_per_sample = width * channels
256
+ bytes_per_chunk = bytes_per_sample * self.cli_args.samples_per_chunk
257
+ num_chunks = int(math.ceil(len(audio_bytes) / bytes_per_chunk))
258
+
259
+ # Split into chunks
260
+ for i in range(num_chunks):
261
+ offset = i * bytes_per_chunk
262
+ chunk = audio_bytes[offset : offset + bytes_per_chunk]
263
+ await self.write_event(
264
+ AudioChunk(
265
+ audio=chunk,
266
+ rate=rate,
267
+ width=width,
268
+ channels=channels,
269
+ ).event(),
270
+ )
271
+
272
+ if send_stop:
273
+ await self.write_event(AudioStop().event())
274
+
275
+ return True