wyoming-piper 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ """Wyoming server for piper."""
@@ -0,0 +1,158 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import asyncio
4
+ import logging
5
+ from functools import partial
6
+ from typing import Any, Dict
7
+
8
+ from wyoming.info import Attribution, Info, TtsProgram, TtsVoice, TtsVoiceSpeaker
9
+ from wyoming.server import AsyncServer
10
+
11
+ from .download import get_voices
12
+ from .handler import PiperEventHandler
13
+ from .process import PiperProcessManager
14
+
15
+ _LOGGER = logging.getLogger(__name__)
16
+
17
+
18
async def main() -> None:
    """Parse command-line options, load voice metadata, and run the server.

    The default voice's Piper process is started before the server begins
    accepting connections; other voices are loaded on demand.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--piper", required=True, help="Path to piper executable")
    cli.add_argument(
        "--voice",
        required=True,
        help="Default Piper voice to use (e.g., en_US-lessac-medium)",
    )
    cli.add_argument("--uri", default="stdio://", help="unix:// or tcp://")
    cli.add_argument(
        "--data-dir",
        required=True,
        action="append",
        help="Data directory to check for downloaded models",
    )
    cli.add_argument(
        "--download-dir", required=True, help="Directory to download voices into"
    )

    # Synthesis settings for the default voice
    cli.add_argument(
        "--speaker", type=str, help="Name or id of speaker for default voice"
    )
    cli.add_argument("--noise-scale", type=float, help="Generator noise")
    cli.add_argument("--length-scale", type=float, help="Phoneme length")
    cli.add_argument("--noise-w", type=float, help="Phoneme width noise")

    # Text and audio handling
    cli.add_argument(
        "--auto-punctuation", default=".?!", help="Automatically add punctuation"
    )
    cli.add_argument("--samples-per-chunk", type=int, default=1024)
    cli.add_argument(
        "--max-piper-procs",
        type=int,
        default=1,
        help="Maximum number of piper process to run simultaneously (default: 1)",
    )

    # Voice catalog
    cli.add_argument(
        "--update-voices",
        action="store_true",
        help="Download latest voices.json during startup",
    )

    # Diagnostics
    cli.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = cli.parse_args()

    log_level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(level=log_level)

    # Voice catalog keyed by voice name
    voices_info = get_voices(args.download_dir, update_voices=args.update_voices)

    # Old voice names remain usable: each alias gets a copy of its voice's
    # entry, flagged so that it is not advertised as a separate voice below.
    aliases_info: Dict[str, Any] = {
        alias: {"_is_alias": True, **info}
        for info in voices_info.values()
        for alias in info.get("aliases", [])
    }
    voices_info.update(aliases_info)

    # Advertise every non-alias voice, ordered by name. Speakers are left out
    # on purpose for now because sending them overflows StreamReader buffers.
    advertised_voices = []
    for voice_name in sorted(voices_info):
        voice_info = voices_info[voice_name]
        if voice_info.get("_is_alias", False):
            continue

        advertised_voices.append(
            TtsVoice(
                name=voice_name,
                description=get_description(voice_info),
                attribution=Attribution(
                    name="rhasspy", url="https://github.com/rhasspy/piper"
                ),
                installed=True,
                languages=[voice_info["language"]["code"]],
            )
        )

    piper_program = TtsProgram(
        name="piper",
        description="A fast, local, neural text to speech engine",
        attribution=Attribution(
            name="rhasspy", url="https://github.com/rhasspy/piper"
        ),
        installed=True,
        voices=advertised_voices,
    )
    wyoming_info = Info(tts=[piper_program])

    process_manager = PiperProcessManager(args, voices_info)

    # Load the default voice up front
    await process_manager.get_process()

    server = AsyncServer.from_uri(args.uri)

    _LOGGER.info("Ready")
    handler_factory = partial(
        PiperEventHandler,
        wyoming_info,
        args,
        process_manager,
    )
    await server.run(handler_factory)
138
+
139
+
140
+ # -----------------------------------------------------------------------------
141
+
142
+
143
def get_description(voice_info: Dict[str, Any]):
    """Build the display text for a voice: its name with underscores shown
    as spaces, followed by the quality in parentheses."""
    readable_name = voice_info["name"].replace("_", " ")
    quality_label = voice_info["quality"]
    return f"{readable_name} ({quality_label})"
150
+
151
+
152
+ # -----------------------------------------------------------------------------
153
+
154
# Script entry point: run the async main() to completion.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is deliberately ignored so the process exits without a traceback.
        pass
wyoming_piper/const.py ADDED
@@ -0,0 +1,101 @@
1
# List of short language codes.
# NOTE(review): the name refers to Whisper although this package wraps Piper;
# presumably carried over from a related speech-to-text package. Nothing in the
# visible sources references it -- confirm usage before changing or removing.
WHISPER_LANGUAGES = [
    "af",
    "am",
    "ar",
    "as",
    "az",
    "ba",
    "be",
    "bg",
    "bn",
    "bo",
    "br",
    "bs",
    "ca",
    "cs",
    "cy",
    "da",
    "de",
    "el",
    "en",
    "es",
    "et",
    "eu",
    "fa",
    "fi",
    "fo",
    "fr",
    "gl",
    "gu",
    "ha",
    "haw",
    "he",
    "hi",
    "hr",
    "ht",
    "hu",
    "hy",
    "id",
    "is",
    "it",
    "ja",
    "jw",
    "ka",
    "kk",
    "km",
    "kn",
    "ko",
    "la",
    "lb",
    "ln",
    "lo",
    "lt",
    "lv",
    "mg",
    "mi",
    "mk",
    "ml",
    "mn",
    "mr",
    "ms",
    "mt",
    "my",
    "ne",
    "nl",
    "nn",
    "no",
    "oc",
    "pa",
    "pl",
    "ps",
    "pt",
    "ro",
    "ru",
    "sa",
    "sd",
    "si",
    "sk",
    "sl",
    "sn",
    "so",
    "sq",
    "sr",
    "su",
    "sv",
    "sw",
    "ta",
    "te",
    "tg",
    "th",
    "tk",
    "tl",
    "tr",
    "tt",
    "uk",
    "ur",
    "uz",
    "vi",
    "yi",
    "yo",
    "zh",
]
@@ -0,0 +1,161 @@
1
+ """Utility for downloading Piper voices."""
2
+ import json
3
+ import logging
4
+ import shutil
5
+ from pathlib import Path
6
+ from typing import Any, Dict, Iterable, Set, Tuple, Union
7
+ from urllib.request import urlopen
8
+
9
+ from .file_hash import get_file_hash
10
+
11
# URL template for files in the rhasspy/piper-voices repository (pinned to
# v1.0.0); "{file}" is replaced with a path relative to the repository root.
URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}"

# Directory containing this module (the embedded voices.json is looked up here).
_DIR = Path(__file__).parent
_LOGGER = logging.getLogger(__name__)

# File names that are neither verified nor downloaded for a voice.
_SKIP_FILES = {"MODEL_CARD"}
17
+
18
+
19
class VoiceNotFoundError(Exception):
    """Raised when a requested voice cannot be found."""
21
+
22
+
23
def get_voices(
    download_dir: Union[str, Path], update_voices: bool = False
) -> Dict[str, Any]:
    """Load the voice catalog from a downloaded or embedded voices.json.

    Args:
        download_dir: Directory in which a downloaded voices.json is stored
            and looked up.
        update_voices: When true, fetch the latest voices.json into
            ``download_dir`` before loading.

    Returns:
        The parsed JSON content of voices.json.

    Errors from the download, from opening the file, or from JSON parsing
    are not caught here.
    """
    download_dir = Path(download_dir)
    voices_download = download_dir / "voices.json"

    if update_voices:
        # Download latest voices.json
        voices_url = URL_FORMAT.format(file="voices.json")
        _LOGGER.debug("Downloading %s to %s", voices_url, voices_download)

        # The download directory may not exist yet on a first run
        download_dir.mkdir(parents=True, exist_ok=True)

        # Download to a temporary name and only replace voices.json once the
        # transfer has finished. Otherwise an interrupted download would leave
        # a truncated voices.json that is preferred over the embedded copy.
        voices_partial = download_dir / "voices.json.download"
        try:
            with urlopen(voices_url) as response, open(
                voices_partial, "wb"
            ) as download_file:
                shutil.copyfileobj(response, download_file)

            voices_partial.replace(voices_download)
        finally:
            # Only still present if the download or the rename failed
            if voices_partial.exists():
                voices_partial.unlink()

    # Prefer downloaded file to embedded
    voices_embedded = _DIR / "voices.json"
    voices_path = voices_download if voices_download.exists() else voices_embedded

    _LOGGER.debug("Loading %s", voices_path)
    with open(voices_path, "r", encoding="utf-8") as voices_file:
        return json.load(voices_file)
46
+
47
+
48
def ensure_voice_exists(
    name: str,
    data_dirs: Iterable[Union[str, Path]],
    download_dir: Union[str, Path],
    voices_info: Dict[str, Any],
):
    """Make sure the files of a voice are available, downloading if needed.

    Args:
        name: Voice name from ``voices_info``, or the path to the .onnx file
            of a custom voice (its config is expected at ``name + ".json"``).
        data_dirs: Directories to search for already downloaded voice files.
        download_dir: Directory that missing files are downloaded into.
        voices_info: Voice catalog keyed by voice name; each entry lists its
            files with expected size and MD5 digest.

    Raises:
        VoiceNotFoundError: ``name`` is not in ``voices_info`` and is not a
            custom voice with both model and config present.
        ValueError: The catalog entry lists no files.
    """
    if name not in voices_info:
        # Try as file path to a custom voice
        onnx_path = Path(name)
        config_path = Path(name + ".json")
        if onnx_path.exists():
            if config_path.exists():
                # Custom voice found
                return

            _LOGGER.warning("Missing custom voice config: %s", config_path)

        raise VoiceNotFoundError(name)

    assert data_dirs, "No data dirs"

    voice_info = voices_info[name]
    voice_files = voice_info["files"]
    verified_files: Set[str] = set()
    files_to_download: Set[str] = set()

    for data_dir in data_dirs:
        data_dir = Path(data_dir)

        # Check sizes/hashes
        for file_path, file_info in voice_files.items():
            if file_path in verified_files:
                # A good copy was already found in an earlier data directory
                continue

            file_name = Path(file_path).name
            if file_name in _SKIP_FILES:
                continue

            data_file_path = data_dir / file_name
            _LOGGER.debug("Checking %s", data_file_path)
            if not data_file_path.exists():
                _LOGGER.debug("Missing %s", data_file_path)
                files_to_download.add(file_path)
                continue

            expected_size = file_info["size_bytes"]
            actual_size = data_file_path.stat().st_size
            if expected_size != actual_size:
                _LOGGER.warning(
                    "Wrong size (expected=%s, actual=%s) for %s",
                    expected_size,
                    actual_size,
                    data_file_path,
                )
                files_to_download.add(file_path)
                continue

            expected_hash = file_info["md5_digest"]
            actual_hash = get_file_hash(data_file_path)
            if expected_hash != actual_hash:
                _LOGGER.warning(
                    "Wrong hash (expected=%s, actual=%s) for %s",
                    expected_hash,
                    actual_hash,
                    data_file_path,
                )
                files_to_download.add(file_path)
                continue

            # This directory holds a valid copy. Forget any download request
            # made because an earlier directory lacked the file; without this
            # the voice would be fetched again on every call.
            verified_files.add(file_path)
            files_to_download.discard(file_path)

    if (not voice_files) and (not files_to_download):
        raise ValueError(f"Unable to find or download voice: {name}")

    # Download missing files
    download_dir = Path(download_dir)

    for file_path in files_to_download:
        file_name = Path(file_path).name
        if file_name in _SKIP_FILES:
            continue

        file_url = URL_FORMAT.format(file=file_path)
        download_file_path = download_dir / file_name
        download_file_path.parent.mkdir(parents=True, exist_ok=True)

        _LOGGER.debug("Downloading %s to %s", file_url, download_file_path)
        with urlopen(file_url) as response, open(
            download_file_path, "wb"
        ) as download_file:
            shutil.copyfileobj(response, download_file)

        _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url)
139
+
140
+
141
def find_voice(name: str, data_dirs: Iterable[Union[str, Path]]) -> Tuple[Path, Path]:
    """Locate the model and config files of a voice.

    Each data directory is checked in order for ``<name>.onnx`` together with
    ``<name>.onnx.json``. If none has both, ``name`` itself is tried as the
    path of a custom model with its config at ``name + ".json"``.

    Returns: tuple of onnx path, config path

    Raises: ValueError if no complete pair of files exists
    """

    def _candidates():
        # Downloaded voices first, in data directory order
        for directory in data_dirs:
            base = Path(directory)
            yield base / f"{name}.onnx", base / f"{name}.onnx.json"

        # Finally, a custom voice given by path
        yield Path(name), Path(name + ".json")

    for model_path, settings_path in _candidates():
        if not model_path.exists():
            continue

        if settings_path.exists():
            return model_path, settings_path

    raise ValueError(f"Missing files for voice {name}")
@@ -0,0 +1,46 @@
1
+ import argparse
2
+ import hashlib
3
+ import json
4
+ import sys
5
+ from pathlib import Path
6
+ from typing import Union
7
+
8
+
9
def get_file_hash(path: Union[str, Path], bytes_per_chunk: int = 8192) -> str:
    """Return the hex MD5 digest of a file, reading it piece by piece."""
    digest = hashlib.md5()
    with open(path, "rb") as source:
        # An empty read marks the end of the file
        for piece in iter(lambda: source.read(bytes_per_chunk), b""):
            digest.update(piece)

    return digest.hexdigest()
19
+
20
+
21
+ # -----------------------------------------------------------------------------
22
+
23
+
24
def main():
    """Print a JSON object mapping each given file to its MD5 digest.

    With --dir, the keys are the file paths relative to that directory.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("file", nargs="+")
    cli.add_argument("--dir", help="Parent directory")
    args = cli.parse_args()

    parent_dir = Path(args.dir) if args.dir else None

    digests = {}
    for file_arg in args.file:
        file_path = Path(file_arg)
        # Hash using the path as given; only the reported key is made relative
        digest = get_file_hash(file_path)
        key_path = file_path if parent_dir is None else file_path.relative_to(parent_dir)
        digests[str(key_path)] = digest

    json.dump(digests, sys.stdout)
43
+
44
+
45
# Allow running this module directly as a command-line hashing tool.
if __name__ == "__main__":
    main()
@@ -0,0 +1,136 @@
1
+ """Event handler for clients of the server."""
2
+ import argparse
3
+ import json
4
+ import logging
5
+ import math
6
+ import os
7
+ import wave
8
+ from typing import Any, Dict, Optional
9
+
10
+ from wyoming.audio import AudioChunk, AudioStart, AudioStop
11
+ from wyoming.event import Event
12
+ from wyoming.info import Describe, Info
13
+ from wyoming.server import AsyncEventHandler
14
+ from wyoming.tts import Synthesize
15
+
16
+ from .process import PiperProcessManager
17
+
18
+ _LOGGER = logging.getLogger(__name__)
19
+
20
+
21
class PiperEventHandler(AsyncEventHandler):
    """Event handler for a client of the server.

    Answers describe requests with the server info and synthesize requests
    with audio produced by a Piper process.
    """

    def __init__(
        self,
        wyoming_info: Info,
        cli_args: argparse.Namespace,
        process_manager: PiperProcessManager,
        *args,
        **kwargs,
    ) -> None:
        """Store shared state; remaining arguments go to the base handler.

        Args:
            wyoming_info: Server info; converted to an event once here.
            cli_args: Parsed command-line options (``auto_punctuation`` and
                ``samples_per_chunk`` are read by this handler).
            process_manager: Provides Piper processes and the lock guarding them.
        """
        super().__init__(*args, **kwargs)

        self.cli_args = cli_args
        self.wyoming_info_event = wyoming_info.event()
        self.process_manager = process_manager

    async def handle_event(self, event: Event) -> bool:
        """Handle one client event.

        Describe events are answered with the info event. Synthesize events
        are answered with an audio start event, audio chunks, and an audio
        stop event. Any other event is logged and ignored.

        Returns:
            Always True.
        """
        if Describe.is_type(event.type):
            await self.write_event(self.wyoming_info_event)
            _LOGGER.debug("Sent info")
            return True

        if not Synthesize.is_type(event.type):
            _LOGGER.warning("Unexpected event: %s", event)
            return True

        synthesize = Synthesize.from_event(event)
        _LOGGER.debug(synthesize)

        raw_text = synthesize.text

        # Join multiple lines
        text = " ".join(raw_text.strip().splitlines())

        auto_punctuation = self.cli_args.auto_punctuation
        if auto_punctuation and text and (text[-1] not in auto_punctuation):
            # Add automatic punctuation (important for some voices)
            text = text + auto_punctuation[0]

        # Talk to the Piper process while holding the shared lock
        async with self.process_manager.processes_lock:
            _LOGGER.debug("synthesize: raw_text=%s, text='%s'", raw_text, text)
            voice_name: Optional[str] = None
            voice_speaker: Optional[str] = None
            if synthesize.voice is not None:
                voice_name = synthesize.voice.name
                voice_speaker = synthesize.voice.speaker

            piper_proc = await self.process_manager.get_process(voice_name=voice_name)

            assert piper_proc.proc.stdin is not None
            assert piper_proc.proc.stdout is not None

            # JSON in, file path out
            input_obj: Dict[str, Any] = {"text": text}
            if voice_speaker is not None:
                speaker_id = piper_proc.get_speaker_id(voice_speaker)
                if speaker_id is not None:
                    input_obj["speaker_id"] = speaker_id
                else:
                    _LOGGER.warning(
                        "No speaker '%s' for voice '%s'", voice_speaker, voice_name
                    )

            _LOGGER.debug("input: %s", input_obj)
            piper_proc.proc.stdin.write(
                (json.dumps(input_obj, ensure_ascii=False) + "\n").encode()
            )
            await piper_proc.proc.stdin.drain()

            output_path = (await piper_proc.proc.stdout.readline()).decode().strip()
            _LOGGER.debug(output_path)

        try:
            wav_file: wave.Wave_read = wave.open(output_path, "rb")
            with wav_file:
                rate = wav_file.getframerate()
                width = wav_file.getsampwidth()
                channels = wav_file.getnchannels()

                await self.write_event(
                    AudioStart(
                        rate=rate,
                        width=width,
                        channels=channels,
                    ).event(),
                )

                # Audio
                audio_bytes = wav_file.readframes(wav_file.getnframes())
                bytes_per_sample = width * channels
                bytes_per_chunk = bytes_per_sample * self.cli_args.samples_per_chunk

                # Split into chunks
                for offset in range(0, len(audio_bytes), bytes_per_chunk):
                    chunk = audio_bytes[offset : offset + bytes_per_chunk]
                    await self.write_event(
                        AudioChunk(
                            audio=chunk,
                            rate=rate,
                            width=width,
                            channels=channels,
                        ).event(),
                    )

            await self.write_event(AudioStop().event())
            _LOGGER.debug("Completed request")
        finally:
            # Remove the WAV file even when reading it or writing to the
            # client fails, so failed requests do not leave files behind.
            try:
                os.unlink(output_path)
            except OSError:
                # Do not hide an error raised above with a cleanup failure
                _LOGGER.warning("Failed to remove %s", output_path)

        return True