vocence-plugins 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ *.egg
6
+ .eggs/
7
+ build/
8
+ dist/
9
+ .venv/
10
+
11
+ # IDE
12
+ .vscode/
13
+ .idea/
14
+ *.swp
15
+
16
+ # OS
17
+ .DS_Store
@@ -0,0 +1,200 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for describing the origin of the Work and
141
+ reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Support. While redistributing the Work or
166
+ Derivative Works thereof, You may accept support, warranty,
167
+ indemnity, or other liability obligations and/or rights consistent
168
+ with this License. However, in accepting such obligations, You may
169
+ act only on Your own behalf and on Your sole responsibility, not on
170
+ behalf of any other Contributor, and only if You agree to indemnify,
171
+ defend, and hold each Contributor harmless for any liability
172
+ incurred by, or claims asserted against, such Contributor by reason
173
+ of your accepting any such warranty or support.
174
+
175
+ END OF TERMS AND CONDITIONS
176
+
177
+ APPENDIX: How to apply the Apache License to your work.
178
+
179
+ To apply the Apache License to your work, attach the following
180
+ boilerplate notice, with the fields enclosed by brackets "[]"
181
+ replaced with your own identifying information. (Don't include
182
+ the brackets!) The text should be enclosed in the appropriate
183
+ comment syntax for the file format. We also recommend that a
184
+ file or class name and description of purpose be included on the
185
+ same "printed page" as the copyright notice for easier
186
+ identification within third-party archives.
187
+
188
+ Copyright 2026 Vocence
189
+
190
+ Licensed under the Apache License, Version 2.0 (the "License");
191
+ you may not use this file except in compliance with the License.
192
+ You may obtain a copy of the License at
193
+
194
+ http://www.apache.org/licenses/LICENSE-2.0
195
+
196
+ Unless required by applicable law or agreed to in writing, software
197
+ distributed under the License is distributed on an "AS IS" BASIS,
198
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
199
+ implied. See the License for the specific language governing permissions
200
+ and limitations under the License.
@@ -0,0 +1,106 @@
1
+ Metadata-Version: 2.4
2
+ Name: vocence-plugins
3
+ Version: 0.1.0
4
+ Summary: Vocence voice plug-ins — drop custom-cloned voices and streaming speech recognition into your real-time voice agents.
5
+ Project-URL: Homepage, https://www.vocence.ai
6
+ Project-URL: Documentation, https://www.vocence.ai/docs/sdk-agents
7
+ Project-URL: Repository, https://github.com/concil859856/vocence-plugins
8
+ Project-URL: Issues, https://github.com/concil859856/vocence-plugins/issues
9
+ Author-email: Vocence <space@vocence.ai>
10
+ License-Expression: Apache-2.0
11
+ License-File: LICENSE
12
+ Keywords: realtime,stt,tts,vocence,voice-agents,voice-cloning
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Requires-Python: >=3.11
24
+ Requires-Dist: aiohttp>=3.9
25
+ Requires-Dist: videosdk-agents>=0.1.0
26
+ Description-Content-Type: text/markdown
27
+
28
+ # vocence-plugins
29
+
30
+ Vocence voice plug-ins for real-time agent pipelines — drop in **Vocence custom voices** for TTS and **Vocence streaming speech recognition** for STT.
31
+
32
+ - **`VocenceTTS`** — streaming text-to-speech with the Vocence voice library (cloned, designed, and built-in speakers). One persistent connection per session, sub-second TTFB on warm connections, PCM16LE @ 24 kHz output.
33
+ - **`VocenceSTT`** — streaming speech-to-text with interim + final transcripts, optional speech / silence events for VAD integration, and language auto-detect.
34
+
35
+ > Status: 0.1.0 — public alpha.
36
+
37
+ ## Install
38
+
39
+ ```bash
40
+ pip install vocence-plugins
41
+ ```
42
+
43
+ The plug-ins conform to the standard TTS / STT abstract interfaces, so they slot into any compatible voice-agent framework.
44
+
45
+ ## API key
46
+
47
+ Get one at https://www.vocence.ai/account/developer. Requires the Premium plan.
48
+
49
+ ```bash
50
+ export VOCENCE_API_KEY=voc_live_...
51
+ ```
52
+
53
+ Or pass it directly: `VocenceTTS(api_key="voc_live_...", voice="...")`.
54
+
55
+ ## Quickstart
56
+
57
+ ```python
58
+ from vocence_plugins import VocenceTTS, VocenceSTT
59
+
60
+ tts = VocenceTTS(voice="design-aria", language="English")
61
+ stt = VocenceSTT(language="English")
62
+
63
+ # Wire into your agent pipeline as the TTS / STT components.
64
+ ```
65
+
66
+ The plug-ins handle the WebSocket lifecycle, reconnection, and audio framing — your code just sees text in and audio out (TTS), or audio in and transcript events out (STT).
67
+
68
+ ## Plugin reference
69
+
70
+ ### `VocenceTTS(*, api_key=None, voice, language=None, base_url=...)`
71
+
72
+ Streaming TTS over the Vocence voice service. One WebSocket reused across many `synthesize()` calls in the same session, closed on `aclose()`.
73
+
74
+ | Arg | Default | Notes |
75
+ |---|---|---|
76
+ | `api_key` | `VOCENCE_API_KEY` env | Required (`voc_live_…`). |
77
+ | `voice` | — | Required. Either a built-in slug (`"design-aria"`, `"design-jasper"`, …) or the numeric id of a saved designed / cloned voice. |
78
+ | `language` | `None` | Optional hint sent on every speak. |
79
+ | `base_url` | `https://api.vocence.ai` | Override for staging / self-hosted. |
80
+
81
+ Audio output: PCM16LE @ 24 kHz, mono.
82
+
83
+ ### `VocenceSTT(*, api_key=None, language="auto", sample_rate=16000, enable_partials=True, vad_events=True, base_url=...)`
84
+
85
+ Streaming STT. Lazy-opens a WebSocket on the first audio frame, runs a background reader that translates events into the framework's standard transcript event shape (interim, final, speech-start, speech-end).
86
+
87
+ | Arg | Default | Notes |
88
+ |---|---|---|
89
+ | `api_key` | `VOCENCE_API_KEY` env | Required. |
90
+ | `language` | `"auto"` | ISO-639-1 (`"en"`), full name (`"English"`), or `"auto"`. Normalized to ISO before send. |
91
+ | `sample_rate` | 16000 | PCM16LE mono input. |
92
+ | `enable_partials` | `True` | Stream interim hypotheses as the model refines. |
93
+ | `vad_events` | `True` | Emit speech-start / silence events from the internal VAD. |
+ | `base_url` | `https://api.vocence.ai` | Override for staging / self-hosted. |
94
+
95
+ ## Compared to the Vocence Python SDK
96
+
97
+ | Use case | Use |
98
+ |---|---|
99
+ | Talk to a Vocence-hosted voice agent (REST + WebSocket to our service) | [`vocence`](https://pypi.org/project/vocence/) |
100
+ | Build your own agent pipeline with Vocence voices + recognition | This package |
101
+
102
+ The two don't overlap — different products for different use cases. Both authenticate with the same `voc_live_…` key.
103
+
104
+ ## License
105
+
106
+ Apache-2.0.
@@ -0,0 +1,79 @@
1
+ # vocence-plugins
2
+
3
+ Vocence voice plug-ins for real-time agent pipelines — drop in **Vocence custom voices** for TTS and **Vocence streaming speech recognition** for STT.
4
+
5
+ - **`VocenceTTS`** — streaming text-to-speech with the Vocence voice library (cloned, designed, and built-in speakers). One persistent connection per session, sub-second TTFB on warm connections, PCM16LE @ 24 kHz output.
6
+ - **`VocenceSTT`** — streaming speech-to-text with interim + final transcripts, optional speech / silence events for VAD integration, and language auto-detect.
7
+
8
+ > Status: 0.1.0 — public alpha.
9
+
10
+ ## Install
11
+
12
+ ```bash
13
+ pip install vocence-plugins
14
+ ```
15
+
16
+ The plug-ins conform to the standard TTS / STT abstract interfaces, so they slot into any compatible voice-agent framework.
17
+
18
+ ## API key
19
+
20
+ Get one at https://www.vocence.ai/account/developer. Requires the Premium plan.
21
+
22
+ ```bash
23
+ export VOCENCE_API_KEY=voc_live_...
24
+ ```
25
+
26
+ Or pass it directly: `VocenceTTS(api_key="voc_live_...", voice="...")`.
27
+
28
+ ## Quickstart
29
+
30
+ ```python
31
+ from vocence_plugins import VocenceTTS, VocenceSTT
32
+
33
+ tts = VocenceTTS(voice="design-aria", language="English")
34
+ stt = VocenceSTT(language="English")
35
+
36
+ # Wire into your agent pipeline as the TTS / STT components.
37
+ ```
38
+
39
+ The plug-ins handle the WebSocket lifecycle, reconnection, and audio framing — your code just sees text in and audio out (TTS), or audio in and transcript events out (STT).
40
+
41
+ ## Plugin reference
42
+
43
+ ### `VocenceTTS(*, api_key=None, voice, language=None, base_url=...)`
44
+
45
+ Streaming TTS over the Vocence voice service. One WebSocket reused across many `synthesize()` calls in the same session, closed on `aclose()`.
46
+
47
+ | Arg | Default | Notes |
48
+ |---|---|---|
49
+ | `api_key` | `VOCENCE_API_KEY` env | Required (`voc_live_…`). |
50
+ | `voice` | — | Required. Either a built-in slug (`"design-aria"`, `"design-jasper"`, …) or the numeric id of a saved designed / cloned voice. |
51
+ | `language` | `None` | Optional hint sent on every speak. |
52
+ | `base_url` | `https://api.vocence.ai` | Override for staging / self-hosted. |
53
+
54
+ Audio output: PCM16LE @ 24 kHz, mono.
55
+
56
+ ### `VocenceSTT(*, api_key=None, language="auto", sample_rate=16000, enable_partials=True, vad_events=True, base_url=...)`
57
+
58
+ Streaming STT. Lazy-opens a WebSocket on the first audio frame, runs a background reader that translates events into the framework's standard transcript event shape (interim, final, speech-start, speech-end).
59
+
60
+ | Arg | Default | Notes |
61
+ |---|---|---|
62
+ | `api_key` | `VOCENCE_API_KEY` env | Required. |
63
+ | `language` | `"auto"` | ISO-639-1 (`"en"`), full name (`"English"`), or `"auto"`. Normalized to ISO before send. |
64
+ | `sample_rate` | 16000 | PCM16LE mono input. |
65
+ | `enable_partials` | `True` | Stream interim hypotheses as the model refines. |
66
+ | `vad_events` | `True` | Emit speech-start / silence events from the internal VAD. |
+ | `base_url` | `https://api.vocence.ai` | Override for staging / self-hosted. |
67
+
68
+ ## Compared to the Vocence Python SDK
69
+
70
+ | Use case | Use |
71
+ |---|---|
72
+ | Talk to a Vocence-hosted voice agent (REST + WebSocket to our service) | [`vocence`](https://pypi.org/project/vocence/) |
73
+ | Build your own agent pipeline with Vocence voices + recognition | This package |
74
+
75
+ The two don't overlap — different products for different use cases. Both authenticate with the same `voc_live_…` key.
76
+
77
+ ## License
78
+
79
+ Apache-2.0.
@@ -0,0 +1,43 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.21"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "vocence-plugins"
7
+ dynamic = ["version"]
8
+ description = "Vocence voice plug-ins — drop custom-cloned voices and streaming speech recognition into your real-time voice agents."
9
+ readme = "README.md"
10
+ license = "Apache-2.0"
11
+ requires-python = ">=3.11"
12
+ authors = [{ name = "Vocence", email = "space@vocence.ai" }]
13
+ keywords = ["vocence", "voice-agents", "tts", "stt", "voice-cloning", "realtime"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: Apache Software License",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Topic :: Multimedia :: Sound/Audio :: Speech",
24
+ "Topic :: Software Development :: Libraries :: Python Modules",
25
+ ]
26
+ dependencies = [
27
+ "aiohttp>=3.9",
28
+ # The plug-ins implement the abstract interfaces of a compatible
29
+ # real-time agent framework. Required at runtime.
30
+ "videosdk-agents>=0.1.0",
31
+ ]
32
+
33
+ [project.urls]
34
+ Homepage = "https://www.vocence.ai"
35
+ Documentation = "https://www.vocence.ai/docs/sdk-agents"
36
+ Repository = "https://github.com/concil859856/vocence-plugins"
37
+ Issues = "https://github.com/concil859856/vocence-plugins/issues"
38
+
39
+ [tool.hatch.version]
40
+ path = "src/vocence_plugins/__init__.py"
41
+
42
+ [tool.hatch.build.targets.wheel]
43
+ packages = ["src/vocence_plugins"]
@@ -0,0 +1,37 @@
1
+ """Vocence voice plug-ins.
2
+
3
+ Drop-in components that bring **Vocence custom voices** and
4
+ **Vocence streaming speech recognition** into your real-time voice
5
+ agent pipeline. Authenticated with the standard Vocence
6
+ ``voc_live_…`` developer key — the same one you use across the rest
7
+ of the Vocence platform.
8
+
9
+ The headline component is :class:`VocenceTTS` — streaming
10
+ text-to-speech with the Vocence voice library: cloned voices,
11
+ designed voices, and the built-in speaker catalog. :class:`VocenceSTT`
12
+ streams audio in and transcripts out.
13
+
14
+ Example
15
+ -------
16
+
17
+ >>> from vocence_plugins import VocenceTTS, VocenceSTT
18
+ >>>
19
+ >>> tts = VocenceTTS(api_key="voc_live_...", voice="design-aria")
20
+ >>> stt = VocenceSTT(api_key="voc_live_...", language="English")
21
+
22
+ Plug them into the agent framework of your choice — both classes
23
+ conform to the standard TTS / STT abstract interfaces.
24
+
25
+ See https://www.vocence.ai/docs/sdk-agents for the full guide.
26
+ """
27
+ from __future__ import annotations
28
+
29
+ __version__ = "0.1.0"
30
+
31
+ from .tts import VocenceTTS
32
+ from .stt import VocenceSTT
33
+
34
+ __all__ = [
35
+ "VocenceTTS",
36
+ "VocenceSTT",
37
+ ]
@@ -0,0 +1,44 @@
1
+ """Language-name → ISO-639-1 mapping for the Vocence STT pod.
2
+
3
+ The pod's wire protocol expects an ISO code (or ``"auto"``); full
4
+ names like ``"English"`` silently degrade to auto-detect, which then
5
+ mis-classifies short utterances. Mirrors the helper used inside the
6
+ Vocence backend so plugin users get the same behavior the hosted
7
+ service does.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from typing import Optional
12
+
13
_NAME_TO_ISO = {
    "English": "en",
    "Chinese": "zh",
    "Japanese": "ja",
    "Korean": "ko",
    "German": "de",
    "French": "fr",
    "Russian": "ru",
    "Portuguese": "pt",
    "Spanish": "es",
    "Italian": "it",
}

# Case-folded view of the table above, built once at import time, so that
# "english", "ENGLISH" and "English" all resolve to the same ISO code.
_FOLDED_NAME_TO_ISO = {
    name.casefold(): code for name, code in _NAME_TO_ISO.items()
}


def to_iso_639_1(language: Optional[str]) -> str:
    """Resolve any input form to the ISO-639-1 code the STT pod wants.

    Accepts agent-config full names (``"English"``, matched
    case-insensitively), already-ISO codes (``"en"`` / ``"EN"``), and the
    sentinel ``"auto"`` / ``None``. Surrounding whitespace is ignored.

    Parameters
    ----------
    language:
        Language name, two-letter code, ``"auto"``, ``None`` or ``""``.

    Returns
    -------
    str
        A lowercase two-letter code, or ``"auto"`` for empty or unknown
        input so the pod is never sent a string it doesn't understand.
    """
    if not language:
        return "auto"
    s = language.strip()
    folded = s.casefold()
    if folded == "auto":
        return "auto"
    code = _FOLDED_NAME_TO_ISO.get(folded)
    if code is not None:
        return code
    # ``isalpha`` alone is true for any Unicode letters (e.g. two CJK
    # characters); ISO-639-1 codes are strictly two ASCII letters.
    if len(s) == 2 and s.isascii() and s.isalpha():
        return s.lower()
    return "auto"
@@ -0,0 +1,313 @@
1
+ """VocenceSTT — streaming speech-to-text with Vocence recognition.
2
+
3
+ Conforms to the standard STT abstract interface used by real-time
4
+ agent pipelines, so it slots in alongside any compatible
5
+ ``Pipeline(stt=...)``. The plug-in handles connection lifecycle,
6
+ audio framing, and event translation — callers just see audio in
7
+ and standard transcript events out.
8
+
9
+ Audio input: PCM16LE @ 16 kHz, mono. One persistent connection is
10
+ lazily opened on the first audio frame and torn down on
11
+ ``aclose()``. A background reader task translates incoming events
12
+ into the framework's standard transcript event shape.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import asyncio
17
+ import json
18
+ import logging
19
+ import os
20
+ from typing import Any, Optional
21
+ from urllib.parse import urlparse
22
+
23
+ import aiohttp
24
+
25
+ from videosdk.agents import ( # type: ignore[import-not-found]
26
+ STT,
27
+ STTResponse,
28
+ SpeechData,
29
+ SpeechEventType,
30
+ )
31
+
32
+ from ._lang import to_iso_639_1
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
+ _DEFAULT_BASE_URL = os.environ.get("VOCENCE_BASE_URL", "https://api.vocence.ai")
38
+ _DEFAULT_SAMPLE_RATE = 16_000
39
+ _DEFAULT_TIMEOUT_SEC = 30.0
40
+
41
+
42
+ def _ws_url_from_base(base_url: str) -> str:
43
+ """Translate ``https://api.vocence.ai`` → ``wss://api.vocence.ai/v1/stt/stream``."""
44
+ parsed = urlparse(base_url.rstrip("/"))
45
+ scheme = "wss" if parsed.scheme == "https" else "ws"
46
+ return f"{scheme}://{parsed.netloc}{parsed.path}/v1/stt/stream"
47
+
48
+
49
class VocenceSTT(STT):
    """Streaming STT plugin backed by the Vocence recognition service.

    Parameters
    ----------
    api_key:
        Vocence developer key (``voc_live_…``). Falls back to the
        ``VOCENCE_API_KEY`` env var. Required.
    language:
        Spoken language. Accepts ISO-639-1 codes (``"en"``), full
        names (``"English"``), or ``"auto"`` for auto-detect.
        Default ``"auto"``. Normalized to ISO when the ``start`` frame
        is sent, to avoid silent degradation to auto-detect for
        unrecognized forms.
    sample_rate:
        Audio sample rate. Default 16 kHz (mono PCM16LE). Only
        16 kHz is accepted today; argument is kept for
        forward-compatibility.
    enable_partials:
        Stream interim hypotheses as recognition refines. Default
        ``True`` — needed for any responsive UI; disable only for
        batch / archival use cases that just want finals.
    vad_events:
        Emit speech-start / silence events from the internal VAD so
        the orchestrator can drive its interrupt / speech_started
        hooks off these. Default ``True``. Independent of any
        external VAD plugin you wire alongside.
    base_url:
        Override the default ``https://api.vocence.ai``.
    forward_interim_transcripts:
        Whether to surface interim text to the user UI (passed
        through to the standard STT base initializer).
    **kwargs:
        Accepted and ignored.

    Raises
    ------
    ValueError
        If no API key is passed and ``VOCENCE_API_KEY`` is unset.
    """

    def __init__(
        self,
        *,
        api_key: Optional[str] = None,
        language: str = "auto",
        sample_rate: int = _DEFAULT_SAMPLE_RATE,
        enable_partials: bool = True,
        vad_events: bool = True,
        base_url: str = _DEFAULT_BASE_URL,
        forward_interim_transcripts: bool = False,
        **kwargs: Any,
    ) -> None:
        super().__init__(forward_interim_transcripts=forward_interim_transcripts)
        self.api_key = api_key or os.environ.get("VOCENCE_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Vocence API key required — pass api_key= or set VOCENCE_API_KEY"
            )
        self.language = language
        self.sample_rate = sample_rate
        self.enable_partials = enable_partials
        self.vad_events = vad_events
        self.base_url = base_url
        self._ws_url = _ws_url_from_base(base_url)

        # Connection state. Everything is created lazily by
        # ``_ensure_connection`` and released by ``_teardown_ws``.
        self._session: aiohttp.ClientSession | None = None
        self._ws: aiohttp.ClientWebSocketResponse | None = None
        self._reader_task: asyncio.Task | None = None
        # Serializes concurrent connect attempts from overlapping frames.
        self._connect_lock = asyncio.Lock()
        self._closed = False

    # ----- abstract overrides ---------------------------------------------

    async def process_audio(
        self,
        audio_frames: bytes,
        language: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Send one PCM16LE frame to the pod, (re)connecting if needed.

        The framework calls this once per audio frame (typically
        20-40 ms), so the hot path is just a bytes send. The WebSocket
        is opened on the first call and re-opened when a previous
        socket has been closed (by the server or after a failed send).

        ``language`` is accepted for API symmetry but ignored — the
        pod's ``start`` frame binds the language for the session.
        Construct a new ``VocenceSTT`` for a different language.

        Raises
        ------
        Exception
            Whatever ``_ensure_connection`` raises when the connection
            or handshake fails. Send failures are logged, not raised.
        """
        if self._closed:
            return
        # A socket closed by the peer is not ``None`` but is just as
        # unusable; checking only for ``None`` would drop every later
        # frame silently instead of reconnecting.
        if self._ws is None or self._ws.closed:
            await self._ensure_connection()
        ws = self._ws
        if ws is None or ws.closed:
            return
        try:
            await ws.send_bytes(audio_frames)
        except Exception as exc:  # noqa: BLE001
            logger.warning("VocenceSTT send_bytes failed: %s", exc)
            # Drop the broken socket so the next frame reconnects.
            await self._teardown_ws()

    async def flush(self) -> None:
        """Ask the pod to finalize its current partial as soon as
        possible. Useful at end-of-utterance when the orchestrator
        knows the turn is over but the pod hasn't auto-emitted a
        final yet. No-op when no connection is open; failures are
        logged at debug level and not raised."""
        ws = self._ws
        if ws is None or ws.closed:
            return
        try:
            await ws.send_str(json.dumps({"type": "commit"}))
        except Exception as exc:  # noqa: BLE001
            logger.debug("VocenceSTT commit failed: %s", exc)

    async def aclose(self) -> None:
        """Close the WebSocket + HTTP session. Idempotent."""
        self._closed = True
        if self._reader_task is not None and not self._reader_task.done():
            self._reader_task.cancel()
            try:
                await self._reader_task
            except (asyncio.CancelledError, Exception):
                # Best-effort shutdown: the reader's outcome no longer matters.
                pass
        self._reader_task = None
        await self._teardown_ws()

    # ----- internals ------------------------------------------------------

    async def _ensure_connection(self) -> None:
        """Open the WS, send ``start``, wait for ``ready``, kick the reader.

        The socket is published on ``self._ws`` only after the handshake
        succeeds, so a rejected or timed-out handshake never leaves a
        half-initialized socket behind for later frames to write to.

        Raises
        ------
        RuntimeError
            If the first frame is not a text ``ready`` frame, or the pod
            answers with an ``error`` frame.
        """
        async with self._connect_lock:
            if self._closed:
                # ``aclose()`` won the race; do not open a new connection.
                return
            if self._ws is not None and not self._ws.closed:
                return
            if self._session is None or self._session.closed:
                self._session = aiohttp.ClientSession(
                    timeout=aiohttp.ClientTimeout(total=_DEFAULT_TIMEOUT_SEC),
                )
            headers = {"Authorization": f"Bearer {self.api_key}"}
            ws = await self._session.ws_connect(self._ws_url, headers=headers)
            try:
                await self._handshake(ws)
            except BaseException:
                # Includes cancellation: close the unready socket, then
                # let the original error propagate unchanged.
                with _suppress():
                    await ws.close()
                raise
            # Retire a reader left over from a previous socket, if any.
            stale = self._reader_task
            if stale is not None and not stale.done():
                stale.cancel()
            self._ws = ws
            self._reader_task = asyncio.create_task(
                self._read_loop(), name="vocence_stt_reader"
            )

    async def _handshake(self, ws: aiohttp.ClientWebSocketResponse) -> None:
        """Send the ``start`` frame on *ws* and validate the pod's reply."""
        start_payload = {
            "type": "start",
            "language": to_iso_639_1(self.language),
            "sample_rate": self.sample_rate,
            "encoding": "pcm_s16le",
            "enable_partials": self.enable_partials,
            "vad_events": self.vad_events,
        }
        await ws.send_str(json.dumps(start_payload))
        ready = await ws.receive(timeout=_DEFAULT_TIMEOUT_SEC)
        if ready.type != aiohttp.WSMsgType.TEXT:
            raise RuntimeError(
                f"VocenceSTT: expected ready frame, got {ready.type}"
            )
        data = json.loads(ready.data)
        # Valid JSON that is not an object has no ``type``; treat it as an
        # unexpected frame rather than failing with AttributeError.
        mtype = str(data.get("type") or "").lower() if isinstance(data, dict) else ""
        if mtype == "error":
            raise RuntimeError(
                f"VocenceSTT connect rejected: "
                f"{data.get('code')}: {data.get('message')}"
            )
        if mtype != "ready":
            raise RuntimeError(
                f"VocenceSTT: unexpected first frame {mtype!r}"
            )

    async def _read_loop(self) -> None:
        """Background task: translate pod events → STTResponse callbacks."""
        ws = self._ws
        if ws is None:
            return
        try:
            async for msg in ws:
                if msg.type != aiohttp.WSMsgType.TEXT:
                    if msg.type in (
                        aiohttp.WSMsgType.CLOSED,
                        aiohttp.WSMsgType.CLOSE,
                        aiohttp.WSMsgType.ERROR,
                    ):
                        return
                    continue
                try:
                    data = json.loads(msg.data)
                except json.JSONDecodeError:
                    continue
                if not isinstance(data, dict):
                    # Valid JSON but not an event object; skipping it keeps
                    # one stray frame from crashing the whole reader.
                    continue
                response = self._translate_event(data)
                if response is None:
                    continue
                cb = self._transcript_callback
                if cb is None:
                    continue
                try:
                    await cb(response)
                except Exception as exc:  # noqa: BLE001
                    logger.warning("VocenceSTT callback raised: %s", exc)
        except asyncio.CancelledError:
            return
        except Exception as exc:  # noqa: BLE001
            logger.warning("VocenceSTT reader loop crashed: %s", exc)
            self.emit("error", str(exc))

    def _translate_event(self, data: dict) -> STTResponse | None:
        """Map one pod event to the framework's STTResponse shape.

        Returns ``None`` for events we drop: empty partials / finals,
        ``error`` events (logged and re-emitted via ``self.emit``
        instead of being passed to the orchestrator), and any other
        event type.
        """
        mtype = str(data.get("type") or "").lower()
        if mtype == "partial":
            text = (data.get("text") or "").strip()
            if not text:
                return None
            return STTResponse(
                event_type=SpeechEventType.INTERIM,
                data=SpeechData(text=text, language=self.language),
            )
        if mtype == "final":
            text = (data.get("text") or "").strip()
            if not text:
                return None
            return STTResponse(
                event_type=SpeechEventType.FINAL,
                data=SpeechData(
                    text=text,
                    # Prefer what the pod detected; fall back to the
                    # configured language.
                    language=data.get("language_detected") or self.language,
                ),
            )
        if mtype == "vad_speech":
            return STTResponse(
                event_type=SpeechEventType.START,
                data=SpeechData(text=""),
            )
        if mtype == "vad_silence":
            return STTResponse(
                event_type=SpeechEventType.END,
                data=SpeechData(text=""),
            )
        if mtype == "error":
            logger.warning(
                "VocenceSTT pod error: %s: %s",
                data.get("code"), data.get("message"),
            )
            self.emit("error", str(data.get("message") or data.get("code")))
            return None
        # ready / pong / unknown — drop silently
        return None

    async def _teardown_ws(self) -> None:
        """Best-effort close of the WebSocket and the HTTP session.

        Sends a ``close`` frame first when the socket is still open.
        Errors during teardown are suppressed; both handles are reset
        to ``None`` so a later frame can reconnect from scratch.
        """
        ws = self._ws
        if ws is not None and not ws.closed:
            with _suppress():
                await ws.send_str(json.dumps({"type": "close"}))
            with _suppress():
                await ws.close(code=1000)
        self._ws = None
        if self._session is not None and not self._session.closed:
            with _suppress():
                await self._session.close()
        self._session = None
303
+
304
+
305
+ class _suppress:
306
+ """Inline contextlib.suppress(Exception) — keeps the module's
307
+ explicit import surface small."""
308
+
309
+ def __enter__(self) -> None:
310
+ return None
311
+
312
+ def __exit__(self, exc_type, exc, tb) -> bool:
313
+ return exc_type is not None and issubclass(exc_type, Exception)
@@ -0,0 +1,294 @@
1
+ """VocenceTTS — streaming text-to-speech with the Vocence voice library.
2
+
3
+ Conforms to the standard TTS abstract interface used by real-time
4
+ agent pipelines, so it slots in alongside any compatible
5
+ ``Pipeline(tts=...)``. The plug-in handles all of the network
6
+ plumbing — connection lifecycle, audio framing, and reconnection —
7
+ so callers just see text in and audio out.
8
+
9
+ Audio output: PCM16LE @ 24 kHz, mono. One persistent connection is
10
+ reused across many ``synthesize()`` calls in the same session;
11
+ lazily opened on the first call and torn down on ``aclose()``.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import asyncio
16
+ import json
17
+ import logging
18
+ import os
19
+ from typing import Any, AsyncIterator, Optional, Union
20
+ from urllib.parse import urlparse
21
+
22
+ import aiohttp
23
+
24
+ from videosdk.agents import TTS, FlushMarker # type: ignore[import-not-found]
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
# Service root. The ``VOCENCE_BASE_URL`` environment variable is read once,
# at import time, and overrides the public endpoint.
_DEFAULT_BASE_URL = os.environ.get("VOCENCE_BASE_URL", "https://api.vocence.ai")
# Default output sample rate in Hz (24 kHz).
_DEFAULT_SAMPLE_RATE = 24_000
# Mono output.
_DEFAULT_CHANNELS = 1
# Timeout in seconds, shared by the HTTP session and WebSocket receives.
_DEFAULT_TIMEOUT_SEC = 30.0
# Per-speak text cap. Mirrors the dev-API limit so the server doesn't
# reject mid-stream. Plugin users sending longer text should split
# their input — the framework's sentence chunker already handles this when
# wired in front of the plugin.
_MAX_TEXT_CHARS_PER_SPEAK = 4000
38
+
39
+
40
def _ws_url_from_base(base_url: str, voice_id: str) -> str:
    """Translate ``https://api.vocence.ai`` → ``wss://api.vocence.ai/...``.

    Parameters
    ----------
    base_url:
        Service root. ``https`` and ``wss`` map to ``wss``; every other
        scheme maps to ``ws``. A trailing slash is ignored and any path
        prefix is kept in front of the voice route.
    voice_id:
        Voice slug or numeric id. It is percent-encoded so characters
        such as ``/``, ``?`` or spaces cannot alter the URL structure.

    Returns
    -------
    str
        The voice-scoped streaming WebSocket URL.
    """
    # Local import: only ``urlparse`` is imported at module level.
    from urllib.parse import quote

    parsed = urlparse(base_url.rstrip("/"))
    # ``urlparse`` lower-cases the scheme. A base URL that is already
    # ``wss://`` must stay encrypted rather than being downgraded to
    # ``ws://`` (which would send the bearer token in clear text).
    scheme = "wss" if parsed.scheme in ("https", "wss") else "ws"
    voice_segment = quote(str(voice_id), safe="")
    return f"{scheme}://{parsed.netloc}{parsed.path}/v1/voices/{voice_segment}/stream"
45
+
46
+
47
class VocenceTTS(TTS):
    """Streaming TTS plugin backed by the Vocence voice service.

    Parameters
    ----------
    api_key:
        Vocence developer key (``voc_live_…``). Falls back to the
        ``VOCENCE_API_KEY`` env var. Required.
    voice:
        Voice slug (built-in like ``"design-aria"``) or the numeric id
        of a saved designed / cloned voice. Required at construction
        because the WS endpoint is voice-scoped.
    language:
        Optional language hint passed on every ``speak`` frame.
    base_url:
        Override the default ``https://api.vocence.ai`` (set for
        staging / self-hosted deployments).
    sample_rate:
        Sample rate handed to the ``TTS`` base class; defaults to 24 kHz.
        NOTE(review): nothing here tells the service which rate to
        produce — confirm before passing a non-default value.
    **kwargs:
        Accepted and ignored.

    Raises
    ------
    ValueError
        If no API key is passed and ``VOCENCE_API_KEY`` is unset or empty.
    """

    def __init__(
        self,
        *,
        api_key: Optional[str] = None,
        voice: Union[str, int],
        language: Optional[str] = None,
        base_url: str = _DEFAULT_BASE_URL,
        sample_rate: int = _DEFAULT_SAMPLE_RATE,
        **kwargs: Any,
    ) -> None:
        super().__init__(sample_rate=sample_rate, num_channels=_DEFAULT_CHANNELS)
        self.api_key = api_key or os.environ.get("VOCENCE_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Vocence API key required — pass api_key= or set VOCENCE_API_KEY"
            )
        self.voice = str(voice)
        self.language = language
        self.base_url = base_url
        self._ws_url = _ws_url_from_base(base_url, self.voice)

        self._session: aiohttp.ClientSession | None = None
        self._ws: aiohttp.ClientWebSocketResponse | None = None
        # Serialises connection attempts so concurrent callers share one WS.
        self._connect_lock = asyncio.Lock()
        # Per-speak state: cleared at the start of each synthesize() call.
        # The receiver loop runs INLINE with the speak request so we can
        # cancel cleanly on interrupt without a separate task.
        self._interrupted = False
        self._first_chunk_sent = False

    # ----- abstract overrides ---------------------------------------------

    async def synthesize(
        self,
        text: AsyncIterator[Union[str, FlushMarker]] | str,
        voice_id: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Send text → push PCM frames to ``self.audio_track``.

        ``text`` may be a plain string OR an async iterator of strings
        (with optional ``FlushMarker`` segment boundaries). The async
        iterator path lets the pipeline pump LLM tokens directly here
        instead of waiting for the full reply. We don't yet support
        the WebSocket re-flush semantics that some upstream sentence
        chunkers rely on, so we concatenate iterator chunks up to a
        flush / end-of-stream marker and send them as one ``speak``
        frame each. This produces good audio with one network
        round-trip per sentence.

        ``voice_id`` is accepted for API symmetry but ignored — the
        endpoint is voice-scoped at the WS URL, so changing voice
        mid-session would require a fresh connection. Construct a new
        ``VocenceTTS`` for a different voice.

        Raises whatever ``_ensure_connection`` raises when the service
        cannot be reached or rejects the handshake.
        """
        self._interrupted = False
        self._first_chunk_sent = False
        # Connect eagerly so auth / not-found errors surface before any
        # text has been consumed from the iterator.
        await self._ensure_connection()
        if isinstance(text, str):
            await self._speak_once(text)
            return
        # Async iterator path — collect into segments separated by FlushMarker
        # (or end-of-iterator), send each segment as one speak.
        buf: list[str] = []
        async for chunk in text:
            if self._interrupted:
                return
            if isinstance(chunk, FlushMarker):
                segment = "".join(buf).strip()
                buf = []
                if segment:
                    await self._speak_once(segment)
                    if self._interrupted:
                        return
                continue
            if chunk:
                buf.append(chunk)
        tail = "".join(buf).strip()
        if tail and not self._interrupted:
            await self._speak_once(tail)

    async def interrupt(self) -> None:
        """Stop the in-flight ``synthesize()`` ASAP. Doesn't close the
        WebSocket — the connection stays warm for the next call."""
        # The receiver loop checks this flag between frames and bails out.
        # No control frame is sent to the pod.
        # NOTE(review): frames of the interrupted utterance that are still
        # in flight are not drained here — confirm the pod discards them
        # once the next ``speak`` arrives.
        self._interrupted = True

    async def aclose(self) -> None:
        """Tear down the WebSocket + HTTP session. Idempotent."""
        # Stop any in-flight synthesize() so it does not reconnect for its
        # remaining segments after we have closed everything.
        self._interrupted = True
        ws = self._ws
        self._ws = None
        if ws is not None and not ws.closed:
            with _suppress():
                await ws.send_str(json.dumps({"type": "stop"}))
            with _suppress():
                await ws.close(code=1000)
        if self._session is not None and not self._session.closed:
            with _suppress():
                await self._session.close()
            self._session = None

    def reset_first_audio_tracking(self) -> None:
        """Re-arm the first-audio callback for the next audio frame."""
        self._first_chunk_sent = False

    # ----- internals ------------------------------------------------------

    async def _ensure_connection(self) -> None:
        """Open the WS if not already open; reopen if it died.

        The socket is stored in ``self._ws`` only after the server's
        ``ready`` frame has been received. If the handshake fails the
        socket is closed and the error propagates, so a later call starts
        from a clean slate instead of reusing a half-initialised socket.

        Raises
        ------
        RuntimeError
            If the first frame is not text, is an ``error`` frame, or has
            any type other than ``ready``.
        """
        async with self._connect_lock:
            if self._ws is not None and not self._ws.closed:
                return
            # Drop any stale (closed) handle before reconnecting.
            self._ws = None
            if self._session is None or self._session.closed:
                self._session = aiohttp.ClientSession(
                    timeout=aiohttp.ClientTimeout(total=_DEFAULT_TIMEOUT_SEC),
                )
            headers = {"Authorization": f"Bearer {self.api_key}"}
            ws = await self._session.ws_connect(self._ws_url, headers=headers)
            handshake_ok = False
            try:
                await self._await_ready(ws)
                handshake_ok = True
            finally:
                # Also runs on cancellation / timeout: never leave an
                # unverified socket open behind us.
                if not handshake_ok:
                    with _suppress():
                        await ws.close()
            self._ws = ws

    async def _await_ready(self, ws: aiohttp.ClientWebSocketResponse) -> None:
        """Wait for the server's initial ``ready`` frame on ``ws``."""
        # First message from the server is ``ready``. Wait for it so
        # subsequent ``speak`` frames aren't sent into a half-open
        # connection. Surface any auth / not-found error here.
        msg = await ws.receive(timeout=_DEFAULT_TIMEOUT_SEC)
        if msg.type != aiohttp.WSMsgType.TEXT:
            raise RuntimeError(
                f"VocenceTTS: expected text ready frame, got {msg.type}"
            )
        data = json.loads(msg.data)
        mtype = (data.get("type") or "").lower()
        if mtype == "error":
            raise RuntimeError(
                f"VocenceTTS connect rejected: "
                f"{data.get('code')}: {data.get('message')}"
            )
        if mtype != "ready":
            raise RuntimeError(
                f"VocenceTTS: unexpected first frame {mtype!r}"
            )

    async def _speak_once(self, text: str) -> None:
        """Send one ``speak`` and drain audio frames until ``end``.

        Text longer than ``_MAX_TEXT_CHARS_PER_SPEAK`` is truncated with a
        warning. If the connection was lost since the previous segment it
        is re-established first, unless the call has been interrupted.
        """
        if not text:
            return
        if len(text) > _MAX_TEXT_CHARS_PER_SPEAK:
            # Truncate rather than fail: a sentence chunker in front
            # of us should keep segments well under the cap.
            logger.warning(
                "VocenceTTS: truncating %d-char segment to %d (cap)",
                len(text), _MAX_TEXT_CHARS_PER_SPEAK,
            )
            text = text[:_MAX_TEXT_CHARS_PER_SPEAK]
        ws = self._ws
        if ws is None or ws.closed:
            # The socket died mid-stream (the drain loop clears ``_ws`` on
            # close). Reconnect instead of silently dropping the segment.
            if self._interrupted:
                return
            await self._ensure_connection()
            ws = self._ws
            if ws is None or self._interrupted:
                return
        payload: dict[str, Any] = {"type": "speak", "text": text}
        if self.language:
            payload["language"] = self.language
        await ws.send_str(json.dumps(payload))
        await self._drain_until_end(ws)

    async def _drain_until_end(self, ws: aiohttp.ClientWebSocketResponse) -> None:
        """Read frames until ``{"type":"end"}`` or interruption.

        Binary frames are forwarded to ``self.audio_track``. The loop also
        stops on a receive timeout, a server ``error`` frame, or when the
        socket closes; in the last case ``self._ws`` is cleared.
        """
        while True:
            if self._interrupted:
                return
            try:
                msg = await ws.receive(timeout=_DEFAULT_TIMEOUT_SEC)
            except asyncio.TimeoutError:
                logger.warning("VocenceTTS: receive timed out")
                return
            if msg.type == aiohttp.WSMsgType.BINARY:
                if not msg.data:
                    continue
                # First-byte callback for TTFB metrics — the pipeline
                # uses this to fire its ``first_audio_byte`` event.
                if not self._first_chunk_sent:
                    self._first_chunk_sent = True
                    if self._first_audio_callback is not None:
                        with _suppress():
                            await self._first_audio_callback()
                if self.audio_track is not None:
                    with _suppress():
                        await self.audio_track.add_new_bytes(msg.data)
                continue
            if msg.type == aiohttp.WSMsgType.TEXT:
                try:
                    data = json.loads(msg.data)
                except json.JSONDecodeError:
                    # Ignore malformed control frames; keep draining.
                    continue
                mtype = (data.get("type") or "").lower()
                if mtype == "end":
                    return
                if mtype == "meta":
                    # Sample rate / encoding info — we know our pod
                    # output is PCM16LE @ 24 kHz mono so don't need to
                    # do anything with this on the plugin side. The
                    # audio_track already expects this format because
                    # the sample rate is passed to the base class at
                    # __init__.
                    continue
                if mtype == "error":
                    logger.warning(
                        "VocenceTTS pod error: %s: %s",
                        data.get("code"), data.get("message"),
                    )
                    return
                continue
            if msg.type in (
                aiohttp.WSMsgType.CLOSED,
                aiohttp.WSMsgType.CLOSE,
                aiohttp.WSMsgType.ERROR,
            ):
                # Forget the dead socket so the next speak reconnects, but
                # never clobber a newer connection made in the meantime.
                if self._ws is ws:
                    self._ws = None
                return
284
+
285
+
286
class _suppress:
    """Inline equivalent of ``contextlib.suppress(Exception)``.

    Kept local so the module's import list stays short. Exceptions
    derived from ``Exception`` that escape the ``with`` body are
    discarded; anything else (or no exception at all) passes through.
    """

    def __enter__(self) -> None:
        return None

    def __exit__(self, exc_type, exc, tb) -> bool:
        raised = exc_type is not None
        # Returning True tells the interpreter to swallow the exception.
        return issubclass(exc_type, Exception) if raised else False