videosdk-plugins-resemble 0.0.27__py3-none-any.whl → 0.0.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of videosdk-plugins-resemble might be problematic. Click here for more details.
- videosdk/plugins/resemble/tts.py +57 -25
- videosdk/plugins/resemble/version.py +1 -1
- {videosdk_plugins_resemble-0.0.27.dist-info → videosdk_plugins_resemble-0.0.29.dist-info}/METADATA +2 -2
- videosdk_plugins_resemble-0.0.29.dist-info/RECORD +6 -0
- videosdk_plugins_resemble-0.0.27.dist-info/RECORD +0 -6
- {videosdk_plugins_resemble-0.0.27.dist-info → videosdk_plugins_resemble-0.0.29.dist-info}/WHEEL +0 -0
videosdk/plugins/resemble/tts.py
CHANGED
|
@@ -7,12 +7,14 @@ import httpx
|
|
|
7
7
|
from dataclasses import dataclass
|
|
8
8
|
|
|
9
9
|
from videosdk.agents import TTS
|
|
10
|
+
from videosdk.agents.utils import segment_text
|
|
10
11
|
|
|
11
12
|
RESEMBLE_HTTP_STREAMING_URL = "https://f.cluster.resemble.ai/stream"
|
|
12
13
|
DEFAULT_VOICE_UUID = "55592656"
|
|
13
14
|
DEFAULT_SAMPLE_RATE = 22050
|
|
14
15
|
DEFAULT_PRECISION = "PCM_16"
|
|
15
16
|
|
|
17
|
+
|
|
16
18
|
class ResembleTTS(TTS):
|
|
17
19
|
def __init__(
|
|
18
20
|
self,
|
|
@@ -26,16 +28,20 @@ class ResembleTTS(TTS):
|
|
|
26
28
|
|
|
27
29
|
self.api_key = api_key or os.getenv("RESEMBLE_API_KEY")
|
|
28
30
|
if not self.api_key:
|
|
29
|
-
raise ValueError(
|
|
30
|
-
|
|
31
|
+
raise ValueError(
|
|
32
|
+
"Resemble API key is required. Provide either `api_key` or set `RESEMBLE_API_KEY` environment variable.")
|
|
33
|
+
|
|
31
34
|
self.voice_uuid = voice_uuid
|
|
32
35
|
self.precision = precision
|
|
33
36
|
|
|
34
37
|
self.audio_track = None
|
|
35
38
|
self.loop = None
|
|
36
39
|
self._first_chunk_sent = False
|
|
40
|
+
self._interrupted = False
|
|
41
|
+
self._current_synthesis_task: asyncio.Task | None = None
|
|
37
42
|
self._http_client = httpx.AsyncClient(
|
|
38
|
-
timeout=httpx.Timeout(connect=15.0, read=30.0,
|
|
43
|
+
timeout=httpx.Timeout(connect=15.0, read=30.0,
|
|
44
|
+
write=5.0, pool=5.0),
|
|
39
45
|
follow_redirects=True,
|
|
40
46
|
)
|
|
41
47
|
|
|
@@ -49,28 +55,41 @@ class ResembleTTS(TTS):
|
|
|
49
55
|
**kwargs: Any,
|
|
50
56
|
) -> None:
|
|
51
57
|
try:
|
|
52
|
-
if isinstance(text, AsyncIterator):
|
|
53
|
-
full_text = ""
|
|
54
|
-
async for chunk in text:
|
|
55
|
-
full_text += chunk
|
|
56
|
-
else:
|
|
57
|
-
full_text = text
|
|
58
|
-
|
|
59
58
|
if not self.audio_track or not self.loop:
|
|
60
59
|
self.emit("error", "Audio track or event loop not set")
|
|
61
60
|
return
|
|
62
61
|
|
|
63
|
-
|
|
62
|
+
self._interrupted = False
|
|
63
|
+
|
|
64
|
+
if isinstance(text, AsyncIterator):
|
|
65
|
+
async for segment in segment_text(text):
|
|
66
|
+
if self._interrupted:
|
|
67
|
+
break
|
|
68
|
+
await self._synthesize_segment(segment, **kwargs)
|
|
69
|
+
else:
|
|
70
|
+
if not self._interrupted:
|
|
71
|
+
await self._synthesize_segment(text, **kwargs)
|
|
64
72
|
|
|
65
73
|
except Exception as e:
|
|
66
74
|
self.emit("error", f"Resemble TTS synthesis failed: {str(e)}")
|
|
67
75
|
|
|
76
|
+
async def _synthesize_segment(self, text: str, **kwargs: Any) -> None:
|
|
77
|
+
"""Synthesize a single text segment"""
|
|
78
|
+
if not text.strip() or self._interrupted:
|
|
79
|
+
return
|
|
80
|
+
|
|
81
|
+
try:
|
|
82
|
+
await self._http_stream_synthesis(text)
|
|
83
|
+
except Exception as e:
|
|
84
|
+
if not self._interrupted:
|
|
85
|
+
self.emit("error", f"Segment synthesis failed: {str(e)}")
|
|
86
|
+
|
|
68
87
|
async def _http_stream_synthesis(self, text: str) -> None:
|
|
69
88
|
headers = {
|
|
70
89
|
"Authorization": f"Token {self.api_key}",
|
|
71
90
|
"Content-Type": "application/json",
|
|
72
91
|
}
|
|
73
|
-
|
|
92
|
+
|
|
74
93
|
payload = {
|
|
75
94
|
"voice_uuid": self.voice_uuid,
|
|
76
95
|
"data": text,
|
|
@@ -80,9 +99,9 @@ class ResembleTTS(TTS):
|
|
|
80
99
|
|
|
81
100
|
try:
|
|
82
101
|
async with self._http_client.stream(
|
|
83
|
-
"POST",
|
|
102
|
+
"POST",
|
|
84
103
|
RESEMBLE_HTTP_STREAMING_URL,
|
|
85
|
-
headers=headers,
|
|
104
|
+
headers=headers,
|
|
86
105
|
json=payload
|
|
87
106
|
) as response:
|
|
88
107
|
response.raise_for_status()
|
|
@@ -91,9 +110,11 @@ class ResembleTTS(TTS):
|
|
|
91
110
|
header_processed = False
|
|
92
111
|
|
|
93
112
|
async for chunk in response.aiter_bytes():
|
|
113
|
+
if self._interrupted:
|
|
114
|
+
break
|
|
94
115
|
if not header_processed:
|
|
95
116
|
audio_data += chunk
|
|
96
|
-
data_pos = audio_data.find(b
|
|
117
|
+
data_pos = audio_data.find(b"data")
|
|
97
118
|
if data_pos != -1:
|
|
98
119
|
header_size = data_pos + 8
|
|
99
120
|
audio_data = audio_data[header_size:]
|
|
@@ -102,31 +123,38 @@ class ResembleTTS(TTS):
|
|
|
102
123
|
if chunk:
|
|
103
124
|
audio_data += chunk
|
|
104
125
|
|
|
105
|
-
if audio_data:
|
|
126
|
+
if audio_data and not self._interrupted:
|
|
106
127
|
await self._stream_audio_chunks(audio_data)
|
|
107
|
-
|
|
128
|
+
|
|
108
129
|
except httpx.HTTPStatusError as e:
|
|
109
|
-
|
|
130
|
+
if not self._interrupted:
|
|
131
|
+
self.emit(
|
|
132
|
+
"error", f"HTTP error {e.response.status_code}: {e.response.text}")
|
|
110
133
|
except Exception as e:
|
|
111
|
-
self.
|
|
134
|
+
if not self._interrupted:
|
|
135
|
+
self.emit(
|
|
136
|
+
"error", f"HTTP streaming synthesis failed: {str(e)}")
|
|
112
137
|
|
|
113
138
|
async def _stream_audio_chunks(self, audio_bytes: bytes) -> None:
|
|
114
139
|
"""Stream audio data in chunks for smooth playback """
|
|
115
|
-
chunk_size = int(self.sample_rate * 1 * 2 * 20 / 1000)
|
|
116
|
-
|
|
140
|
+
chunk_size = int(self.sample_rate * 1 * 2 * 20 / 1000)
|
|
141
|
+
|
|
117
142
|
for i in range(0, len(audio_bytes), chunk_size):
|
|
143
|
+
if self._interrupted:
|
|
144
|
+
break
|
|
145
|
+
|
|
118
146
|
chunk = audio_bytes[i:i + chunk_size]
|
|
119
|
-
|
|
147
|
+
|
|
120
148
|
if len(chunk) < chunk_size and len(chunk) > 0:
|
|
121
149
|
padding_needed = chunk_size - len(chunk)
|
|
122
150
|
chunk += b'\x00' * padding_needed
|
|
123
|
-
|
|
151
|
+
|
|
124
152
|
if len(chunk) == chunk_size:
|
|
125
153
|
if not self._first_chunk_sent and self._first_audio_callback:
|
|
126
154
|
self._first_chunk_sent = True
|
|
127
155
|
await self._first_audio_callback()
|
|
128
|
-
|
|
129
|
-
|
|
156
|
+
|
|
157
|
+
asyncio.create_task(self.audio_track.add_new_bytes(chunk))
|
|
130
158
|
await asyncio.sleep(0.001)
|
|
131
159
|
|
|
132
160
|
async def aclose(self) -> None:
|
|
@@ -135,5 +163,9 @@ class ResembleTTS(TTS):
|
|
|
135
163
|
await super().aclose()
|
|
136
164
|
|
|
137
165
|
async def interrupt(self) -> None:
|
|
166
|
+
"""Interrupt TTS synthesis"""
|
|
167
|
+
self._interrupted = True
|
|
168
|
+
if self._current_synthesis_task and not self._current_synthesis_task.done():
|
|
169
|
+
self._current_synthesis_task.cancel()
|
|
138
170
|
if self.audio_track:
|
|
139
171
|
self.audio_track.interrupt()
|
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.27"
|
|
1
|
+
__version__ = "0.0.29"
|
{videosdk_plugins_resemble-0.0.27.dist-info → videosdk_plugins_resemble-0.0.29.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: videosdk-plugins-resemble
|
|
3
|
-
Version: 0.0.27
|
|
3
|
+
Version: 0.0.29
|
|
4
4
|
Summary: VideoSDK Agent Framework plugin for Resemble
|
|
5
5
|
Author: videosdk
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,7 +12,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
|
|
|
12
12
|
Classifier: Topic :: Multimedia :: Video
|
|
13
13
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
14
|
Requires-Python: >=3.11
|
|
15
|
-
Requires-Dist: videosdk-agents>=0.0.27
|
|
15
|
+
Requires-Dist: videosdk-agents>=0.0.29
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
|
|
18
18
|
# VideoSDK Resemble Plugin
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
videosdk/plugins/resemble/__init__.py,sha256=V4pLV2GncUWcXNFXea8Gtf7hbRYKBQSLkW2DsyxW2ks,55
|
|
2
|
+
videosdk/plugins/resemble/tts.py,sha256=5RM4QfgsXxIyp6jUO9kq9YpTgExoR-1R5iS4APLhMHY,5952
|
|
3
|
+
videosdk/plugins/resemble/version.py,sha256=x-mEbDNfu7r2SKAGR0A7P0FwPyhhSxJRlutHkucLsHk,23
|
|
4
|
+
videosdk_plugins_resemble-0.0.29.dist-info/METADATA,sha256=Ytz4EEMa0Yl0i90j19dBNR5pe-HrpH-Rzt4so8jxN3U,767
|
|
5
|
+
videosdk_plugins_resemble-0.0.29.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
+
videosdk_plugins_resemble-0.0.29.dist-info/RECORD,,
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
videosdk/plugins/resemble/__init__.py,sha256=V4pLV2GncUWcXNFXea8Gtf7hbRYKBQSLkW2DsyxW2ks,55
|
|
2
|
-
videosdk/plugins/resemble/tts.py,sha256=MlHx5AxWrXkUOqPfujBoAM2lwLhQt7FDkDH6IPDcwEI,4815
|
|
3
|
-
videosdk/plugins/resemble/version.py,sha256=fJCbtkUBjOoT0tN9kkSyqEm7I4rr92yi9hAJsw2fSpc,22
|
|
4
|
-
videosdk_plugins_resemble-0.0.27.dist-info/METADATA,sha256=D0QC0B5ycLnP4WWX_Ebccpkj9Qf88OTOzwIgsussfes,767
|
|
5
|
-
videosdk_plugins_resemble-0.0.27.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
-
videosdk_plugins_resemble-0.0.27.dist-info/RECORD,,
|
{videosdk_plugins_resemble-0.0.27.dist-info → videosdk_plugins_resemble-0.0.29.dist-info}/WHEEL
RENAMED
|
File without changes
|