videosdk-plugins-resemble 0.0.26__tar.gz → 0.0.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of videosdk-plugins-resemble might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: videosdk-plugins-resemble
3
- Version: 0.0.26
3
+ Version: 0.0.28
4
4
  Summary: VideoSDK Agent Framework plugin for Resemble
5
5
  Author: videosdk
6
6
  License-Expression: Apache-2.0
@@ -12,7 +12,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
12
12
  Classifier: Topic :: Multimedia :: Video
13
13
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
14
  Requires-Python: >=3.11
15
- Requires-Dist: videosdk-agents>=0.0.26
15
+ Requires-Dist: videosdk-agents>=0.0.28
16
16
  Description-Content-Type: text/markdown
17
17
 
18
18
  # VideoSDK Resemble Plugin
@@ -21,7 +21,7 @@ classifiers = [
21
21
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
22
22
  ]
23
23
  dependencies = [
24
- "videosdk-agents>=0.0.26"
24
+ "videosdk-agents>=0.0.28"
25
25
  ]
26
26
 
27
27
  [tool.hatch.version]
@@ -7,12 +7,14 @@ import httpx
7
7
  from dataclasses import dataclass
8
8
 
9
9
  from videosdk.agents import TTS
10
+ from videosdk.agents.utils import segment_text
10
11
 
11
12
  RESEMBLE_HTTP_STREAMING_URL = "https://f.cluster.resemble.ai/stream"
12
13
  DEFAULT_VOICE_UUID = "55592656"
13
14
  DEFAULT_SAMPLE_RATE = 22050
14
15
  DEFAULT_PRECISION = "PCM_16"
15
16
 
17
+
16
18
  class ResembleTTS(TTS):
17
19
  def __init__(
18
20
  self,
@@ -26,16 +28,20 @@ class ResembleTTS(TTS):
26
28
 
27
29
  self.api_key = api_key or os.getenv("RESEMBLE_API_KEY")
28
30
  if not self.api_key:
29
- raise ValueError("Resemble API key is required. Provide either `api_key` or set `RESEMBLE_API_KEY` environment variable.")
30
-
31
+ raise ValueError(
32
+ "Resemble API key is required. Provide either `api_key` or set `RESEMBLE_API_KEY` environment variable.")
33
+
31
34
  self.voice_uuid = voice_uuid
32
35
  self.precision = precision
33
36
 
34
37
  self.audio_track = None
35
38
  self.loop = None
36
39
  self._first_chunk_sent = False
40
+ self._interrupted = False
41
+ self._current_synthesis_task: asyncio.Task | None = None
37
42
  self._http_client = httpx.AsyncClient(
38
- timeout=httpx.Timeout(connect=15.0, read=30.0, write=5.0, pool=5.0),
43
+ timeout=httpx.Timeout(connect=15.0, read=30.0,
44
+ write=5.0, pool=5.0),
39
45
  follow_redirects=True,
40
46
  )
41
47
 
@@ -49,28 +55,41 @@ class ResembleTTS(TTS):
49
55
  **kwargs: Any,
50
56
  ) -> None:
51
57
  try:
52
- if isinstance(text, AsyncIterator):
53
- full_text = ""
54
- async for chunk in text:
55
- full_text += chunk
56
- else:
57
- full_text = text
58
-
59
58
  if not self.audio_track or not self.loop:
60
59
  self.emit("error", "Audio track or event loop not set")
61
60
  return
62
61
 
63
- await self._http_stream_synthesis(full_text)
62
+ self._interrupted = False
63
+
64
+ if isinstance(text, AsyncIterator):
65
+ async for segment in segment_text(text):
66
+ if self._interrupted:
67
+ break
68
+ await self._synthesize_segment(segment, **kwargs)
69
+ else:
70
+ if not self._interrupted:
71
+ await self._synthesize_segment(text, **kwargs)
64
72
 
65
73
  except Exception as e:
66
74
  self.emit("error", f"Resemble TTS synthesis failed: {str(e)}")
67
75
 
76
+ async def _synthesize_segment(self, text: str, **kwargs: Any) -> None:
77
+ """Synthesize a single text segment"""
78
+ if not text.strip() or self._interrupted:
79
+ return
80
+
81
+ try:
82
+ await self._http_stream_synthesis(text)
83
+ except Exception as e:
84
+ if not self._interrupted:
85
+ self.emit("error", f"Segment synthesis failed: {str(e)}")
86
+
68
87
  async def _http_stream_synthesis(self, text: str) -> None:
69
88
  headers = {
70
89
  "Authorization": f"Token {self.api_key}",
71
90
  "Content-Type": "application/json",
72
91
  }
73
-
92
+
74
93
  payload = {
75
94
  "voice_uuid": self.voice_uuid,
76
95
  "data": text,
@@ -80,9 +99,9 @@ class ResembleTTS(TTS):
80
99
 
81
100
  try:
82
101
  async with self._http_client.stream(
83
- "POST",
102
+ "POST",
84
103
  RESEMBLE_HTTP_STREAMING_URL,
85
- headers=headers,
104
+ headers=headers,
86
105
  json=payload
87
106
  ) as response:
88
107
  response.raise_for_status()
@@ -91,9 +110,11 @@ class ResembleTTS(TTS):
91
110
  header_processed = False
92
111
 
93
112
  async for chunk in response.aiter_bytes():
113
+ if self._interrupted:
114
+ break
94
115
  if not header_processed:
95
116
  audio_data += chunk
96
- data_pos = audio_data.find(b'data')
117
+ data_pos = audio_data.find(b"data")
97
118
  if data_pos != -1:
98
119
  header_size = data_pos + 8
99
120
  audio_data = audio_data[header_size:]
@@ -102,31 +123,38 @@ class ResembleTTS(TTS):
102
123
  if chunk:
103
124
  audio_data += chunk
104
125
 
105
- if audio_data:
126
+ if audio_data and not self._interrupted:
106
127
  await self._stream_audio_chunks(audio_data)
107
-
128
+
108
129
  except httpx.HTTPStatusError as e:
109
- self.emit("error", f"HTTP error {e.response.status_code}: {e.response.text}")
130
+ if not self._interrupted:
131
+ self.emit(
132
+ "error", f"HTTP error {e.response.status_code}: {e.response.text}")
110
133
  except Exception as e:
111
- self.emit("error", f"HTTP streaming synthesis failed: {str(e)}")
134
+ if not self._interrupted:
135
+ self.emit(
136
+ "error", f"HTTP streaming synthesis failed: {str(e)}")
112
137
 
113
138
  async def _stream_audio_chunks(self, audio_bytes: bytes) -> None:
114
139
  """Stream audio data in chunks for smooth playback """
115
- chunk_size = int(self.sample_rate * 1 * 2 * 20 / 1000)
116
-
140
+ chunk_size = int(self.sample_rate * 1 * 2 * 20 / 1000)
141
+
117
142
  for i in range(0, len(audio_bytes), chunk_size):
143
+ if self._interrupted:
144
+ break
145
+
118
146
  chunk = audio_bytes[i:i + chunk_size]
119
-
147
+
120
148
  if len(chunk) < chunk_size and len(chunk) > 0:
121
149
  padding_needed = chunk_size - len(chunk)
122
150
  chunk += b'\x00' * padding_needed
123
-
151
+
124
152
  if len(chunk) == chunk_size:
125
153
  if not self._first_chunk_sent and self._first_audio_callback:
126
154
  self._first_chunk_sent = True
127
155
  await self._first_audio_callback()
128
-
129
- self.loop.create_task(self.audio_track.add_new_bytes(chunk))
156
+
157
+ asyncio.create_task(self.audio_track.add_new_bytes(chunk))
130
158
  await asyncio.sleep(0.001)
131
159
 
132
160
  async def aclose(self) -> None:
@@ -135,5 +163,9 @@ class ResembleTTS(TTS):
135
163
  await super().aclose()
136
164
 
137
165
  async def interrupt(self) -> None:
166
+ """Interrupt TTS synthesis"""
167
+ self._interrupted = True
168
+ if self._current_synthesis_task and not self._current_synthesis_task.done():
169
+ self._current_synthesis_task.cancel()
138
170
  if self.audio_track:
139
171
  self.audio_track.interrupt()
@@ -0,0 +1 @@
1
+ __version__ = "0.0.28"
@@ -1 +0,0 @@
1
- __version__ = "0.0.26"