videosdk-plugins-deepgram 0.0.35__py3-none-any.whl → 0.0.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of videosdk-plugins-deepgram might be problematic. Click here for more details.
- videosdk/plugins/deepgram/__init__.py +2 -2
- videosdk/plugins/deepgram/stt.py +7 -0
- videosdk/plugins/deepgram/tts.py +186 -0
- videosdk/plugins/deepgram/version.py +1 -1
- {videosdk_plugins_deepgram-0.0.35.dist-info → videosdk_plugins_deepgram-0.0.37.dist-info}/METADATA +2 -2
- videosdk_plugins_deepgram-0.0.37.dist-info/RECORD +7 -0
- videosdk_plugins_deepgram-0.0.35.dist-info/RECORD +0 -6
- {videosdk_plugins_deepgram-0.0.35.dist-info → videosdk_plugins_deepgram-0.0.37.dist-info}/WHEEL +0 -0
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
from .stt import DeepgramSTT
|
|
2
|
-
|
|
3
|
-
__all__ = ["DeepgramSTT"]
|
|
2
|
+
from .tts import DeepgramTTS
|
|
3
|
+
__all__ = ["DeepgramSTT", "DeepgramTTS"]
|
videosdk/plugins/deepgram/stt.py
CHANGED
|
@@ -194,16 +194,23 @@ class DeepgramSTT(BaseSTT):
|
|
|
194
194
|
"""Cleanup resources"""
|
|
195
195
|
if self._ws_task:
|
|
196
196
|
self._ws_task.cancel()
|
|
197
|
+
logger.info("DeepgramSTT WebSocket task cancelled")
|
|
197
198
|
try:
|
|
198
199
|
await self._ws_task
|
|
199
200
|
except asyncio.CancelledError:
|
|
200
201
|
pass
|
|
201
202
|
self._ws_task = None
|
|
203
|
+
logger.info("DeepgramSTT WebSocket task cleared")
|
|
202
204
|
|
|
203
205
|
if self._ws:
|
|
204
206
|
await self._ws.close()
|
|
207
|
+
logger.info("DeepgramSTT WebSocket closed")
|
|
205
208
|
self._ws = None
|
|
206
209
|
|
|
207
210
|
if self._session:
|
|
208
211
|
await self._session.close()
|
|
212
|
+
logger.info("DeepgramSTT cleaned up")
|
|
209
213
|
self._session = None
|
|
214
|
+
|
|
215
|
+
# Call base class cleanup
|
|
216
|
+
await super().aclose()
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import aiohttp
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any, AsyncIterator, Union, Optional
|
|
7
|
+
import os
|
|
8
|
+
from videosdk.agents import TTS
|
|
9
|
+
DEEPGRAM_SAMPLE_RATE = 24000
|
|
10
|
+
DEEPGRAM_CHANNELS = 1
|
|
11
|
+
DEFAULT_MODEL = "aura-2-thalia-en"
|
|
12
|
+
DEFAULT_ENCODING = "linear16"
|
|
13
|
+
API_BASE_URL = "wss://api.deepgram.com/v1/speak"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DeepgramTTS(TTS):
|
|
17
|
+
def __init__(
|
|
18
|
+
self,
|
|
19
|
+
*,
|
|
20
|
+
api_key: str | None = None,
|
|
21
|
+
model: str = DEFAULT_MODEL,
|
|
22
|
+
encoding: str = DEFAULT_ENCODING,
|
|
23
|
+
sample_rate: int = DEEPGRAM_SAMPLE_RATE,
|
|
24
|
+
base_url: str = API_BASE_URL,
|
|
25
|
+
**kwargs: Any,
|
|
26
|
+
) -> None:
|
|
27
|
+
super().__init__(sample_rate=sample_rate, num_channels=DEEPGRAM_CHANNELS)
|
|
28
|
+
|
|
29
|
+
self.model = model
|
|
30
|
+
self.encoding = encoding
|
|
31
|
+
self.base_url = base_url
|
|
32
|
+
self.audio_track = None
|
|
33
|
+
self.loop = None
|
|
34
|
+
self._ws_session: aiohttp.ClientSession | None = None
|
|
35
|
+
self._ws_connection: aiohttp.ClientWebSocketResponse | None = None
|
|
36
|
+
self._send_task: asyncio.Task | None = None
|
|
37
|
+
self._recv_task: asyncio.Task | None = None
|
|
38
|
+
self._should_stop = False
|
|
39
|
+
self._first_chunk_sent = False
|
|
40
|
+
self._connection_lock = asyncio.Lock()
|
|
41
|
+
|
|
42
|
+
self.api_key = api_key or os.getenv("DEEPGRAM_API_KEY")
|
|
43
|
+
if not self.api_key:
|
|
44
|
+
raise ValueError(
|
|
45
|
+
"Deepgram API key must be provided either through the 'api_key' parameter or the DEEPGRAM_API_KEY environment variable."
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
def reset_first_audio_tracking(self) -> None:
|
|
49
|
+
self._first_chunk_sent = False
|
|
50
|
+
|
|
51
|
+
async def _ensure_connection(self) -> None:
|
|
52
|
+
async with self._connection_lock:
|
|
53
|
+
if self._ws_connection and not self._ws_connection.closed:
|
|
54
|
+
return
|
|
55
|
+
|
|
56
|
+
params = {
|
|
57
|
+
"model": self.model,
|
|
58
|
+
"encoding": self.encoding,
|
|
59
|
+
"sample_rate": self.sample_rate,
|
|
60
|
+
}
|
|
61
|
+
param_string = "&".join([f"{k}={v}" for k, v in params.items()])
|
|
62
|
+
full_ws_url = f"{self.base_url}?{param_string}"
|
|
63
|
+
headers = {"Authorization": f"Token {self.api_key}"}
|
|
64
|
+
|
|
65
|
+
self._ws_session = aiohttp.ClientSession()
|
|
66
|
+
self._ws_connection = await asyncio.wait_for(
|
|
67
|
+
self._ws_session.ws_connect(full_ws_url, headers=headers),
|
|
68
|
+
timeout=50.0
|
|
69
|
+
)
|
|
70
|
+
if self._recv_task and not self._recv_task.done():
|
|
71
|
+
self._recv_task.cancel()
|
|
72
|
+
self._recv_task = asyncio.create_task(self._receive_audio_task())
|
|
73
|
+
|
|
74
|
+
async def synthesize(
|
|
75
|
+
self,
|
|
76
|
+
text: AsyncIterator[str] | str,
|
|
77
|
+
**kwargs: Any,
|
|
78
|
+
) -> None:
|
|
79
|
+
try:
|
|
80
|
+
if not self.audio_track or not self.loop:
|
|
81
|
+
self.emit("error", "Audio track or event loop not set")
|
|
82
|
+
return
|
|
83
|
+
|
|
84
|
+
await self.interrupt()
|
|
85
|
+
self._should_stop = False
|
|
86
|
+
await self._stream_synthesis(text)
|
|
87
|
+
|
|
88
|
+
except Exception as e:
|
|
89
|
+
self.emit("error", f"TTS synthesis failed: {str(e)}")
|
|
90
|
+
|
|
91
|
+
async def _stream_synthesis(self, text: Union[AsyncIterator[str], str]) -> None:
|
|
92
|
+
try:
|
|
93
|
+
await self._ensure_connection()
|
|
94
|
+
self._send_task = asyncio.create_task(self._send_text_task(text))
|
|
95
|
+
await self._send_task
|
|
96
|
+
except Exception as e:
|
|
97
|
+
self.emit("error", f"Streaming synthesis failed: {str(e)}")
|
|
98
|
+
await self.aclose()
|
|
99
|
+
finally:
|
|
100
|
+
if self._send_task and not self._send_task.done():
|
|
101
|
+
self._send_task.cancel()
|
|
102
|
+
self._send_task = None
|
|
103
|
+
|
|
104
|
+
async def _send_text_task(self, text: Union[AsyncIterator[str], str]) -> None:
|
|
105
|
+
if not self._ws_connection or self._ws_connection.closed:
|
|
106
|
+
return
|
|
107
|
+
|
|
108
|
+
try:
|
|
109
|
+
if isinstance(text, str):
|
|
110
|
+
if not self._should_stop:
|
|
111
|
+
payload = {"type": "Speak", "text": text}
|
|
112
|
+
await self._ws_connection.send_json(payload)
|
|
113
|
+
else:
|
|
114
|
+
async for chunk in text:
|
|
115
|
+
if self._ws_connection.closed or self._should_stop:
|
|
116
|
+
break
|
|
117
|
+
payload = {"type": "Speak", "text": chunk}
|
|
118
|
+
await self._ws_connection.send_json(payload)
|
|
119
|
+
|
|
120
|
+
if not self._ws_connection.closed and not self._should_stop:
|
|
121
|
+
await self._ws_connection.send_json({"type": "Flush"})
|
|
122
|
+
except asyncio.CancelledError:
|
|
123
|
+
pass
|
|
124
|
+
except Exception as e:
|
|
125
|
+
if not self._should_stop:
|
|
126
|
+
self.emit("error", f"Send task error: {str(e)}")
|
|
127
|
+
|
|
128
|
+
async def _receive_audio_task(self) -> None:
|
|
129
|
+
if not self._ws_connection:
|
|
130
|
+
return
|
|
131
|
+
|
|
132
|
+
try:
|
|
133
|
+
while not self._ws_connection.closed:
|
|
134
|
+
msg = await self._ws_connection.receive()
|
|
135
|
+
|
|
136
|
+
if msg.type == aiohttp.WSMsgType.BINARY:
|
|
137
|
+
if not self._should_stop:
|
|
138
|
+
await self._stream_audio_chunks(msg.data)
|
|
139
|
+
elif msg.type == aiohttp.WSMsgType.TEXT:
|
|
140
|
+
data = json.loads(msg.data)
|
|
141
|
+
if data.get('type') == 'Error' and not self._should_stop:
|
|
142
|
+
self.emit("error", f"Deepgram error: {data.get('description', 'Unknown error')}")
|
|
143
|
+
break
|
|
144
|
+
elif msg.type in (aiohttp.WSMsgType.CLOSED, aiohttp.WSMsgType.CLOSE, aiohttp.WSMsgType.CLOSING):
|
|
145
|
+
break
|
|
146
|
+
elif msg.type == aiohttp.WSMsgType.ERROR:
|
|
147
|
+
raise ConnectionError(f"WebSocket error: {self._ws_connection.exception()}")
|
|
148
|
+
except asyncio.CancelledError:
|
|
149
|
+
pass
|
|
150
|
+
except Exception as e:
|
|
151
|
+
if not self._should_stop:
|
|
152
|
+
self.emit("error", f"Receive task error: {str(e)}")
|
|
153
|
+
|
|
154
|
+
async def _stream_audio_chunks(self, audio_bytes: bytes) -> None:
|
|
155
|
+
if not audio_bytes or self._should_stop:
|
|
156
|
+
return
|
|
157
|
+
|
|
158
|
+
if self.audio_track and self.loop:
|
|
159
|
+
await self.audio_track.add_new_bytes(audio_bytes)
|
|
160
|
+
|
|
161
|
+
async def interrupt(self) -> None:
|
|
162
|
+
self._should_stop = True
|
|
163
|
+
|
|
164
|
+
if self.audio_track:
|
|
165
|
+
self.audio_track.interrupt()
|
|
166
|
+
|
|
167
|
+
if self._send_task and not self._send_task.done():
|
|
168
|
+
self._send_task.cancel()
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
async def aclose(self) -> None:
|
|
172
|
+
self._should_stop = True
|
|
173
|
+
|
|
174
|
+
for task in [self._send_task, self._recv_task]:
|
|
175
|
+
if task and not task.done():
|
|
176
|
+
task.cancel()
|
|
177
|
+
|
|
178
|
+
if self._ws_connection and not self._ws_connection.closed:
|
|
179
|
+
await self._ws_connection.close()
|
|
180
|
+
if self._ws_session and not self._ws_session.closed:
|
|
181
|
+
await self._ws_session.close()
|
|
182
|
+
|
|
183
|
+
self._ws_connection = None
|
|
184
|
+
self._ws_session = None
|
|
185
|
+
|
|
186
|
+
await super().aclose()
|
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.35"
|
|
1
|
+
__version__ = "0.0.37"
|
{videosdk_plugins_deepgram-0.0.35.dist-info → videosdk_plugins_deepgram-0.0.37.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: videosdk-plugins-deepgram
|
|
3
|
-
Version: 0.0.35
|
|
3
|
+
Version: 0.0.37
|
|
4
4
|
Summary: VideoSDK Agent Framework plugin for Deepgram
|
|
5
5
|
Author: videosdk
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,7 +12,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
|
|
|
12
12
|
Classifier: Topic :: Multimedia :: Video
|
|
13
13
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
14
|
Requires-Python: >=3.11
|
|
15
|
-
Requires-Dist: videosdk-agents>=0.0.35
|
|
15
|
+
Requires-Dist: videosdk-agents>=0.0.37
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
|
|
18
18
|
# VideoSDK Deepgram Plugin
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
videosdk/plugins/deepgram/__init__.py,sha256=gzJRVqNliLxQHAbCSiki_YiFqicxRCSKgTk1cCLRkFg,98
|
|
2
|
+
videosdk/plugins/deepgram/stt.py,sha256=FrhLg-57kqySa07Zo8yFJmMjcDcmdkihWcBMfFcxBRY,8309
|
|
3
|
+
videosdk/plugins/deepgram/tts.py,sha256=hd4oifQ3lRV-Ry57EGf-8VrcBm2bM9Fj3VUKSAfmgh8,6884
|
|
4
|
+
videosdk/plugins/deepgram/version.py,sha256=8S56Dzx6mwG65yKNAYNfLGkyvS8XrUW3xMrLAg1TKI0,23
|
|
5
|
+
videosdk_plugins_deepgram-0.0.37.dist-info/METADATA,sha256=z0ktgPjqnrcaIeUlOtGwlqVTiKhi65W3KPcj_pgwv58,767
|
|
6
|
+
videosdk_plugins_deepgram-0.0.37.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
7
|
+
videosdk_plugins_deepgram-0.0.37.dist-info/RECORD,,
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
videosdk/plugins/deepgram/__init__.py,sha256=sfiNIvpVbFqbReYTskb3D--3v_U3-FujI9gdkcEGEmY,55
|
|
2
|
-
videosdk/plugins/deepgram/stt.py,sha256=OXqOGMX-xRkjgju9lrnBu-iqC2ltScqFCemLS9wnoqY,8004
|
|
3
|
-
videosdk/plugins/deepgram/version.py,sha256=QmUV3Ydc9xA0pOuihDI_wM1kvEzVZrdkq2eiedTW2UA,23
|
|
4
|
-
videosdk_plugins_deepgram-0.0.35.dist-info/METADATA,sha256=EBV6zev_u2tdgx3AMhN97lIYhtH9-stHv4h84FO0S-A,767
|
|
5
|
-
videosdk_plugins_deepgram-0.0.35.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
-
videosdk_plugins_deepgram-0.0.35.dist-info/RECORD,,
|
{videosdk_plugins_deepgram-0.0.35.dist-info → videosdk_plugins_deepgram-0.0.37.dist-info}/WHEEL
RENAMED
|
File without changes
|