videosdk-plugins-deepgram 0.0.35__tar.gz → 0.0.36__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of videosdk-plugins-deepgram might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: videosdk-plugins-deepgram
3
- Version: 0.0.35
3
+ Version: 0.0.36
4
4
  Summary: VideoSDK Agent Framework plugin for Deepgram
5
5
  Author: videosdk
6
6
  License-Expression: Apache-2.0
@@ -12,7 +12,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
12
12
  Classifier: Topic :: Multimedia :: Video
13
13
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
14
  Requires-Python: >=3.11
15
- Requires-Dist: videosdk-agents>=0.0.35
15
+ Requires-Dist: videosdk-agents>=0.0.36
16
16
  Description-Content-Type: text/markdown
17
17
 
18
18
  # VideoSDK Deepgram Plugin
@@ -20,7 +20,7 @@ classifiers = [
20
20
  "Topic :: Multimedia :: Video",
21
21
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
22
22
  ]
23
- dependencies = ["videosdk-agents>=0.0.35"]
23
+ dependencies = ["videosdk-agents>=0.0.36"]
24
24
 
25
25
  [tool.hatch.version]
26
26
  path = "videosdk/plugins/deepgram/version.py"
@@ -0,0 +1,3 @@
1
+ from .stt import DeepgramSTT
2
+ from .tts import DeepgramTTS
3
+ __all__ = ["DeepgramSTT", "DeepgramTTS"]
@@ -0,0 +1,186 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import aiohttp
5
+ import json
6
+ from typing import Any, AsyncIterator, Union, Optional
7
+ import os
8
+ from videosdk.agents import TTS
9
+ DEEPGRAM_SAMPLE_RATE = 24000
10
+ DEEPGRAM_CHANNELS = 1
11
+ DEFAULT_MODEL = "aura-2-thalia-en"
12
+ DEFAULT_ENCODING = "linear16"
13
+ API_BASE_URL = "wss://api.deepgram.com/v1/speak"
14
+
15
+
16
class DeepgramTTS(TTS):
    """Streaming text-to-speech client for a Deepgram "speak" WebSocket.

    Text is sent over a single long-lived WebSocket as ``Speak`` messages
    followed by a ``Flush``; binary frames coming back are forwarded to
    ``self.audio_track``. ``audio_track`` and ``loop`` start out as ``None``
    and must be assigned by the caller before ``synthesize`` is used.
    """

    # Upper bound, in seconds, on the WebSocket handshake.
    _CONNECT_TIMEOUT_SECONDS = 50.0

    def __init__(
        self,
        *,
        api_key: str | None = None,
        model: str = DEFAULT_MODEL,
        encoding: str = DEFAULT_ENCODING,
        sample_rate: int = DEEPGRAM_SAMPLE_RATE,
        base_url: str = API_BASE_URL,
        **kwargs: Any,
    ) -> None:
        """Configure the client; no network activity happens here.

        Args:
            api_key: Deepgram API key. Falls back to the ``DEEPGRAM_API_KEY``
                environment variable when omitted.
            model: Value sent as the ``model`` query parameter.
            encoding: Value sent as the ``encoding`` query parameter.
            sample_rate: Passed to the base class and sent as the
                ``sample_rate`` query parameter.
            base_url: WebSocket endpoint the query parameters are appended to.
            **kwargs: Accepted for call-site compatibility; not used here.

        Raises:
            ValueError: If no API key is given and the environment variable
                is unset or empty.
        """
        super().__init__(sample_rate=sample_rate, num_channels=DEEPGRAM_CHANNELS)

        self.model = model
        self.encoding = encoding
        self.base_url = base_url
        self.audio_track = None
        self.loop = None
        self._ws_session: aiohttp.ClientSession | None = None
        self._ws_connection: aiohttp.ClientWebSocketResponse | None = None
        self._send_task: asyncio.Task | None = None
        self._recv_task: asyncio.Task | None = None
        self._should_stop = False
        self._first_chunk_sent = False
        self._connection_lock = asyncio.Lock()

        self.api_key = api_key or os.getenv("DEEPGRAM_API_KEY")
        if not self.api_key:
            raise ValueError(
                "Deepgram API key must be provided either through the 'api_key' parameter or the DEEPGRAM_API_KEY environment variable."
            )

    def reset_first_audio_tracking(self) -> None:
        """Clear the "first chunk sent" flag."""
        self._first_chunk_sent = False

    async def _close_transport(self) -> None:
        """Close the WebSocket and its owning HTTP session, if any."""
        connection, self._ws_connection = self._ws_connection, None
        session, self._ws_session = self._ws_session, None
        try:
            if connection is not None and not connection.closed:
                await connection.close()
        finally:
            # The session must be released even when closing the socket fails.
            if session is not None and not session.closed:
                await session.close()

    async def _ensure_connection(self) -> None:
        """Open the WebSocket if needed and make sure a reader task is running.

        Raises:
            asyncio.TimeoutError: If the handshake exceeds the connect timeout.
            Exception: Whatever ``ws_connect`` raises on a failed handshake.
        """
        from urllib.parse import urlencode

        async with self._connection_lock:
            if self._ws_connection and not self._ws_connection.closed:
                # The reader exits on a Deepgram "Error" frame while the socket
                # may stay open; without a reader no further audio is consumed.
                if self._recv_task is None or self._recv_task.done():
                    self._recv_task = asyncio.create_task(self._receive_audio_task())
                return

            if self._recv_task and not self._recv_task.done():
                self._recv_task.cancel()
            self._recv_task = None
            # Release the previous session before replacing it; otherwise every
            # reconnect leaks a ClientSession.
            await self._close_transport()

            # NOTE(review): sample_rate is presumably stored on the instance by
            # the base class constructor -- confirm.
            query = urlencode(
                {
                    "model": self.model,
                    "encoding": self.encoding,
                    "sample_rate": self.sample_rate,
                }
            )
            separator = "&" if "?" in self.base_url else "?"
            full_ws_url = f"{self.base_url}{separator}{query}"
            headers = {"Authorization": f"Token {self.api_key}"}

            session = aiohttp.ClientSession()
            connection = None
            try:
                connection = await asyncio.wait_for(
                    session.ws_connect(full_ws_url, headers=headers),
                    timeout=self._CONNECT_TIMEOUT_SECONDS,
                )
            finally:
                if connection is None:
                    # Handshake failed, timed out, or was cancelled.
                    await session.close()

            self._ws_session = session
            self._ws_connection = connection
            self._recv_task = asyncio.create_task(self._receive_audio_task())

    async def synthesize(
        self,
        text: AsyncIterator[str] | str,
        **kwargs: Any,
    ) -> None:
        """Interrupt any current output, then stream ``text`` to Deepgram.

        Failures are reported through the ``"error"`` event rather than
        raised. Returns immediately (after emitting an error) when
        ``audio_track`` or ``loop`` has not been set.

        Args:
            text: A complete string, or an async iterator of text chunks.
            **kwargs: Accepted for interface compatibility; not used here.
        """
        try:
            if not self.audio_track or not self.loop:
                self.emit("error", "Audio track or event loop not set")
                return

            await self.interrupt()
            self._should_stop = False
            await self._stream_synthesis(text)

        except Exception as e:
            self.emit("error", f"TTS synthesis failed: {e}")

    async def _stream_synthesis(self, text: AsyncIterator[str] | str) -> None:
        """Connect if necessary and run the send task to completion.

        On any exception an error is emitted and the whole client is closed
        via ``aclose``.
        """
        try:
            await self._ensure_connection()
            self._send_task = asyncio.create_task(self._send_text_task(text))
            await self._send_task
        except Exception as e:
            self.emit("error", f"Streaming synthesis failed: {e}")
            await self.aclose()
        finally:
            if self._send_task and not self._send_task.done():
                self._send_task.cancel()
            self._send_task = None

    async def _send_text_task(self, text: AsyncIterator[str] | str) -> None:
        """Send ``text`` as ``Speak`` message(s), then a ``Flush``.

        Stops early when the socket closes or a stop has been requested.
        Cancellation is absorbed; other errors are emitted unless a stop is
        in progress.
        """
        # Bind once: aclose() may reset the attribute to None while this task
        # is suspended on a send.
        connection = self._ws_connection
        if not connection or connection.closed:
            return

        try:
            if isinstance(text, str):
                if not self._should_stop:
                    await connection.send_json({"type": "Speak", "text": text})
            else:
                async for chunk in text:
                    if connection.closed or self._should_stop:
                        break
                    await connection.send_json({"type": "Speak", "text": chunk})

            if not connection.closed and not self._should_stop:
                await connection.send_json({"type": "Flush"})
        except asyncio.CancelledError:
            pass
        except Exception as e:
            if not self._should_stop:
                self.emit("error", f"Send task error: {e}")

    async def _receive_audio_task(self) -> None:
        """Read frames until the socket closes, forwarding audio to the track.

        Binary frames are audio. Text frames are parsed as JSON, and a frame
        whose ``type`` is ``Error`` is emitted and ends the loop. Close frames
        end the loop; a WebSocket-level error is emitted as a receive error.
        """
        connection = self._ws_connection
        if not connection:
            return

        try:
            while not connection.closed:
                msg = await connection.receive()

                if msg.type == aiohttp.WSMsgType.BINARY:
                    if not self._should_stop:
                        await self._stream_audio_chunks(msg.data)
                elif msg.type == aiohttp.WSMsgType.TEXT:
                    try:
                        data = json.loads(msg.data)
                    except ValueError as e:
                        # One unparsable frame must not stop audio delivery.
                        if not self._should_stop:
                            self.emit("error", f"Receive task error: {e}")
                        continue
                    if (
                        isinstance(data, dict)
                        and data.get("type") == "Error"
                        and not self._should_stop
                    ):
                        self.emit("error", f"Deepgram error: {data.get('description', 'Unknown error')}")
                        break
                elif msg.type in (
                    aiohttp.WSMsgType.CLOSED,
                    aiohttp.WSMsgType.CLOSE,
                    aiohttp.WSMsgType.CLOSING,
                ):
                    break
                elif msg.type == aiohttp.WSMsgType.ERROR:
                    raise ConnectionError(f"WebSocket error: {connection.exception()}")
        except asyncio.CancelledError:
            pass
        except Exception as e:
            if not self._should_stop:
                self.emit("error", f"Receive task error: {e}")

    async def _stream_audio_chunks(self, audio_bytes: bytes) -> None:
        """Hand one audio payload to the audio track unless stopping."""
        if not audio_bytes or self._should_stop:
            return

        if self.audio_track and self.loop:
            await self.audio_track.add_new_bytes(audio_bytes)

    async def interrupt(self) -> None:
        """Request a stop, interrupt the audio track, and cancel sending."""
        self._should_stop = True

        if self.audio_track:
            self.audio_track.interrupt()

        # NOTE(review): audio already requested from the server may still
        # arrive after this returns; confirm whether the service needs an
        # explicit buffer-clear message here.
        if self._send_task and not self._send_task.done():
            self._send_task.cancel()

    async def aclose(self) -> None:
        """Stop all activity, release the connection, and close the base class."""
        self._should_stop = True

        # Never cancel or await the task that is running this coroutine.
        current = asyncio.current_task()
        cancelled = []
        for task in (self._send_task, self._recv_task):
            if task and not task.done() and task is not current:
                task.cancel()
                cancelled.append(task)
        if cancelled:
            # Let the tasks unwind before the socket goes away under them.
            await asyncio.gather(*cancelled, return_exceptions=True)
        self._recv_task = None

        try:
            await self._close_transport()
        finally:
            await super().aclose()
@@ -0,0 +1 @@
1
+ __version__ = "0.0.36"
@@ -1,3 +0,0 @@
1
- from .stt import DeepgramSTT
2
-
3
- __all__ = ["DeepgramSTT"]
@@ -1 +0,0 @@
1
- __version__ = "0.0.35"