videosdk-plugins-deepgram 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of videosdk-plugins-deepgram may be problematic — see the details below.

@@ -0,0 +1,10 @@
1
+ myenv/
2
+ venv/
3
+ env/
4
+ __pycache__
5
+
6
+ .env
7
+ .env.local
8
+ test_env/
9
+ dist/
10
+ .DS_Store
@@ -0,0 +1,25 @@
1
+ Metadata-Version: 2.4
2
+ Name: videosdk-plugins-deepgram
3
+ Version: 0.0.1
4
+ Summary: VideoSDK Agent Framework plugin for Deepgram
5
+ Author: videosdk
6
+ Keywords: ai,audio,deepgram,video,videosdk
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: Intended Audience :: Developers
9
+ Classifier: Topic :: Communications :: Conferencing
10
+ Classifier: Topic :: Multimedia :: Sound/Audio
11
+ Classifier: Topic :: Multimedia :: Video
12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
13
+ Requires-Python: >=3.11
14
+ Requires-Dist: videosdk-agents>=0.0.9
15
+ Description-Content-Type: text/markdown
16
+
17
+ VideoSDK Deepgram Plugin
18
+
19
+ Agent Framework plugin for STT (speech-to-text) services from Deepgram.
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ pip install videosdk-plugins-deepgram
25
+ ```
@@ -0,0 +1,9 @@
1
+ VideoSDK Deepgram Plugin
2
+
3
+ Agent Framework plugin for STT (speech-to-text) services from Deepgram.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install videosdk-plugins-deepgram
9
+ ```
@@ -0,0 +1,33 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "videosdk-plugins-deepgram"
7
+ dynamic = ["version"]
8
+ description = "VideoSDK Agent Framework plugin for Deepgram"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ authors = [{ name = "videosdk"}]
12
+ keywords = ["video", "audio", "ai", "videosdk", "deepgram"]
13
+ classifiers = [
14
+ "Intended Audience :: Developers",
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "Topic :: Communications :: Conferencing",
18
+ "Topic :: Multimedia :: Sound/Audio",
19
+ "Topic :: Multimedia :: Video",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
+ ]
22
+ dependencies = [
23
+ "videosdk-agents>=0.0.9"
24
+ ]
25
+
26
+ [tool.hatch.version]
27
+ path = "videosdk/plugins/deepgram/version.py"
28
+
29
+ [tool.hatch.build.targets.wheel]
30
+ packages = ["videosdk"]
31
+
32
+ [tool.hatch.build.targets.sdist]
33
+ include = ["/videosdk"]
@@ -0,0 +1,3 @@
1
+ from .stt import DeepgramSTT
2
+
3
+ __all__ = ["DeepgramSTT"]
@@ -0,0 +1,191 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import time
5
+ from typing import Any, Optional
6
+ import os
7
+ from urllib.parse import urlencode
8
+ import aiohttp
9
+ from videosdk.agents import STT as BaseSTT, STTResponse, SpeechEventType, SpeechData, global_event_emitter
10
+
11
class DeepgramSTT(BaseSTT):
    """Streaming speech-to-text backed by Deepgram's realtime WebSocket API.

    Raw linear16 PCM audio handed to :meth:`process_audio` is forwarded over a
    lazily-opened WebSocket; transcription results arrive asynchronously in a
    background listener task and are delivered through the transcript callback
    installed on the base class. VAD ``SpeechStarted`` events are re-emitted on
    the global event emitter as ``"speech_started"``.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        model: str = "nova-2",
        language: str = "en-US",
        interim_results: bool = True,
        punctuate: bool = True,
        smart_format: bool = True,
        sample_rate: int = 48000,
        endpointing: int = 50,
        filler_words: bool = True,
        base_url: str = "wss://api.deepgram.com/v1/listen",
        channels: int = 2,
    ) -> None:
        """Configure the Deepgram streaming session (no I/O happens here).

        Args:
            api_key: Deepgram API key; falls back to the ``DEEPGRAM_API_KEY``
                environment variable when omitted.
            model: Deepgram model name (e.g. ``"nova-2"``).
            language: BCP-47 language tag sent to Deepgram.
            interim_results: Emit partial (non-final) transcripts.
            punctuate: Ask Deepgram to add punctuation.
            smart_format: Enable Deepgram smart formatting.
            sample_rate: PCM sample rate of the audio frames, in Hz.
            endpointing: Deepgram endpointing silence threshold in ms.
            filler_words: Keep filler words ("um", "uh") in transcripts.
            base_url: WebSocket endpoint for the Deepgram listen API.
            channels: Number of interleaved channels in the PCM stream.
                Defaults to 2, preserving the previously hard-coded value.

        Raises:
            ValueError: If no API key is supplied and the environment
                variable is unset.
        """
        super().__init__()

        self.api_key = api_key or os.getenv("DEEPGRAM_API_KEY")
        if not self.api_key:
            raise ValueError("Deepgram API key must be provided either through api_key parameter or DEEPGRAM_API_KEY environment variable")

        self.model = model
        self.language = language
        self.sample_rate = sample_rate
        self.channels = channels
        self.interim_results = interim_results
        self.punctuate = punctuate
        self.smart_format = smart_format
        self.endpointing = endpointing
        self.filler_words = filler_words
        self.base_url = base_url

        # WebSocket session for streaming; opened lazily on first audio frame.
        self._session: Optional[aiohttp.ClientSession] = None
        self._ws: Optional[aiohttp.ClientWebSocketResponse] = None
        self._ws_task: Optional[asyncio.Task] = None

        # Timestamps of the last two SpeechStarted VAD events, used to
        # debounce the "speech_started" global event (see _handle_ws_message).
        self._last_speech_event_time = 0.0
        self._previous_speech_event_time = 0.0

    async def process_audio(
        self,
        audio_frames: bytes,
        language: Optional[str] = None,
        **kwargs: Any
    ) -> None:
        """Send one chunk of linear16 PCM audio to Deepgram's streaming API.

        Connects (and spawns the response-listener task) on first use. On a
        send failure the connection and listener are torn down so the next
        call reconnects cleanly; the error is reported via ``emit("error")``.
        """
        if not self._ws:
            # Discard any listener task left over from a connection that the
            # listener itself closed (it sets self._ws = None on exit);
            # otherwise the stale task reference would be silently overwritten.
            if self._ws_task:
                self._ws_task.cancel()
                self._ws_task = None
            await self._connect_ws()
            # Start listening for responses in the background.
            self._ws_task = asyncio.create_task(self._listen_for_responses())

        try:
            await self._ws.send_bytes(audio_frames)
        except Exception as e:
            print(f"Error in process_audio: {str(e)}")
            self.emit("error", str(e))
            if self._ws:
                await self._ws.close()
                self._ws = None
            if self._ws_task:
                self._ws_task.cancel()
                self._ws_task = None

    async def _listen_for_responses(self) -> None:
        """Background task: drain WebSocket messages into STT responses."""
        if not self._ws:
            return

        try:
            async for msg in self._ws:
                if msg.type == aiohttp.WSMsgType.TEXT:
                    data = msg.json()
                    responses = self._handle_ws_message(data)
                    for response in responses:
                        if self._transcript_callback:
                            await self._transcript_callback(response)
                elif msg.type == aiohttp.WSMsgType.ERROR:
                    self.emit("error", f"WebSocket error: {self._ws.exception()}")
                    break
        except Exception as e:
            self.emit("error", f"Error in WebSocket listener: {str(e)}")
        finally:
            # Clear the connection so the next process_audio() reconnects.
            if self._ws:
                await self._ws.close()
                self._ws = None

    async def _connect_ws(self) -> None:
        """Open the WebSocket connection to Deepgram's streaming endpoint.

        Raises:
            Exception: Propagates any connection failure from aiohttp after
                logging it.
        """
        if not self._session:
            self._session = aiohttp.ClientSession()

        # Deepgram expects flag-style booleans as lowercase "true"/"false".
        query_params = {
            "model": self.model,
            "language": self.language,
            "interim_results": str(self.interim_results).lower(),
            "punctuate": str(self.punctuate).lower(),
            "smart_format": str(self.smart_format).lower(),
            "encoding": "linear16",
            "sample_rate": str(self.sample_rate),
            "channels": str(self.channels),
            "endpointing": str(self.endpointing),
            "filler_words": str(self.filler_words).lower(),
            "vad_events": "true",
            "no_delay": "true",
        }
        headers = {
            "Authorization": f"Token {self.api_key}",
        }

        ws_url = f"{self.base_url}?{urlencode(query_params)}"

        try:
            self._ws = await self._session.ws_connect(ws_url, headers=headers)
        except Exception as e:
            print(f"Error connecting to WebSocket: {str(e)}")
            raise

    def _handle_ws_message(self, msg: dict) -> list[STTResponse]:
        """Translate one Deepgram JSON message into zero or more STTResponses.

        ``SpeechStarted`` VAD events are debounced: the first event only arms
        the timer, and a global ``"speech_started"`` is emitted when two
        events arrive less than one second apart. ``Results`` messages with a
        non-empty transcript become INTERIM/FINAL responses. Malformed
        messages are logged and ignored rather than crashing the listener.
        """
        responses: list[STTResponse] = []
        try:
            if msg["type"] == "SpeechStarted":
                current_time = time.time()

                # First event after startup: just record the timestamp.
                if self._last_speech_event_time == 0.0:
                    self._last_speech_event_time = current_time
                    return responses

                if current_time - self._last_speech_event_time < 1.0:
                    global_event_emitter.emit("speech_started")

                self._previous_speech_event_time = self._last_speech_event_time
                self._last_speech_event_time = current_time

            if msg["type"] == "Results":
                channel = msg["channel"]
                alternatives = channel["alternatives"]

                if alternatives and len(alternatives) > 0:
                    alt = alternatives[0]
                    is_final = msg["is_final"]
                    # Deepgram sends empty transcripts for silence; skip them.
                    if alt["transcript"] == "":
                        return responses

                    response = STTResponse(
                        event_type=SpeechEventType.FINAL if is_final else SpeechEventType.INTERIM,
                        data=SpeechData(
                            text=alt["transcript"],
                            language=self.language,
                            confidence=alt.get("confidence", 0.0),
                            # Word timings may be absent; fall back to 0.0.
                            start_time=alt["words"][0]["start"] if alt["words"] else 0.0,
                            end_time=alt["words"][-1]["end"] if alt["words"] else 0.0,
                        ),
                        metadata={"model": self.model}
                    )
                    responses.append(response)

        except Exception as e:
            print(f"Error handling WebSocket message: {str(e)}")

        return responses

    async def aclose(self) -> None:
        """Cancel the listener task and release WebSocket/session resources."""
        if self._ws_task:
            self._ws_task.cancel()
            try:
                await self._ws_task
            except asyncio.CancelledError:
                pass
            self._ws_task = None

        if self._ws:
            await self._ws.close()
            self._ws = None

        if self._session:
            await self._session.close()
            self._session = None
@@ -0,0 +1 @@
# Single source of truth for the package version; hatchling reads this file
# at build time via [tool.hatch.version] in pyproject.toml.
__version__ = "0.0.1"