vision-agents-plugins-deepgram 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of vision-agents-plugins-deepgram might be problematic. Click here for more details.

PKG-INFO CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vision-agents-plugins-deepgram
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: Deepgram STT integration for Vision Agents
5
5
  Project-URL: Documentation, https://visionagents.ai/
6
6
  Project-URL: Website, https://visionagents.ai/
vision_agents/plugins/deepgram/stt.py CHANGED
@@ -3,7 +3,7 @@ import contextlib
3
3
  import logging
4
4
  import os
5
5
  import time
6
- from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
6
+ from typing import Any, Dict, Optional
7
7
 
8
8
  import numpy as np
9
9
  import websockets
@@ -20,11 +20,11 @@ from deepgram.listen.v1.socket_client import AsyncV1SocketClient
20
20
  from getstream.video.rtc.track_util import PcmData
21
21
 
22
22
  from vision_agents.core import stt
23
+ from vision_agents.core.stt import TranscriptResponse
23
24
 
24
25
  from .utils import generate_silence
25
26
 
26
- if TYPE_CHECKING:
27
- from vision_agents.core.edge.types import Participant
27
+ from vision_agents.core.edge.types import Participant
28
28
 
29
29
  logger = logging.getLogger(__name__)
30
30
 
@@ -50,7 +50,6 @@ class STT(stt.STT):
50
50
  self,
51
51
  api_key: Optional[str] = None,
52
52
  options: Optional[dict] = None,
53
- sample_rate: int = 48000,
54
53
  language: str = "en-US",
55
54
  interim_results: bool = True,
56
55
  client: Optional[AsyncDeepgramClient] = None,
@@ -70,7 +69,7 @@ class STT(stt.STT):
70
69
  connection_timeout: Time to wait for the Deepgram connection to be established.
71
70
 
72
71
  """
73
- super().__init__(sample_rate=sample_rate)
72
+ super().__init__(provider_name="deepgram")
74
73
 
75
74
  # If no API key was provided, check for DEEPGRAM_API_KEY in environment
76
75
  if api_key is None:
@@ -86,12 +85,13 @@ class STT(stt.STT):
86
85
  client if client is not None else AsyncDeepgramClient(api_key=api_key)
87
86
  )
88
87
  self.dg_connection: Optional[AsyncV1SocketClient] = None
88
+ self.sample_rate = 48000
89
89
 
90
90
  self.options = options or {
91
91
  "model": "nova-2",
92
92
  "language": language,
93
93
  "encoding": "linear16",
94
- "sample_rate": sample_rate,
94
+ "sample_rate": self.sample_rate,
95
95
  "channels": 1,
96
96
  "interim_results": interim_results,
97
97
  }
@@ -101,7 +101,7 @@ class STT(stt.STT):
101
101
 
102
102
  # Generate a silence audio to use as keep-alive message
103
103
  self._keep_alive_data = generate_silence(
104
- sample_rate=sample_rate, duration_ms=10
104
+ sample_rate=self.sample_rate, duration_ms=10
105
105
  )
106
106
  self._keep_alive_interval = keep_alive_interval
107
107
 
@@ -121,7 +121,7 @@ class STT(stt.STT):
121
121
  """
122
122
  Start the main task establishing the Deepgram connection and processing the events.
123
123
  """
124
- if self._is_closed:
124
+ if self.closed:
125
125
  logger.warning("Cannot setup connection - Deepgram instance is closed")
126
126
  return None
127
127
 
@@ -178,15 +178,8 @@ class STT(stt.STT):
178
178
  )
179
179
 
180
180
  async def close(self):
181
+ await super().close()
181
182
  """Close the Deepgram connection and clean up resources."""
182
- if self._is_closed:
183
- logger.debug("Deepgram STT service already closed")
184
- return
185
-
186
- logger.info("Closing Deepgram STT service")
187
- self._is_closed = True
188
-
189
- # Close the Deepgram connection if it exists
190
183
  if self.dg_connection:
191
184
  logger.debug("Closing Deepgram connection")
192
185
  try:
@@ -225,20 +218,17 @@ class STT(stt.STT):
225
218
  # Check if this is a final result
226
219
  is_final = transcript.get("is_final", False)
227
220
 
228
- # Create metadata with useful information
229
- metadata = {
230
- "confidence": alternatives[0].get("confidence", 0),
231
- "words": alternatives[0].get("words", []),
232
- "is_final": is_final,
233
- "channel_index": transcript.get("channel_index", 0),
234
- }
221
+ # Create response metadata
222
+ response_metadata = TranscriptResponse(
223
+ confidence=alternatives[0].get("confidence", 0),
224
+ )
235
225
 
236
226
  # Emit immediately for real-time responsiveness
237
227
  if is_final:
238
- self._emit_transcript_event(transcript_text, self._current_user, metadata)
228
+ self._emit_transcript_event(transcript_text, self._current_user, response_metadata)
239
229
  else:
240
230
  self._emit_partial_transcript_event(
241
- transcript_text, self._current_user, metadata
231
+ transcript_text, self._current_user, response_metadata
242
232
  )
243
233
 
244
234
  logger.debug(
@@ -246,7 +236,7 @@ class STT(stt.STT):
246
236
  extra={
247
237
  "is_final": is_final,
248
238
  "text_length": len(transcript_text),
249
- "confidence": metadata["confidence"],
239
+ "confidence": response_metadata.confidence,
250
240
  },
251
241
  )
252
242
 
@@ -261,29 +251,15 @@ class STT(stt.STT):
261
251
  logger.warning(f"Deepgram connection closed. message={message}")
262
252
  await self.close()
263
253
 
264
- async def _process_audio_impl(
254
+ async def process_audio(
265
255
  self,
266
256
  pcm_data: PcmData,
267
- user_metadata: Optional[Union[Dict[str, Any], "Participant"]] = None,
268
- ) -> Optional[List[Tuple[bool, str, Dict[str, Any]]]]:
269
- """
270
- Process audio data through Deepgram for transcription.
271
-
272
- Args:
273
- pcm_data: The PCM audio data to process.
274
- user_metadata: Additional metadata about the user or session.
275
-
276
- Returns:
277
- None - Deepgram operates in asynchronous mode and emits events directly
278
- when transcripts arrive from the streaming service.
279
- """
280
- if self._is_closed:
257
+ participant: Optional[Participant] = None,
258
+ ):
259
+ if self.closed:
281
260
  logger.warning("Deepgram connection is closed, ignoring audio")
282
261
  return None
283
262
 
284
- # Store the current user context for transcript events
285
- self._current_user = user_metadata # type: ignore[assignment]
286
-
287
263
  # Check if the input sample rate matches the expected sample rate
288
264
  if pcm_data.sample_rate != self.sample_rate:
289
265
  logger.warning(
@@ -334,7 +310,7 @@ class STT(stt.STT):
334
310
  Send the silence audio every `interval` seconds
335
311
  to prevent Deepgram from closing the connection.
336
312
  """
337
- while not self._is_closed and self.dg_connection is not None:
313
+ while not self.closed and self.dg_connection is not None:
338
314
  if self._last_sent_at + self._keep_alive_interval <= time.time():
339
315
  logger.debug("Sending keepalive packet to Deepgram...")
340
316
  # Send audio silence to keep the connection open
vision_agents_plugins_deepgram-0.1.11.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vision-agents-plugins-deepgram
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: Deepgram STT integration for Vision Agents
5
5
  Project-URL: Documentation, https://visionagents.ai/
6
6
  Project-URL: Website, https://visionagents.ai/
vision_agents_plugins_deepgram-0.1.11.dist-info/RECORD CHANGED
@@ -1,13 +1,13 @@
1
1
  ./.gitignore,sha256=S6wPCu4rBDB_yyTYoXbMIR-pn4OPv6b3Ulnx1n5RWvo,916
2
- ./PKG-INFO,sha256=DFkbCCIqjaLzcKhJ2kK22v6_O4u7iokGIm27vBsJt3E,2273
2
+ ./PKG-INFO,sha256=y4b11aPc6ENbtuH_BVY5FdYpqCI4AXGrS6PwPl-f4W8,2274
3
3
  ./README.md,sha256=CX3wmR5ztY0crI5VSmBt2K0vBVjFvEhBr-SNuycL1Uc,1717
4
4
  ./pyproject.toml,sha256=qb2egEHyFlV-ZjccfgqUqcwp0jWyoVJ5P5hz1BsJwCA,1104
5
5
  ./vision_agents/plugins/deepgram/__init__.py,sha256=iBBsZvcyd4KfkcUHsi1QiVVQnPEKvAweGZ40eHeENs4,159
6
- ./vision_agents/plugins/deepgram/stt.py,sha256=I2eNU_O_xAX5rDJufm-ooVvF4kYxOrPh0_F2i8diYWY,13124
6
+ ./vision_agents/plugins/deepgram/stt.py,sha256=2oTbjDmfXE9a3ZO4R0iTwaNN3hhtbtA_DaL8D9ohY9c,12164
7
7
  ./vision_agents/plugins/deepgram/utils.py,sha256=7xcGxnhcuVpqHIp1F_d1ARTq6y0jQGZsPx_2hwBifZ0,527
8
8
  vision_agents/plugins/deepgram/__init__.py,sha256=iBBsZvcyd4KfkcUHsi1QiVVQnPEKvAweGZ40eHeENs4,159
9
- vision_agents/plugins/deepgram/stt.py,sha256=I2eNU_O_xAX5rDJufm-ooVvF4kYxOrPh0_F2i8diYWY,13124
9
+ vision_agents/plugins/deepgram/stt.py,sha256=2oTbjDmfXE9a3ZO4R0iTwaNN3hhtbtA_DaL8D9ohY9c,12164
10
10
  vision_agents/plugins/deepgram/utils.py,sha256=7xcGxnhcuVpqHIp1F_d1ARTq6y0jQGZsPx_2hwBifZ0,527
11
- vision_agents_plugins_deepgram-0.1.9.dist-info/METADATA,sha256=DFkbCCIqjaLzcKhJ2kK22v6_O4u7iokGIm27vBsJt3E,2273
12
- vision_agents_plugins_deepgram-0.1.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
13
- vision_agents_plugins_deepgram-0.1.9.dist-info/RECORD,,
11
+ vision_agents_plugins_deepgram-0.1.11.dist-info/METADATA,sha256=y4b11aPc6ENbtuH_BVY5FdYpqCI4AXGrS6PwPl-f4W8,2274
12
+ vision_agents_plugins_deepgram-0.1.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
13
+ vision_agents_plugins_deepgram-0.1.11.dist-info/RECORD,,