videosdk-plugins-navana 0.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of videosdk-plugins-navana might be problematic. Click here for more details.

@@ -0,0 +1,3 @@
1
+ from .stt import NavanaSTT
2
+
3
+ __all__ = ["NavanaSTT"]
@@ -0,0 +1,141 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import os
5
+ from typing import Any, Optional
6
+ import numpy as np
7
+ from videosdk.agents import STT as BaseSTT, STTResponse, SpeechData, SpeechEventType, global_event_emitter
8
+ from bodhi import BodhiClient, TranscriptionConfig, TranscriptionResponse, LiveTranscriptionEvents
9
+
10
+ try:
11
+ from scipy import signal
12
+ SCIPY_AVAILABLE = True
13
+ except ImportError:
14
+ SCIPY_AVAILABLE = False
15
+
16
class NavanaSTT(BaseSTT):
    """
    VideoSDK Agent Framework STT plugin for Navana's Bodhi API.

    Incoming 16-bit PCM audio is down-mixed to mono, resampled to
    ``target_sample_rate`` and streamed through a ``BodhiClient``. Transcript
    events from the client are translated into ``STTResponse`` objects
    (INTERIM for each transcript, FINAL when the utterance ends) and handed to
    ``self._transcript_callback``.

    NOTE(review): ``_transcript_callback`` and ``emit`` are not defined in this
    class; they are presumably provided by the ``STT`` base class - confirm.
    """

    def __init__(
        self,
        *,
        customer_id: str | None = None,
        api_key: str | None = None,
        model: str = "en-general-v2-8khz",
        language: str = "en",
        input_sample_rate: int = 48000,
        input_channels: int = 2,
    ) -> None:
        """
        Create the plugin and register its Bodhi event handlers.

        Args:
            customer_id: Navana customer ID. Falls back to the
                ``NAVANA_CUSTOMER_ID`` environment variable.
            api_key: Navana API key. Falls back to the ``NAVANA_API_KEY``
                environment variable.
            model: Model name passed to ``TranscriptionConfig``.
            language: Language tag attached to every emitted ``SpeechData``.
            input_sample_rate: Sample rate (Hz) of the audio given to
                ``process_audio``.
            input_channels: Number of interleaved channels in the audio given
                to ``process_audio``. Defaults to 2 (stereo), which matches
                the previously hard-coded behaviour.

        Raises:
            ImportError: If scipy could not be imported.
            ValueError: If credentials are missing, or if
                ``input_sample_rate`` / ``input_channels`` is not positive.
        """
        super().__init__()

        if not SCIPY_AVAILABLE:
            raise ImportError("The 'scipy' library is not installed. Please install it with 'pip install scipy' to use the NavanaSTT plugin for audio resampling.")

        self.customer_id = customer_id or os.getenv("NAVANA_CUSTOMER_ID")
        self.api_key = api_key or os.getenv("NAVANA_API_KEY")

        if not self.api_key or not self.customer_id:
            raise ValueError(
                "Navana API key and Customer ID must be provided either through parameters or "
                "NAVANA_API_KEY/NAVANA_CUSTOMER_ID environment variables."
            )

        # Fail fast here instead of raising (ZeroDivisionError / reshape error)
        # on every audio frame later.
        if input_sample_rate <= 0:
            raise ValueError("input_sample_rate must be a positive integer.")
        if input_channels <= 0:
            raise ValueError("input_channels must be a positive integer.")

        self.model = model
        self.language = language
        self.input_sample_rate = input_sample_rate
        self.input_channels = input_channels
        # Rate the audio is resampled to and the rate announced in the
        # transcription config. Presumably chosen to match the default
        # "8khz" model - TODO confirm for other models.
        self.target_sample_rate = 8000

        self.client = BodhiClient(api_key=self.api_key, customer_id=self.customer_id)
        self._connection_started = False
        # Most recent interim text; promoted to FINAL on utterance end.
        self._last_transcript_text = ""

        self._register_event_handlers()

    def _register_event_handlers(self):
        """Registers handlers for the Bodhi client's transcription events."""
        handlers = (
            (LiveTranscriptionEvents.Transcript, self._on_transcript),
            (LiveTranscriptionEvents.UtteranceEnd, self._on_utterance_end),
            (LiveTranscriptionEvents.SpeechStarted, self._on_speech_started),
            (LiveTranscriptionEvents.Error, self._on_error),
            (LiveTranscriptionEvents.Close, self._on_close),
        )
        for event, handler in handlers:
            self.client.on(event, handler)

    async def _on_transcript(self, response: TranscriptionResponse):
        """Handles interim results, updating the latest transcript buffer."""
        if not (response.text and self._transcript_callback):
            return
        self._last_transcript_text = response.text
        event = STTResponse(
            event_type=SpeechEventType.INTERIM,
            # Confidence is a fixed placeholder; no score is read from the response.
            data=SpeechData(text=response.text, language=self.language, confidence=1.0)
        )
        await self._transcript_callback(event)

    async def _on_utterance_end(self, response: dict):
        """On utterance end, promotes the last known transcript to FINAL."""
        if not (self._last_transcript_text and self._transcript_callback):
            return
        final_text = self._last_transcript_text
        # Clear before awaiting so a repeated utterance-end event cannot
        # re-emit the same text.
        self._last_transcript_text = ""
        event = STTResponse(
            event_type=SpeechEventType.FINAL,
            data=SpeechData(text=final_text, language=self.language, confidence=1.0)
        )
        await self._transcript_callback(event)

    async def _on_speech_started(self, response: TranscriptionResponse):
        """Broadcasts a ``speech_started`` event on the global event emitter."""
        global_event_emitter.emit("speech_started")

    async def _on_error(self, e: Exception):
        """Prints an SDK error and re-emits it as this plugin's ``error`` event."""
        error_message = f"Navana SDK Error: {str(e)}"
        print(error_message)
        self.emit("error", error_message)

    async def _on_close(self):
        """Marks the connection as closed so the next audio frame reconnects."""
        print("Navana SDK connection closed.")
        self._connection_started = False

    def _to_target_pcm(self, audio_frames: bytes) -> bytes:
        """
        Convert interleaved int16 PCM into mono int16 PCM at the target rate.

        Trailing bytes that do not form a complete frame (one int16 sample per
        channel) are dropped. Returns ``b""`` when there is nothing to send.
        """
        frame_size = self.input_channels * np.dtype(np.int16).itemsize
        usable_bytes = len(audio_frames) - (len(audio_frames) % frame_size)
        if usable_bytes <= 0:
            return b""

        # memoryview slicing trims the partial frame without copying the payload.
        samples = np.frombuffer(memoryview(audio_frames)[:usable_bytes], dtype=np.int16)
        mono = samples.reshape(-1, self.input_channels).astype(np.float32).mean(axis=1)

        target_length = int(len(mono) * self.target_sample_rate / self.input_sample_rate)
        if target_length <= 0:
            # Chunk is too short to yield even one output sample.
            return b""

        resampled = signal.resample(mono, target_length)

        # FFT resampling can overshoot the input range; clip so the int16 cast
        # saturates instead of wrapping around.
        limits = np.iinfo(np.int16)
        return np.clip(resampled, limits.min, limits.max).astype(np.int16).tobytes()

    async def _close_after_failure(self) -> None:
        """Close the live connection (if still open) without raising further."""
        live_client = getattr(self.client, "_live_client", None)
        if not live_client or live_client.is_closed:
            return
        try:
            await self.client.close_connection()
        except Exception as close_error:
            # Already on an error path: report, but do not mask the original
            # failure with a secondary one.
            error_message = f"Error closing Navana SDK connection: {str(close_error)}"
            print(error_message)
            self.emit("error", error_message)

    async def process_audio(
        self,
        audio_frames: bytes,
        language: Optional[str] = None,
        **kwargs: Any
    ) -> None:
        """
        Processes audio by down-mixing to mono, resampling, and sending to the STT service.

        The connection is opened lazily on the first non-empty call (and again
        after it has been closed). Any failure is printed, emitted as an
        ``error`` event, and the live connection is closed so that the next
        call starts a fresh one.

        Args:
            audio_frames: Interleaved 16-bit PCM with ``input_channels``
                channels at ``input_sample_rate``.
            language: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.
        """
        if not audio_frames:
            # Nothing to transcribe; avoid opening a connection or failing in
            # the resampler on an empty buffer.
            return

        try:
            if not self._connection_started:
                config = TranscriptionConfig(
                    model=self.model,
                    sample_rate=self.target_sample_rate
                )
                await self.client.start_connection(config=config)
                self._connection_started = True

            audio_bytes = self._to_target_pcm(audio_frames)
            if audio_bytes:
                await self.client.send_audio_stream(audio_bytes)

        except Exception as e:
            error_message = f"Audio processing error: {str(e)}"
            print(error_message)
            self.emit("error", error_message)
            self._connection_started = False
            await self._close_after_failure()

    async def aclose(self) -> None:
        """Cleans up resources by closing the SDK connection."""
        if not self._connection_started:
            return
        try:
            await self.client.close_connection()
        finally:
            # Reset even if the close fails or the Close event never fires,
            # so a later process_audio() call reconnects.
            self._connection_started = False
@@ -0,0 +1 @@
1
+ __version__ = "0.0.22"
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: videosdk-plugins-navana
3
+ Version: 0.0.22
4
+ Summary: VideoSDK Agent Framework plugin for Navana STT services
5
+ Author: videosdk
6
+ License-Expression: Apache-2.0
7
+ Keywords: ai,audio,bodhi,indian-languages,navana,speech-to-text,stt,video,videosdk
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Topic :: Communications :: Conferencing
11
+ Classifier: Topic :: Multimedia :: Sound/Audio
12
+ Classifier: Topic :: Multimedia :: Video
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Requires-Python: >=3.11
15
+ Requires-Dist: aiohttp
16
+ Requires-Dist: bodhi-sdk
17
+ Requires-Dist: numpy
18
+ Requires-Dist: scipy>=1.11.0
19
+ Requires-Dist: videosdk-agents>=0.0.22
20
+ Description-Content-Type: text/markdown
21
+
22
+ # VideoSDK Navana Plugin
23
+
24
+ Agent Framework plugin for STT services from Navana Tech.
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install videosdk-plugins-navana
30
+ ```
@@ -0,0 +1,6 @@
1
+ videosdk/plugins/navana/__init__.py,sha256=MOJ2Ipq4ukiczJhOp_vc4LriuwLWu8Vlil2hLb1n3l0,51
2
+ videosdk/plugins/navana/stt.py,sha256=IS2PUU160PUS7PTpTz-7DAzdt1kFir5AOh7cPbVH7p4,5680
3
+ videosdk/plugins/navana/version.py,sha256=NoiGDztYD4fsDDnfSPiSzRkknkNHhFUtKZj0mhQiTYM,22
4
+ videosdk_plugins_navana-0.0.22.dist-info/METADATA,sha256=PiPEKGsYM0H5Jlj_was64JCySeYVEHADjn8qxSP1ldU,913
5
+ videosdk_plugins_navana-0.0.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ videosdk_plugins_navana-0.0.22.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any