videosdk-plugins-navana 0.0.48__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ myenv/
2
+ venv/
3
+ env/
4
+ __pycache__/
5
+ .venv/
6
+ .env
7
+ .env.local
8
+ test_env/
9
+ dist/
10
+ .DS_Store
11
+ node_modules/
12
+ credentials.json
13
+ .Python
14
+ build/
15
+ eggs/
16
+ sdist/
17
+ wheels/
18
+ docs/
19
+ agent-sdk-reference/
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: videosdk-plugins-navana
3
+ Version: 0.0.48
4
+ Summary: VideoSDK Agent Framework plugin for Navana STT services
5
+ Author: videosdk
6
+ License-Expression: Apache-2.0
7
+ Keywords: ai,audio,bodhi,indian-languages,navana,speech-to-text,stt,video,videosdk
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Topic :: Communications :: Conferencing
11
+ Classifier: Topic :: Multimedia :: Sound/Audio
12
+ Classifier: Topic :: Multimedia :: Video
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Requires-Python: >=3.11
15
+ Requires-Dist: aiohttp
16
+ Requires-Dist: bodhi-sdk
17
+ Requires-Dist: numpy
18
+ Requires-Dist: scipy>=1.11.0
19
+ Requires-Dist: videosdk-agents>=0.0.48
20
+ Description-Content-Type: text/markdown
21
+
22
+ # VideoSDK Navana Plugin
23
+
24
+ Agent Framework plugin for STT services from Navana Tech.
25
+
26
+ ## Installation
27
+
28
+ ```bash
29
+ pip install videosdk-plugins-navana
30
+ ```
@@ -0,0 +1,9 @@
1
+ # VideoSDK Navana Plugin
2
+
3
+ Agent Framework plugin for STT services from Navana Tech.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install videosdk-plugins-navana
9
+ ```
@@ -0,0 +1,49 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "videosdk-plugins-navana"
7
+ dynamic = ["version"]
8
+ description = "VideoSDK Agent Framework plugin for Navana STT services"
9
+ readme = "README.md"
10
+ license = "Apache-2.0"
11
+ requires-python = ">=3.11"
12
+ authors = [{ name = "videosdk" }]
13
+ keywords = [
14
+ "video",
15
+ "audio",
16
+ "ai",
17
+ "videosdk",
18
+ "bodhi",
19
+ "navana",
20
+ "stt",
21
+ "speech-to-text",
22
+ "navana",
23
+ "indian-languages",
24
+ ]
25
+ classifiers = [
26
+ "Intended Audience :: Developers",
27
+ "Development Status :: 4 - Beta",
28
+ "Intended Audience :: Developers",
29
+ "Topic :: Communications :: Conferencing",
30
+ "Topic :: Multimedia :: Sound/Audio",
31
+ "Topic :: Multimedia :: Video",
32
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
33
+ ]
34
+ dependencies = [
35
+ "videosdk-agents>=0.0.48",
36
+ "aiohttp",
37
+ "numpy",
38
+ "bodhi-sdk",
39
+ "scipy>=1.11.0",
40
+ ]
41
+
42
+ [tool.hatch.version]
43
+ path = "videosdk/plugins/navana/version.py"
44
+
45
+ [tool.hatch.build.targets.wheel]
46
+ packages = ["videosdk"]
47
+
48
+ [tool.hatch.build.targets.sdist]
49
+ include = ["/videosdk"]
@@ -0,0 +1,3 @@
1
+ from .stt import NavanaSTT
2
+
3
+ __all__ = ["NavanaSTT"]
@@ -0,0 +1,158 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import os
5
+ from typing import Any, Optional
6
+ import numpy as np
7
+ from videosdk.agents import STT as BaseSTT, STTResponse, SpeechData, SpeechEventType, global_event_emitter
8
+ from bodhi import BodhiClient, TranscriptionConfig, TranscriptionResponse, LiveTranscriptionEvents
9
+
10
+ try:
11
+ from scipy import signal
12
+ SCIPY_AVAILABLE = True
13
+ except ImportError:
14
+ SCIPY_AVAILABLE = False
15
+
16
+
17
class NavanaSTT(BaseSTT):
    """
    VideoSDK Agent Framework STT plugin for Navana's Bodhi API.

    Incoming audio is interpreted as interleaved 16-bit stereo PCM at
    ``input_sample_rate``. Each chunk is averaged down to mono, resampled to
    ``target_sample_rate`` (8000 Hz) and streamed through the 'bodhi-sdk'
    client. Bodhi transcript events are forwarded as INTERIM results; the
    most recent transcript is promoted to FINAL when the utterance ends.
    """

    # Two channels of 2-byte (int16) samples make up one stereo frame.
    _CHANNELS = 2
    _BYTES_PER_STEREO_FRAME = 4

    def __init__(
        self,
        *,
        api_key: str | None = None,
        customer_id: str | None = None,
        model: str = "en-general-v2-8khz",
        language: str = "en",
        input_sample_rate: int = 48000,
    ) -> None:
        """Initialize the Navana STT plugin.

        Args:
            api_key: Navana API key. Falls back to the NAVANA_API_KEY
                environment variable when not given.
            customer_id: Navana customer ID. Falls back to the
                NAVANA_CUSTOMER_ID environment variable when not given.
            model: Model name passed to the Bodhi transcription config.
                Defaults to "en-general-v2-8khz".
            language: Language tag attached to emitted transcripts.
                Defaults to "en".
            input_sample_rate: Sample rate (Hz) of the audio handed to
                ``process_audio``. Defaults to 48000.

        Raises:
            ImportError: If scipy could not be imported.
            ValueError: If ``input_sample_rate`` is not positive, or if the
                API key or customer ID is missing.
        """
        super().__init__()

        if not SCIPY_AVAILABLE:
            raise ImportError(
                "The 'scipy' library is not installed. Please install it with 'pip install scipy' to use the NavanaSTT plugin for audio resampling.")

        # Fail fast: a non-positive rate would otherwise surface as an error
        # on every single audio chunk.
        if input_sample_rate <= 0:
            raise ValueError("input_sample_rate must be a positive integer.")

        self.customer_id = customer_id or os.getenv("NAVANA_CUSTOMER_ID")
        self.api_key = api_key or os.getenv("NAVANA_API_KEY")

        if not self.api_key or not self.customer_id:
            raise ValueError(
                "Navana API key and Customer ID must be provided either through parameters or "
                "NAVANA_API_KEY/NAVANA_CUSTOMER_ID environment variables."
            )

        self.model = model
        self.language = language
        self.input_sample_rate = input_sample_rate
        self.target_sample_rate = 8000

        self.client = BodhiClient(
            api_key=self.api_key, customer_id=self.customer_id)
        self._connection_started = False
        self._last_transcript_text = ""

        self._register_event_handlers()

    def _register_event_handlers(self):
        """Registers handlers for the Bodhi client's transcription events."""
        self.client.on(LiveTranscriptionEvents.Transcript, self._on_transcript)
        self.client.on(LiveTranscriptionEvents.UtteranceEnd,
                       self._on_utterance_end)
        self.client.on(LiveTranscriptionEvents.SpeechStarted,
                       self._on_speech_started)
        self.client.on(LiveTranscriptionEvents.Error, self._on_error)
        self.client.on(LiveTranscriptionEvents.Close, self._on_close)

    async def _on_transcript(self, response: TranscriptionResponse):
        """Forwards a transcript as INTERIM and remembers it for later promotion."""
        if response.text and self._transcript_callback:
            self._last_transcript_text = response.text
            event = STTResponse(
                event_type=SpeechEventType.INTERIM,
                data=SpeechData(text=response.text,
                                language=self.language, confidence=1.0)
            )
            await self._transcript_callback(event)

    async def _on_utterance_end(self, response: dict):
        """On utterance end, promotes the last known transcript to FINAL."""
        if self._last_transcript_text and self._transcript_callback:
            final_text = self._last_transcript_text
            # Clear the buffer first so the same text is never emitted twice.
            self._last_transcript_text = ""
            event = STTResponse(
                event_type=SpeechEventType.FINAL,
                data=SpeechData(text=final_text,
                                language=self.language, confidence=1.0)
            )
            await self._transcript_callback(event)

    async def _on_speech_started(self, response: TranscriptionResponse):
        """Signals the start of speech on the global event emitter."""
        global_event_emitter.emit("speech_started")

    async def _on_error(self, e: Exception):
        """Prints an SDK error and re-emits it as an "error" event."""
        error_message = f"Navana SDK Error: {str(e)}"
        print(error_message)
        self.emit("error", error_message)

    async def _on_close(self):
        """Marks the connection as closed so the next chunk reconnects."""
        print("Navana SDK connection closed.")
        self._connection_started = False

    async def _close_live_connection(self) -> None:
        """Closes the SDK connection if it is still open, without raising.

        Used from error handling, where a failure to close must not replace
        or escape alongside the error that is already being reported.
        """
        live_client = getattr(self.client, "_live_client", None)
        if not live_client or getattr(live_client, "is_closed", True):
            return
        try:
            await self.client.close_connection()
        except Exception as close_error:
            print(f"Navana SDK Error: {str(close_error)}")

    async def process_audio(
        self,
        audio_frames: bytes,
        language: Optional[str] = None,
        **kwargs: Any
    ) -> None:
        """
        Processes audio by converting stereo to mono, resampling, and sending to the STT service.

        Args:
            audio_frames: Interleaved 16-bit stereo PCM bytes. Trailing bytes
                that do not form a complete stereo frame are ignored.
            language: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.

        Any exception raised while connecting, converting or sending is
        printed, emitted as an "error" event, and the connection is reset so
        that the next call reconnects.
        """
        try:
            # Work in whole stereo frames only; a partial trailing frame would
            # make frombuffer/reshape raise.
            frame_count = len(audio_frames) // self._BYTES_PER_STEREO_FRAME
            # Integer arithmetic avoids losing a sample to float rounding.
            target_length = (
                frame_count * self.target_sample_rate // self.input_sample_rate
            )
            if target_length <= 0:
                # Nothing (or too little) to send; do not open a connection.
                return

            if not self._connection_started:
                config = TranscriptionConfig(
                    model=self.model,
                    sample_rate=self.target_sample_rate
                )
                await self.client.start_connection(config=config)
                self._connection_started = True

            raw_audio_data = np.frombuffer(
                audio_frames,
                dtype=np.int16,
                count=frame_count * self._CHANNELS,
            )
            stereo_audio = raw_audio_data.reshape(-1, self._CHANNELS)
            mono_audio_float = stereo_audio.astype(np.float32).mean(axis=1)

            if target_length == frame_count:
                # Rates already match; resampling would be a no-op.
                resampled_data = mono_audio_float
            else:
                resampled_data = signal.resample(
                    mono_audio_float, target_length)

            # Resampling can overshoot the int16 range; clip so the cast
            # below cannot wrap around.
            int16_limits = np.iinfo(np.int16)
            clipped_data = np.clip(
                resampled_data, int16_limits.min, int16_limits.max)
            audio_bytes = clipped_data.astype(np.int16).tobytes()

            await self.client.send_audio_stream(audio_bytes)

        except Exception as e:
            error_message = f"Audio processing error: {str(e)}"
            print(error_message)
            self.emit("error", error_message)
            self._connection_started = False
            await self._close_live_connection()

    async def aclose(self) -> None:
        """Cleans up resources by closing the SDK connection.

        The connection flag is reset and the base-class cleanup runs even if
        closing the SDK connection raises; that exception still propagates.
        """
        try:
            if self._connection_started:
                await self.client.close_connection()
        finally:
            self._connection_started = False
            await super().aclose()
@@ -0,0 +1 @@
1
+ __version__ = "0.0.48"