videosdk-plugins-navana 0.0.30__tar.gz → 0.0.32__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of videosdk-plugins-navana might be problematic. Click here for more details.
- {videosdk_plugins_navana-0.0.30 → videosdk_plugins_navana-0.0.32}/.gitignore +3 -2
- {videosdk_plugins_navana-0.0.30 → videosdk_plugins_navana-0.0.32}/PKG-INFO +2 -2
- {videosdk_plugins_navana-0.0.30 → videosdk_plugins_navana-0.0.32}/pyproject.toml +1 -1
- {videosdk_plugins_navana-0.0.30 → videosdk_plugins_navana-0.0.32}/videosdk/plugins/navana/stt.py +38 -22
- videosdk_plugins_navana-0.0.32/videosdk/plugins/navana/version.py +1 -0
- videosdk_plugins_navana-0.0.30/videosdk/plugins/navana/version.py +0 -1
- {videosdk_plugins_navana-0.0.30 → videosdk_plugins_navana-0.0.32}/README.md +0 -0
- {videosdk_plugins_navana-0.0.30 → videosdk_plugins_navana-0.0.32}/videosdk/plugins/navana/__init__.py +0 -0
|
@@ -2,13 +2,12 @@ myenv/
|
|
|
2
2
|
venv/
|
|
3
3
|
env/
|
|
4
4
|
__pycache__/
|
|
5
|
-
|
|
5
|
+
.venv/
|
|
6
6
|
.env
|
|
7
7
|
.env.local
|
|
8
8
|
test_env/
|
|
9
9
|
dist/
|
|
10
10
|
.DS_Store
|
|
11
|
-
|
|
12
11
|
node_modules/
|
|
13
12
|
credentials.json
|
|
14
13
|
.Python
|
|
@@ -16,3 +15,5 @@ build/
|
|
|
16
15
|
eggs/
|
|
17
16
|
sdist/
|
|
18
17
|
wheels/
|
|
18
|
+
docs/
|
|
19
|
+
agent-sdk-reference/
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: videosdk-plugins-navana
|
|
3
|
-
Version: 0.0.30
|
|
3
|
+
Version: 0.0.32
|
|
4
4
|
Summary: VideoSDK Agent Framework plugin for Navana STT services
|
|
5
5
|
Author: videosdk
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -16,7 +16,7 @@ Requires-Dist: aiohttp
|
|
|
16
16
|
Requires-Dist: bodhi-sdk
|
|
17
17
|
Requires-Dist: numpy
|
|
18
18
|
Requires-Dist: scipy>=1.11.0
|
|
19
|
-
Requires-Dist: videosdk-agents>=0.0.
|
|
19
|
+
Requires-Dist: videosdk-agents>=0.0.32
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
21
|
|
|
22
22
|
# VideoSDK Navana Plugin
|
{videosdk_plugins_navana-0.0.30 → videosdk_plugins_navana-0.0.32}/videosdk/plugins/navana/stt.py
RENAMED
|
@@ -13,53 +13,67 @@ try:
|
|
|
13
13
|
except ImportError:
|
|
14
14
|
SCIPY_AVAILABLE = False
|
|
15
15
|
|
|
16
|
+
|
|
16
17
|
class NavanaSTT(BaseSTT):
|
|
17
18
|
"""
|
|
18
19
|
VideoSDK Agent Framework STT plugin for Navana's Bodhi API.
|
|
19
|
-
|
|
20
|
+
|
|
20
21
|
This plugin uses the official 'bodhi-sdk' and implements best practices for audio handling,
|
|
21
22
|
including robust stereo-to-mono conversion and event model adaptation.
|
|
22
23
|
"""
|
|
23
|
-
|
|
24
|
+
|
|
24
25
|
def __init__(
|
|
25
26
|
self,
|
|
26
27
|
*,
|
|
27
|
-
customer_id: str | None = None,
|
|
28
28
|
api_key: str | None = None,
|
|
29
|
+
customer_id: str | None = None,
|
|
29
30
|
model: str = "en-general-v2-8khz",
|
|
30
31
|
language: str = "en",
|
|
31
32
|
input_sample_rate: int = 48000,
|
|
32
33
|
) -> None:
|
|
34
|
+
"""Initialize the Navana STT plugin.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
api_key (Optional[str], optional): Navana API key. Defaults to None.
|
|
38
|
+
customer_id (Optional[str], optional): Navana customer ID. Defaults to None.
|
|
39
|
+
model (str): The model to use for the STT plugin. Defaults to "en-general-v2-8khz".
|
|
40
|
+
language (str): The language to use for the STT plugin. Defaults to "en".
|
|
41
|
+
input_sample_rate (int): The input sample rate to use for the STT plugin. Defaults to 48000.
|
|
42
|
+
"""
|
|
33
43
|
super().__init__()
|
|
34
44
|
|
|
35
45
|
if not SCIPY_AVAILABLE:
|
|
36
|
-
raise ImportError(
|
|
46
|
+
raise ImportError(
|
|
47
|
+
"The 'scipy' library is not installed. Please install it with 'pip install scipy' to use the NavanaSTT plugin for audio resampling.")
|
|
37
48
|
|
|
38
49
|
self.customer_id = customer_id or os.getenv("NAVANA_CUSTOMER_ID")
|
|
39
50
|
self.api_key = api_key or os.getenv("NAVANA_API_KEY")
|
|
40
|
-
|
|
51
|
+
|
|
41
52
|
if not self.api_key or not self.customer_id:
|
|
42
53
|
raise ValueError(
|
|
43
54
|
"Navana API key and Customer ID must be provided either through parameters or "
|
|
44
55
|
"NAVANA_API_KEY/NAVANA_CUSTOMER_ID environment variables."
|
|
45
56
|
)
|
|
46
|
-
|
|
57
|
+
|
|
47
58
|
self.model = model
|
|
48
59
|
self.language = language
|
|
49
60
|
self.input_sample_rate = input_sample_rate
|
|
50
61
|
self.target_sample_rate = 8000
|
|
51
|
-
|
|
52
|
-
self.client = BodhiClient(
|
|
62
|
+
|
|
63
|
+
self.client = BodhiClient(
|
|
64
|
+
api_key=self.api_key, customer_id=self.customer_id)
|
|
53
65
|
self._connection_started = False
|
|
54
66
|
self._last_transcript_text = ""
|
|
55
|
-
|
|
67
|
+
|
|
56
68
|
self._register_event_handlers()
|
|
57
69
|
|
|
58
70
|
def _register_event_handlers(self):
|
|
59
71
|
"""Registers handlers for the Bodhi client's transcription events."""
|
|
60
72
|
self.client.on(LiveTranscriptionEvents.Transcript, self._on_transcript)
|
|
61
|
-
self.client.on(LiveTranscriptionEvents.UtteranceEnd,
|
|
62
|
-
|
|
73
|
+
self.client.on(LiveTranscriptionEvents.UtteranceEnd,
|
|
74
|
+
self._on_utterance_end)
|
|
75
|
+
self.client.on(LiveTranscriptionEvents.SpeechStarted,
|
|
76
|
+
self._on_speech_started)
|
|
63
77
|
self.client.on(LiveTranscriptionEvents.Error, self._on_error)
|
|
64
78
|
self.client.on(LiveTranscriptionEvents.Close, self._on_close)
|
|
65
79
|
|
|
@@ -69,7 +83,8 @@ class NavanaSTT(BaseSTT):
|
|
|
69
83
|
self._last_transcript_text = response.text
|
|
70
84
|
event = STTResponse(
|
|
71
85
|
event_type=SpeechEventType.INTERIM,
|
|
72
|
-
data=SpeechData(text=response.text,
|
|
86
|
+
data=SpeechData(text=response.text,
|
|
87
|
+
language=self.language, confidence=1.0)
|
|
73
88
|
)
|
|
74
89
|
await self._transcript_callback(event)
|
|
75
90
|
|
|
@@ -80,7 +95,8 @@ class NavanaSTT(BaseSTT):
|
|
|
80
95
|
self._last_transcript_text = ""
|
|
81
96
|
event = STTResponse(
|
|
82
97
|
event_type=SpeechEventType.FINAL,
|
|
83
|
-
data=SpeechData(text=final_text,
|
|
98
|
+
data=SpeechData(text=final_text,
|
|
99
|
+
language=self.language, confidence=1.0)
|
|
84
100
|
)
|
|
85
101
|
await self._transcript_callback(event)
|
|
86
102
|
|
|
@@ -95,7 +111,7 @@ class NavanaSTT(BaseSTT):
|
|
|
95
111
|
async def _on_close(self):
|
|
96
112
|
print("Navana SDK connection closed.")
|
|
97
113
|
self._connection_started = False
|
|
98
|
-
|
|
114
|
+
|
|
99
115
|
async def process_audio(
|
|
100
116
|
self,
|
|
101
117
|
audio_frames: bytes,
|
|
@@ -113,19 +129,20 @@ class NavanaSTT(BaseSTT):
|
|
|
113
129
|
)
|
|
114
130
|
await self.client.start_connection(config=config)
|
|
115
131
|
self._connection_started = True
|
|
116
|
-
|
|
132
|
+
|
|
117
133
|
raw_audio_data = np.frombuffer(audio_frames, dtype=np.int16)
|
|
118
134
|
stereo_audio = raw_audio_data.reshape(-1, 2)
|
|
119
135
|
mono_audio_float = stereo_audio.astype(np.float32).mean(axis=1)
|
|
120
136
|
resampled_data = signal.resample(
|
|
121
|
-
mono_audio_float,
|
|
122
|
-
int(len(mono_audio_float) *
|
|
137
|
+
mono_audio_float,
|
|
138
|
+
int(len(mono_audio_float) *
|
|
139
|
+
self.target_sample_rate / self.input_sample_rate)
|
|
123
140
|
)
|
|
124
|
-
|
|
141
|
+
|
|
125
142
|
audio_bytes = resampled_data.astype(np.int16).tobytes()
|
|
126
|
-
|
|
143
|
+
|
|
127
144
|
await self.client.send_audio_stream(audio_bytes)
|
|
128
|
-
|
|
145
|
+
|
|
129
146
|
except Exception as e:
|
|
130
147
|
error_message = f"Audio processing error: {str(e)}"
|
|
131
148
|
print(error_message)
|
|
@@ -134,8 +151,7 @@ class NavanaSTT(BaseSTT):
|
|
|
134
151
|
if self.client._live_client and not self.client._live_client.is_closed:
|
|
135
152
|
await self.client.close_connection()
|
|
136
153
|
|
|
137
|
-
|
|
138
154
|
async def aclose(self) -> None:
|
|
139
155
|
"""Cleans up resources by closing the SDK connection."""
|
|
140
156
|
if self._connection_started:
|
|
141
|
-
await self.client.close_connection()
|
|
157
|
+
await self.client.close_connection()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.0.32"
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.0.30"
|
|
File without changes
|
|
File without changes
|