videosdk-plugins-lmnt 0.0.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ from .tts import LMNTTTS
2
+
3
# Public API of the package: only the LMNT TTS plugin class is exported.
__all__ = ["LMNTTTS"]
@@ -0,0 +1,236 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, AsyncIterator, Literal, Optional, Union
4
+ import httpx
5
+ import os
6
+ import asyncio
7
+
8
+ from videosdk.agents import TTS, segment_text
9
+
10
# --- LMNT endpoint and audio defaults --------------------------------------
# Base URL used when the caller does not pass ``base_url`` to the plugin.
LMNT_API_BASE_URL = "https://api.lmnt.com"
# Default sample rate requested from the API and reported to the TTS base class.
LMNT_SAMPLE_RATE = 24000
# Channel count reported to the TTS base class and used when sizing frames.
LMNT_CHANNELS = 1

# --- Default synthesis parameters ------------------------------------------
DEFAULT_MODEL = "blizzard"
DEFAULT_VOICE = "ava"
DEFAULT_LANGUAGE = "auto"
DEFAULT_FORMAT = "wav"

# --- Type aliases -----------------------------------------------------------
# Each alias lists the known values as a Literal but also admits the plain
# base type, so unlisted values still type-check.
_LanguageCode = Union[
    Literal[
        "auto",
        "de",
        "en",
        "es",
        "fr",
        "hi",
        "id",
        "it",
        "ja",
        "ko",
        "nl",
        "pl",
        "pt",
        "ru",
        "sv",
        "th",
        "tr",
        "uk",
        "vi",
        "zh",
    ],
    str,
]
_FormatType = Union[
    Literal["aac", "mp3", "mulaw", "raw", "wav"],
    str,
]
_SampleRate = Union[
    Literal[8000, 16000, 24000],
    int,
]
26
+
27
+
28
class LMNTTTS(TTS):
    """Text-to-speech plugin that streams synthesized audio from LMNT over HTTP.

    Each text segment is POSTed to ``{base_url}/v1/ai/speech/bytes``. When the
    response is a RIFF/WAV stream, its header is stripped and the remaining
    bytes are cut into 20 ms frames that are scheduled onto ``audio_track``.

    ``audio_track`` and ``loop`` start as ``None`` and must be assigned by the
    caller before ``synthesize`` is used; otherwise an ``"error"`` event is
    emitted and nothing is synthesized.
    """

    # A RIFF header is only looked for once at least this many bytes arrived.
    _WAV_HEADER_MIN_BYTES = 44
    # Duration of one frame handed to the audio track, in milliseconds.
    _FRAME_MS = 20
    # Bytes per sample used for frame sizing — presumably 16-bit PCM;
    # NOTE(review): confirm against the audio LMNT actually returns.
    _BYTES_PER_SAMPLE = 2

    def __init__(
        self,
        *,
        api_key: Optional[str] = None,
        voice: str = DEFAULT_VOICE,
        model: str = DEFAULT_MODEL,
        language: _LanguageCode = DEFAULT_LANGUAGE,
        format: _FormatType = DEFAULT_FORMAT,
        sample_rate: _SampleRate = LMNT_SAMPLE_RATE,
        seed: Optional[int] = None,
        temperature: float = 1.0,
        top_p: float = 0.8,
        base_url: str = LMNT_API_BASE_URL,
    ) -> None:
        """Initialize the LMNT TTS plugin.

        Args:
            api_key (Optional[str], optional): LMNT API key. Falls back to the
                ``LMNT_API_KEY`` environment variable. Defaults to None.
            voice (str): The voice to use for the TTS plugin. Defaults to "ava".
            model (str): The model to use for the TTS plugin. Defaults to "blizzard".
            language (_LanguageCode): The language to use for the TTS plugin. Defaults to "auto".
            format (_FormatType): The format to use for the TTS plugin. Defaults to "wav".
            sample_rate (_SampleRate): The sample rate to use for the TTS plugin. Must be one of: 8000, 16000, 24000. Defaults to 24000.
            seed (Optional[int], optional): The seed to use for the TTS plugin. Defaults to None.
            temperature (float): The temperature to use for the TTS plugin. Defaults to 1.0.
            top_p (float): The top_p to use for the TTS plugin. Defaults to 0.8.
            base_url (str): The base URL to use for the TTS plugin. Defaults to "https://api.lmnt.com".

        Raises:
            ValueError: If no API key is given and ``LMNT_API_KEY`` is unset or empty.
        """
        super().__init__(sample_rate=sample_rate, num_channels=LMNT_CHANNELS)

        self.voice = voice
        self.model = model
        self.language = language
        self.format = format
        self.output_sample_rate = sample_rate
        self.seed = seed
        self.temperature = temperature
        self.top_p = top_p
        self.base_url = base_url
        self.audio_track = None
        self.loop = None
        self._first_chunk_sent = False
        self._interrupted = False

        self.api_key = api_key or os.getenv("LMNT_API_KEY")
        if not self.api_key:
            raise ValueError(
                "LMNT API key must be provided either through api_key parameter "
                "or LMNT_API_KEY environment variable"
            )

        # One shared client so connections are pooled across segments.
        self._client = httpx.AsyncClient(
            timeout=httpx.Timeout(connect=15.0, read=30.0, write=5.0, pool=5.0),
            follow_redirects=True,
            limits=httpx.Limits(
                max_connections=50,
                max_keepalive_connections=50,
                keepalive_expiry=120,
            ),
        )

    def reset_first_audio_tracking(self) -> None:
        """Reset the first audio tracking state for next TTS task"""
        self._first_chunk_sent = False

    async def synthesize(
        self,
        text: AsyncIterator[str] | str,
        voice_id: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Convert text to speech using LMNT's TTS API and stream to audio track.

        Any exception raised while synthesizing is reported through an
        ``"error"`` event instead of propagating to the caller.

        Args:
            text: Text to convert to speech. An async iterator is passed
                through ``segment_text`` and synthesized segment by segment.
            voice_id: Optional voice override (uses voice from __init__ if not provided)
            **kwargs: Per-call overrides for ``model``, ``language``, ``format``,
                ``sample_rate``, ``temperature``, ``top_p`` and ``seed``.
        """
        try:
            if not self.audio_track or not self.loop:
                self.emit("error", "Audio track or event loop not set")
                return

            # A new request clears any interruption left from the previous one.
            self._interrupted = False

            if isinstance(text, AsyncIterator):
                async for segment in segment_text(text):
                    if self._interrupted:
                        break
                    await self._synthesize_segment(segment, voice_id, **kwargs)
            else:
                await self._synthesize_segment(text, voice_id, **kwargs)

        except Exception as e:
            self.emit("error", f"TTS synthesis failed: {e}")

    async def _synthesize_segment(
        self, text: str, voice_id: Optional[str] = None, **kwargs: Any
    ) -> None:
        """Synthesize a single text segment and stream its audio.

        Blank text and already-interrupted requests are skipped. Non-200
        responses are reported through an ``"error"`` event.
        """
        if not text.strip() or self._interrupted:
            return

        payload = self._build_payload(text, voice_id, kwargs)
        headers = {
            "X-API-Key": self.api_key,
            "Content-Type": "application/json",
        }
        url = f"{self.base_url}/v1/ai/speech/bytes"

        async with self._client.stream(
            "POST", url, headers=headers, json=payload
        ) as response:
            if response.status_code != 200:
                await self._report_http_error(response)
                return
            await self._stream_audio(response)

    def _build_payload(
        self, text: str, voice_id: Optional[str], overrides: dict[str, Any]
    ) -> dict[str, Any]:
        """Build the JSON request body, letting ``overrides`` win over instance defaults."""
        payload: dict[str, Any] = {
            "voice": voice_id or self.voice,
            "text": text,
            "model": overrides.get("model", self.model),
            "language": overrides.get("language", self.language),
            "format": overrides.get("format", self.format),
            "sample_rate": overrides.get("sample_rate", self.output_sample_rate),
            "temperature": overrides.get("temperature", self.temperature),
            "top_p": overrides.get("top_p", self.top_p),
        }

        # The seed is only sent when one was actually configured.
        seed = overrides.get("seed", self.seed)
        if seed is not None:
            payload["seed"] = seed
        return payload

    async def _report_http_error(self, response: httpx.Response) -> None:
        """Emit an ``"error"`` event describing a non-200 response."""
        import json

        status = response.status_code
        if status == 400:
            body = await response.aread()
            try:
                error_msg = json.loads(body.decode()).get("error", "Bad request")
            except (ValueError, AttributeError):
                # Undecodable bytes, invalid JSON, or a JSON value without
                # ``.get`` (not an object): fall back to a generic message.
                error_msg = "Bad request"
            self.emit("error", f"LMNT API error: {error_msg}")
        elif status == 401:
            self.emit(
                "error", "LMNT API authentication failed. Please check your API key."
            )
        else:
            self.emit("error", f"LMNT API error: HTTP {status}")

    async def _stream_audio(self, response: httpx.Response) -> None:
        """Strip the WAV header from ``response`` and push fixed-size frames.

        NOTE(review): audio is only forwarded once a RIFF header with a
        ``data`` chunk has been seen, so responses in other formats produce
        no audio and no error event — confirm whether that is intended.
        """
        frame_size = int(
            self.output_sample_rate
            * LMNT_CHANNELS
            * self._BYTES_PER_SAMPLE
            * self._FRAME_MS
            / 1000
        )
        pending = bytearray()
        header_done = False

        async for chunk in response.aiter_bytes():
            if self._interrupted:
                break
            if not chunk:
                continue
            pending += chunk

            if not header_done:
                header_done = self._strip_wav_header(pending)
                if not header_done:
                    continue

            # Re-check the flag per frame: one network chunk can hold many
            # frames and the sleep below yields to interrupt().
            while len(pending) >= frame_size and not self._interrupted:
                frame = bytes(pending[:frame_size])
                del pending[:frame_size]
                await self._push_frame(frame)
                await asyncio.sleep(0.01)

        # Flush the remainder as one zero-padded frame, unless the request
        # was interrupted (no audio should follow an interrupt).
        if pending and header_done and not self._interrupted:
            shortfall = frame_size - len(pending)
            if shortfall > 0:
                pending += b"\x00" * shortfall
            await self._push_frame(bytes(pending))

    @classmethod
    def _strip_wav_header(cls, buffer: bytearray) -> bool:
        """Remove a leading RIFF/WAV header from ``buffer`` in place.

        Returns:
            True once the header has been removed, so ``buffer`` starts right
            after the ``data`` chunk's 8-byte id/size prefix. False while more
            bytes are needed or when the data does not start with ``RIFF``.
        """
        if len(buffer) < cls._WAV_HEADER_MIN_BYTES or not buffer.startswith(b"RIFF"):
            return False
        marker = buffer.find(b"data")
        if marker == -1:
            return False
        # Skip the 4-byte chunk id plus the 4-byte chunk size.
        payload_start = marker + 8
        if len(buffer) < payload_start:
            # The size field is still in flight; cutting now would leave its
            # tail in the audio stream and misalign every following sample.
            return False
        del buffer[:payload_start]
        return True

    async def _push_frame(self, frame: bytes) -> None:
        """Fire the first-audio callback once, then schedule ``frame`` on the track."""
        if not self._first_chunk_sent and self._first_audio_callback:
            self._first_chunk_sent = True
            await self._first_audio_callback()

        # Scheduled rather than awaited, so streaming is not blocked on the track.
        # NOTE(review): the task reference is not retained — confirm tasks
        # cannot be garbage-collected before they run.
        self.loop.create_task(self.audio_track.add_new_bytes(frame))

    async def aclose(self) -> None:
        """Cleanup resources"""
        try:
            await self._client.aclose()
        finally:
            # The base class cleanup must run even if closing the client fails.
            await super().aclose()

    async def interrupt(self) -> None:
        """Interrupt the TTS process"""
        self._interrupted = True
        if self.audio_track:
            self.audio_track.interrupt()
@@ -0,0 +1 @@
1
# Version of this plugin package.
__version__ = "0.0.47"
@@ -0,0 +1,27 @@
1
+ Metadata-Version: 2.4
2
+ Name: videosdk-plugins-lmnt
3
+ Version: 0.0.47
4
+ Summary: VideoSDK Agent Framework plugin for LMNT AI Text-to-Speech services
5
+ Author: videosdk
6
+ License-Expression: Apache-2.0
7
+ Keywords: ai,audio,lmnt,tts,video,videosdk
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Topic :: Communications :: Conferencing
11
+ Classifier: Topic :: Multimedia :: Sound/Audio
12
+ Classifier: Topic :: Multimedia :: Video
13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
+ Requires-Python: >=3.11
15
+ Requires-Dist: httpx>=0.24.0
16
+ Requires-Dist: videosdk-agents>=0.0.47
17
+ Description-Content-Type: text/markdown
18
+
19
+ # VideoSDK LMNT AI Plugin
20
+
21
+ Agent Framework plugin for TTS services from LMNT.
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ pip install videosdk-plugins-lmnt
27
+ ```
@@ -0,0 +1,6 @@
1
+ videosdk/plugins/lmnt/__init__.py,sha256=JI72LJNs0cCg3S1lweyO-yiBLnViM6Dfp4zPvYJkDJQ,56
2
+ videosdk/plugins/lmnt/tts.py,sha256=Gs_QkKNQqLp1GpiOJSPz-gfpCYA1KLD4EBs97SJ5aIk,9039
3
+ videosdk/plugins/lmnt/version.py,sha256=OkbXUm6WCcFd54358Y0HZk3Aq5hLc0sK6xgxJ6RmT5M,23
4
+ videosdk_plugins_lmnt-0.0.47.dist-info/METADATA,sha256=A38lqYcR_ZOLndDAY1jW7iy8cikWRKyoo3syqbOvUxU,806
5
+ videosdk_plugins_lmnt-0.0.47.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
+ videosdk_plugins_lmnt-0.0.47.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any