videosdk-plugins-lmnt 0.0.26__tar.gz → 0.0.28__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of videosdk-plugins-lmnt might be problematic. More details are available via the link on the original page.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: videosdk-plugins-lmnt
3
- Version: 0.0.26
3
+ Version: 0.0.28
4
4
  Summary: VideoSDK Agent Framework plugin for LMNT AI Text-to-Speech services
5
5
  Author: videosdk
6
6
  License-Expression: Apache-2.0
@@ -13,7 +13,7 @@ Classifier: Topic :: Multimedia :: Video
13
13
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
14
14
  Requires-Python: >=3.11
15
15
  Requires-Dist: httpx>=0.24.0
16
- Requires-Dist: videosdk-agents>=0.0.26
16
+ Requires-Dist: videosdk-agents>=0.0.28
17
17
  Description-Content-Type: text/markdown
18
18
 
19
19
  # VideoSDK LMNT AI Plugin
@@ -21,7 +21,7 @@ classifiers = [
21
21
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
22
22
  ]
23
23
  dependencies = [
24
- "videosdk-agents>=0.0.26",
24
+ "videosdk-agents>=0.0.28",
25
25
  "httpx>=0.24.0",
26
26
  ]
27
27
 
@@ -0,0 +1,222 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, AsyncIterator, Literal, Optional, Union
4
+ import httpx
5
+ import os
6
+ import asyncio
7
+
8
+ from videosdk.agents import TTS, segment_text
9
+
10
+ LMNT_API_BASE_URL = "https://api.lmnt.com"
11
+ LMNT_SAMPLE_RATE = 24000
12
+ LMNT_CHANNELS = 1
13
+
14
+ DEFAULT_MODEL = "blizzard"
15
+ DEFAULT_VOICE = "ava"
16
+ DEFAULT_LANGUAGE = "auto"
17
+ DEFAULT_FORMAT = "wav"
18
+
19
+ _LanguageCode = Union[
20
+ Literal["auto", "de", "en", "es", "fr", "hi", "id", "it", "ja",
21
+ "ko", "nl", "pl", "pt", "ru", "sv", "th", "tr", "uk", "vi", "zh"],
22
+ str
23
+ ]
24
+ _FormatType = Union[Literal["aac", "mp3", "mulaw", "raw", "wav"], str]
25
+ _SampleRate = Union[Literal[8000, 16000, 24000], int]
26
+
27
+
28
+ class LMNTTTS(TTS):
29
+ def __init__(
30
+ self,
31
+ *,
32
+ voice: str = DEFAULT_VOICE,
33
+ model: str = DEFAULT_MODEL,
34
+ language: _LanguageCode = DEFAULT_LANGUAGE,
35
+ format: _FormatType = DEFAULT_FORMAT,
36
+ sample_rate: _SampleRate = LMNT_SAMPLE_RATE,
37
+ seed: Optional[int] = None,
38
+ temperature: float = 1.0,
39
+ top_p: float = 0.8,
40
+ api_key: Optional[str] = None,
41
+ base_url: str = LMNT_API_BASE_URL,
42
+ ) -> None:
43
+ super().__init__(sample_rate=sample_rate, num_channels=LMNT_CHANNELS)
44
+
45
+ self.voice = voice
46
+ self.model = model
47
+ self.language = language
48
+ self.format = format
49
+ self.output_sample_rate = sample_rate
50
+ self.seed = seed
51
+ self.temperature = temperature
52
+ self.top_p = top_p
53
+ self.base_url = base_url
54
+ self.audio_track = None
55
+ self.loop = None
56
+ self._first_chunk_sent = False
57
+ self._interrupted = False
58
+
59
+ self.api_key = api_key or os.getenv("LMNT_API_KEY")
60
+ if not self.api_key:
61
+ raise ValueError(
62
+ "LMNT API key must be provided either through api_key parameter "
63
+ "or LMNT_API_KEY environment variable"
64
+ )
65
+
66
+ self._client = httpx.AsyncClient(
67
+ timeout=httpx.Timeout(connect=15.0, read=30.0,
68
+ write=5.0, pool=5.0),
69
+ follow_redirects=True,
70
+ limits=httpx.Limits(
71
+ max_connections=50,
72
+ max_keepalive_connections=50,
73
+ keepalive_expiry=120,
74
+ ),
75
+ )
76
+
77
+ def reset_first_audio_tracking(self) -> None:
78
+ """Reset the first audio tracking state for next TTS task"""
79
+ self._first_chunk_sent = False
80
+
81
+ async def synthesize(
82
+ self,
83
+ text: AsyncIterator[str] | str,
84
+ voice_id: Optional[str] = None,
85
+ **kwargs: Any,
86
+ ) -> None:
87
+ """
88
+ Convert text to speech using LMNT's TTS API and stream to audio track
89
+
90
+ Args:
91
+ text: Text to convert to speech
92
+ voice_id: Optional voice override (uses voice from __init__ if not provided)
93
+ **kwargs: Additional provider-specific arguments
94
+ """
95
+ try:
96
+ if not self.audio_track or not self.loop:
97
+ self.emit("error", "Audio track or event loop not set")
98
+ return
99
+
100
+ self._interrupted = False
101
+
102
+ if isinstance(text, AsyncIterator):
103
+ async for segment in segment_text(text):
104
+ if self._interrupted:
105
+ break
106
+ await self._synthesize_segment(segment, voice_id, **kwargs)
107
+ else:
108
+ if not self._interrupted:
109
+ await self._synthesize_segment(text, voice_id, **kwargs)
110
+
111
+ except Exception as e:
112
+ self.emit("error", f"TTS synthesis failed: {str(e)}")
113
+
114
+ async def _synthesize_segment(self, text: str, voice_id: Optional[str] = None, **kwargs: Any) -> None:
115
+ """Synthesize a single text segment"""
116
+ if not text.strip() or self._interrupted:
117
+ return
118
+
119
+ target_voice = voice_id or self.voice
120
+
121
+ payload = {
122
+ "voice": target_voice,
123
+ "text": text,
124
+ "model": kwargs.get("model", self.model),
125
+ "language": kwargs.get("language", self.language),
126
+ "format": kwargs.get("format", self.format),
127
+ "sample_rate": kwargs.get("sample_rate", self.output_sample_rate),
128
+ "temperature": kwargs.get("temperature", self.temperature),
129
+ "top_p": kwargs.get("top_p", self.top_p),
130
+ }
131
+
132
+ seed = kwargs.get("seed", self.seed)
133
+ if seed is not None:
134
+ payload["seed"] = seed
135
+
136
+ headers = {
137
+ "X-API-Key": self.api_key,
138
+ "Content-Type": "application/json",
139
+ }
140
+
141
+ url = f"{self.base_url}/v1/ai/speech/bytes"
142
+
143
+ async with self._client.stream(
144
+ "POST",
145
+ url,
146
+ headers=headers,
147
+ json=payload
148
+ ) as response:
149
+ if response.status_code == 400:
150
+ error_data = await response.aread()
151
+ try:
152
+ import json
153
+ error_json = json.loads(error_data.decode())
154
+ error_msg = error_json.get("error", "Bad request")
155
+ except:
156
+ error_msg = "Bad request"
157
+ self.emit("error", f"LMNT API error: {error_msg}")
158
+ return
159
+ elif response.status_code == 401:
160
+ self.emit(
161
+ "error", "LMNT API authentication failed. Please check your API key.")
162
+ return
163
+ elif response.status_code != 200:
164
+ self.emit(
165
+ "error", f"LMNT API error: HTTP {response.status_code}")
166
+ return
167
+
168
+ header_processed = False
169
+ accumulated_data = b""
170
+
171
+ async for chunk in response.aiter_bytes():
172
+ if self._interrupted:
173
+ break
174
+ if chunk:
175
+ accumulated_data += chunk
176
+
177
+ if not header_processed and len(accumulated_data) >= 44:
178
+ if accumulated_data.startswith(b'RIFF'):
179
+ data_pos = accumulated_data.find(b'data')
180
+ if data_pos != -1:
181
+ accumulated_data = accumulated_data[data_pos + 8:]
182
+ header_processed = True
183
+
184
+ if header_processed:
185
+ chunk_size = int(
186
+ self.output_sample_rate * LMNT_CHANNELS * 2 * 20 / 1000) # 20ms chunks
187
+ while len(accumulated_data) >= chunk_size:
188
+ audio_chunk = accumulated_data[:chunk_size]
189
+ accumulated_data = accumulated_data[chunk_size:]
190
+
191
+ if not self._first_chunk_sent and self._first_audio_callback:
192
+ self._first_chunk_sent = True
193
+ await self._first_audio_callback()
194
+
195
+ self.loop.create_task(
196
+ self.audio_track.add_new_bytes(audio_chunk))
197
+ await asyncio.sleep(0.01)
198
+
199
+ if accumulated_data and header_processed:
200
+ chunk_size = int(self.output_sample_rate *
201
+ LMNT_CHANNELS * 2 * 20 / 1000)
202
+ if len(accumulated_data) < chunk_size:
203
+ accumulated_data += b'\x00' * \
204
+ (chunk_size - len(accumulated_data))
205
+
206
+ if not self._first_chunk_sent and self._first_audio_callback:
207
+ self._first_chunk_sent = True
208
+ await self._first_audio_callback()
209
+
210
+ self.loop.create_task(
211
+ self.audio_track.add_new_bytes(accumulated_data))
212
+
213
+ async def aclose(self) -> None:
214
+ """Cleanup resources"""
215
+ await self._client.aclose()
216
+ await super().aclose()
217
+
218
+ async def interrupt(self) -> None:
219
+ """Interrupt the TTS process"""
220
+ self._interrupted = True
221
+ if self.audio_track:
222
+ self.audio_track.interrupt()
@@ -0,0 +1 @@
1
+ __version__ = "0.0.28"
@@ -1,205 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Any, AsyncIterator, Literal, Optional, Union
4
- import httpx
5
- import os
6
- import asyncio
7
-
8
- from videosdk.agents import TTS
9
-
10
- LMNT_API_BASE_URL = "https://api.lmnt.com"
11
- LMNT_SAMPLE_RATE = 24000
12
- LMNT_CHANNELS = 1
13
-
14
- DEFAULT_MODEL = "blizzard"
15
- DEFAULT_VOICE = "ava"
16
- DEFAULT_LANGUAGE = "auto"
17
- DEFAULT_FORMAT = "wav"
18
-
19
- _LanguageCode = Union[
20
- Literal["auto", "de", "en", "es", "fr", "hi", "id", "it", "ja",
21
- "ko", "nl", "pl", "pt", "ru", "sv", "th", "tr", "uk", "vi", "zh"],
22
- str
23
- ]
24
- _FormatType = Union[Literal["aac", "mp3", "mulaw", "raw", "wav"], str]
25
- _SampleRate = Union[Literal[8000, 16000, 24000], int]
26
-
27
-
28
- class LMNTTTS(TTS):
29
- def __init__(
30
- self,
31
- *,
32
- voice: str = DEFAULT_VOICE,
33
- model: str = DEFAULT_MODEL,
34
- language: _LanguageCode = DEFAULT_LANGUAGE,
35
- format: _FormatType = DEFAULT_FORMAT,
36
- sample_rate: _SampleRate = LMNT_SAMPLE_RATE,
37
- seed: Optional[int] = None,
38
- temperature: float = 1.0,
39
- top_p: float = 0.8,
40
- api_key: Optional[str] = None,
41
- base_url: str = LMNT_API_BASE_URL,
42
- ) -> None:
43
- super().__init__(sample_rate=sample_rate, num_channels=LMNT_CHANNELS)
44
-
45
- self.voice = voice
46
- self.model = model
47
- self.language = language
48
- self.format = format
49
- self.output_sample_rate = sample_rate
50
- self.seed = seed
51
- self.temperature = temperature
52
- self.top_p = top_p
53
- self.base_url = base_url
54
- self.audio_track = None
55
- self.loop = None
56
- self._first_chunk_sent = False
57
-
58
- self.api_key = api_key or os.getenv("LMNT_API_KEY")
59
- if not self.api_key:
60
- raise ValueError(
61
- "LMNT API key must be provided either through api_key parameter "
62
- "or LMNT_API_KEY environment variable"
63
- )
64
-
65
- self._client = httpx.AsyncClient(
66
- timeout=httpx.Timeout(connect=15.0, read=30.0, write=5.0, pool=5.0),
67
- follow_redirects=True,
68
- limits=httpx.Limits(
69
- max_connections=50,
70
- max_keepalive_connections=50,
71
- keepalive_expiry=120,
72
- ),
73
- )
74
-
75
- def reset_first_audio_tracking(self) -> None:
76
- """Reset the first audio tracking state for next TTS task"""
77
- self._first_chunk_sent = False
78
-
79
- async def synthesize(
80
- self,
81
- text: AsyncIterator[str] | str,
82
- voice_id: Optional[str] = None,
83
- **kwargs: Any
84
- ) -> None:
85
- """
86
- Convert text to speech using LMNT's TTS API and stream to audio track
87
-
88
- Args:
89
- text: Text to convert to speech
90
- voice_id: Optional voice override (uses voice from __init__ if not provided)
91
- **kwargs: Additional provider-specific arguments
92
- """
93
- try:
94
- if isinstance(text, AsyncIterator):
95
- full_text = ""
96
- async for chunk in text:
97
- full_text += chunk
98
- else:
99
- full_text = text
100
-
101
- if not self.audio_track or not self.loop:
102
- self.emit("error", "Audio track or event loop not set")
103
- return
104
-
105
- target_voice = voice_id or self.voice
106
-
107
- payload = {
108
- "voice": target_voice,
109
- "text": full_text,
110
- "model": kwargs.get("model", self.model),
111
- "language": kwargs.get("language", self.language),
112
- "format": kwargs.get("format", self.format),
113
- "sample_rate": kwargs.get("sample_rate", self.output_sample_rate),
114
- "temperature": kwargs.get("temperature", self.temperature),
115
- "top_p": kwargs.get("top_p", self.top_p),
116
- }
117
-
118
- seed = kwargs.get("seed", self.seed)
119
- if seed is not None:
120
- payload["seed"] = seed
121
-
122
- headers = {
123
- "X-API-Key": self.api_key,
124
- "Content-Type": "application/json",
125
- }
126
-
127
- url = f"{self.base_url}/v1/ai/speech/bytes"
128
-
129
- async with self._client.stream(
130
- "POST",
131
- url,
132
- headers=headers,
133
- json=payload
134
- ) as response:
135
- if response.status_code == 400:
136
- error_data = await response.aread()
137
- try:
138
- import json
139
- error_json = json.loads(error_data.decode())
140
- error_msg = error_json.get("error", "Bad request")
141
- except:
142
- error_msg = "Bad request"
143
- self.emit("error", f"LMNT API error: {error_msg}")
144
- return
145
- elif response.status_code == 401:
146
- self.emit("error", "LMNT API authentication failed. Please check your API key.")
147
- return
148
- elif response.status_code != 200:
149
- self.emit("error", f"LMNT API error: HTTP {response.status_code}")
150
- return
151
-
152
- header_processed = False
153
- accumulated_data = b""
154
-
155
- async for chunk in response.aiter_bytes():
156
- if chunk:
157
- accumulated_data += chunk
158
-
159
- if not header_processed and len(accumulated_data) >= 44:
160
- if accumulated_data.startswith(b'RIFF'):
161
- data_pos = accumulated_data.find(b'data')
162
- if data_pos != -1:
163
- accumulated_data = accumulated_data[data_pos + 8:]
164
- header_processed = True
165
-
166
- if header_processed:
167
- chunk_size = int(self.output_sample_rate * LMNT_CHANNELS * 2 * 20 / 1000) # 20ms chunks
168
- while len(accumulated_data) >= chunk_size:
169
- audio_chunk = accumulated_data[:chunk_size]
170
- accumulated_data = accumulated_data[chunk_size:]
171
-
172
- if not self._first_chunk_sent and self._first_audio_callback:
173
- self._first_chunk_sent = True
174
- await self._first_audio_callback()
175
-
176
- self.loop.create_task(self.audio_track.add_new_bytes(audio_chunk))
177
- await asyncio.sleep(0.01)
178
-
179
- if accumulated_data and header_processed:
180
- chunk_size = int(self.output_sample_rate * LMNT_CHANNELS * 2 * 20 / 1000)
181
- if len(accumulated_data) < chunk_size:
182
- accumulated_data += b'\x00' * (chunk_size - len(accumulated_data))
183
-
184
- if not self._first_chunk_sent and self._first_audio_callback:
185
- self._first_chunk_sent = True
186
- await self._first_audio_callback()
187
-
188
- self.loop.create_task(self.audio_track.add_new_bytes(accumulated_data))
189
-
190
- except httpx.HTTPError as e:
191
- self.emit("error", f"HTTP error occurred: {str(e)}")
192
- except Exception as e:
193
- self.emit("error", f"TTS synthesis failed: {str(e)}")
194
-
195
-
196
-
197
- async def aclose(self) -> None:
198
- """Cleanup resources"""
199
- await self._client.aclose()
200
- await super().aclose()
201
-
202
- async def interrupt(self) -> None:
203
- """Interrupt the TTS process"""
204
- if self.audio_track:
205
- self.audio_track.interrupt()
@@ -1 +0,0 @@
1
- __version__ = "0.0.26"