videosdk-plugins-lmnt 0.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of videosdk-plugins-lmnt might be problematic. Click here for more details.
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, AsyncIterator, Literal, Optional, Union
|
|
4
|
+
import httpx
|
|
5
|
+
import os
|
|
6
|
+
import asyncio
|
|
7
|
+
|
|
8
|
+
from videosdk.agents import TTS
|
|
9
|
+
|
|
10
|
+
# --- LMNT service endpoint and audio layout -------------------------------
LMNT_API_BASE_URL = "https://api.lmnt.com"
LMNT_SAMPLE_RATE = 24000
LMNT_CHANNELS = 1

# --- Defaults used by LMNTTTS when the caller passes nothing ---------------
DEFAULT_MODEL = "blizzard"
DEFAULT_VOICE = "ava"
DEFAULT_LANGUAGE = "auto"
DEFAULT_FORMAT = "wav"

# --- Type aliases -----------------------------------------------------------
# Each alias lists the known values as a Literal (for editor/type-checker
# hints) and additionally accepts the plain base type, so other values are
# not rejected by type checking.
_LanguageCode = Union[
    Literal[
        "auto",
        "de",
        "en",
        "es",
        "fr",
        "hi",
        "id",
        "it",
        "ja",
        "ko",
        "nl",
        "pl",
        "pt",
        "ru",
        "sv",
        "th",
        "tr",
        "uk",
        "vi",
        "zh",
    ],
    str,
]
_FormatType = Union[
    Literal["aac", "mp3", "mulaw", "raw", "wav"],
    str,
]
_SampleRate = Union[
    Literal[8000, 16000, 24000],
    int,
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class LMNTTTS(TTS):
    """Text-to-speech plugin that streams audio from LMNT's HTTP speech API.

    Text is POSTed to ``{base_url}/v1/ai/speech/bytes``. The audio payload of
    the WAV response is cut into 20 ms slices which are scheduled onto
    ``self.audio_track`` via ``self.loop``. Both attributes start out as
    ``None`` and must be assigned from outside this class before
    :meth:`synthesize` can produce audio.

    Failures inside :meth:`synthesize` are reported through
    ``self.emit("error", message)`` instead of being raised.
    """

    # Header detection is not attempted before this many bytes have arrived
    # (the size of a canonical PCM WAV header).
    _WAV_MIN_HEADER_BYTES = 44
    # Duration of each audio slice handed to the audio track.
    _FRAME_MS = 20

    def __init__(
        self,
        *,
        voice: str = DEFAULT_VOICE,
        model: str = DEFAULT_MODEL,
        language: _LanguageCode = DEFAULT_LANGUAGE,
        format: _FormatType = DEFAULT_FORMAT,
        sample_rate: _SampleRate = LMNT_SAMPLE_RATE,
        seed: Optional[int] = None,
        temperature: float = 1.0,
        top_p: float = 0.8,
        api_key: Optional[str] = None,
        base_url: str = LMNT_API_BASE_URL,
    ) -> None:
        """Store the synthesis settings and create the shared HTTP client.

        Args:
            voice: Value sent as the ``voice`` payload field by default.
            model: Value sent as the ``model`` payload field by default.
            language: Value sent as the ``language`` payload field by default.
            format: Value sent as the ``format`` payload field by default.
                Only responses that start with a RIFF/WAV header are played
                back by :meth:`synthesize`.
            sample_rate: Passed to the base class, sent as the ``sample_rate``
                payload field, and used to size the 20 ms audio slices.
            seed: Sent as the ``seed`` payload field when not ``None``.
            temperature: Value sent as the ``temperature`` payload field.
            top_p: Value sent as the ``top_p`` payload field.
            api_key: LMNT API key; falls back to the ``LMNT_API_KEY``
                environment variable when omitted or empty.
            base_url: Root URL of the LMNT API.

        Raises:
            ValueError: If no API key is given and ``LMNT_API_KEY`` is unset
                or empty.
        """
        super().__init__(sample_rate=sample_rate, num_channels=LMNT_CHANNELS)

        self.voice = voice
        self.model = model
        self.language = language
        self.format = format
        self.output_sample_rate = sample_rate
        self.seed = seed
        self.temperature = temperature
        self.top_p = top_p
        self.base_url = base_url
        # Assigned from outside this class; synthesize() refuses to run
        # while either one is still unset.
        self.audio_track = None
        self.loop = None

        self.api_key = api_key or os.getenv("LMNT_API_KEY")
        if not self.api_key:
            raise ValueError(
                "LMNT API key must be provided either through api_key parameter "
                "or LMNT_API_KEY environment variable"
            )

        # One client for the lifetime of the instance so connections are
        # reused between synthesize() calls; released in aclose().
        self._client = httpx.AsyncClient(
            timeout=httpx.Timeout(connect=15.0, read=30.0, write=5.0, pool=5.0),
            follow_redirects=True,
            limits=httpx.Limits(
                max_connections=50,
                max_keepalive_connections=50,
                keepalive_expiry=120,
            ),
        )

    async def synthesize(
        self,
        text: AsyncIterator[str] | str,
        voice_id: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Convert text to speech with LMNT and stream it to the audio track.

        Args:
            text: Text to speak, either a string or an async iterable of
                string fragments. Fragments are concatenated and sent as a
                single request.
            voice_id: Optional voice override (uses the voice from
                ``__init__`` if not provided).
            **kwargs: Per-call overrides for the payload fields ``model``,
                ``language``, ``format``, ``sample_rate``, ``temperature``,
                ``top_p`` and ``seed``.

        Every failure (missing audio track/loop, non-200 response, transport
        error, unexpected exception) is reported with
        ``self.emit("error", ...)``; nothing is raised to the caller.
        """
        try:
            full_text = await self._collect_text(text)

            if not self.audio_track or not self.loop:
                self.emit("error", "Audio track or event loop not set")
                return

            payload = {
                "voice": voice_id or self.voice,
                "text": full_text,
                "model": kwargs.get("model", self.model),
                "language": kwargs.get("language", self.language),
                "format": kwargs.get("format", self.format),
                "sample_rate": kwargs.get("sample_rate", self.output_sample_rate),
                "temperature": kwargs.get("temperature", self.temperature),
                "top_p": kwargs.get("top_p", self.top_p),
            }

            # The seed is optional: leave the field out entirely when unset.
            seed = kwargs.get("seed", self.seed)
            if seed is not None:
                payload["seed"] = seed

            headers = {
                "X-API-Key": self.api_key,
                "Content-Type": "application/json",
            }

            url = f"{self.base_url}/v1/ai/speech/bytes"

            async with self._client.stream(
                "POST",
                url,
                headers=headers,
                json=payload,
            ) as response:
                if response.status_code != 200:
                    self.emit("error", await self._describe_failure(response))
                    return
                await self._stream_wav(response)

        except httpx.HTTPError as e:
            self.emit("error", f"HTTP error occurred: {e}")
        except Exception as e:
            self.emit("error", f"TTS synthesis failed: {e}")

    @staticmethod
    async def _collect_text(text: AsyncIterator[str] | str) -> str:
        """Return ``text`` as one string, draining it if it is an async iterable."""
        if isinstance(text, str):
            return text
        return "".join([fragment async for fragment in text])

    @staticmethod
    async def _describe_failure(response: httpx.Response) -> str:
        """Build the error message for a non-200 LMNT response."""
        status = response.status_code
        if status == 400:
            import json

            raw_body = await response.aread()
            try:
                detail = json.loads(raw_body.decode()).get("error", "Bad request")
            except (ValueError, AttributeError):
                # Body is not UTF-8, not JSON, or not a JSON object.
                detail = "Bad request"
            return f"LMNT API error: {detail}"
        if status == 401:
            return "LMNT API authentication failed. Please check your API key."
        return f"LMNT API error: HTTP {status}"

    @classmethod
    def _strip_wav_header(cls, buffer: bytes) -> Optional[bytes]:
        """Return the bytes following the WAV ``data`` chunk header.

        Returns ``None`` while the header cannot be located yet: too few
        bytes, no ``RIFF`` magic, no ``data`` tag, or the 8-byte chunk header
        (4-byte tag + 4-byte size) has not fully arrived. Waiting for the full
        chunk header keeps stray size bytes from being played as audio.
        """
        if len(buffer) < cls._WAV_MIN_HEADER_BYTES:
            return None
        if not buffer.startswith(b"RIFF"):
            return None
        tag_at = buffer.find(b"data")
        if tag_at == -1 or len(buffer) < tag_at + 8:
            return None
        return buffer[tag_at + 8:]

    async def _stream_wav(self, response: httpx.Response) -> None:
        """Forward the audio of a streamed WAV response in 20 ms slices."""
        # bytes per slice = rate * channels * 2 bytes per sample * 20 ms
        # (the factor 2 presumably reflects 16-bit samples).
        frame_bytes = int(
            self.output_sample_rate * LMNT_CHANNELS * 2 * self._FRAME_MS / 1000
        )
        header_done = False
        pending = b""

        async for piece in response.aiter_bytes():
            if not piece:
                continue
            pending += piece

            if not header_done:
                audio = self._strip_wav_header(pending)
                if audio is None:
                    continue
                pending = audio
                header_done = True

            while len(pending) >= frame_bytes:
                frame, pending = pending[:frame_bytes], pending[frame_bytes:]
                self.loop.create_task(self.audio_track.add_new_bytes(frame))
                # Brief pause between slices; also yields to the event loop.
                await asyncio.sleep(0.01)

        if not header_done:
            # Previously this case produced silence with no diagnostic.
            self.emit(
                "error",
                "LMNT API error: response did not contain a WAV (RIFF) audio stream",
            )
            return

        if pending:
            # Zero-pad the final partial slice up to a full slice.
            pending += b"\x00" * (frame_bytes - len(pending))
            self.loop.create_task(self.audio_track.add_new_bytes(pending))

    async def aclose(self) -> None:
        """Close the HTTP client, then run the base-class cleanup."""
        try:
            await self._client.aclose()
        finally:
            # Run the base cleanup even if closing the client fails.
            await super().aclose()

    async def interrupt(self) -> None:
        """Interrupt playback on the audio track, if one is attached."""
        if self.audio_track:
            self.audio_track.interrupt()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Version string of this package.
__version__ = "0.0.19"
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: videosdk-plugins-lmnt
|
|
3
|
+
Version: 0.0.19
|
|
4
|
+
Summary: VideoSDK Agent Framework plugin for LMNT AI Text-to-Speech services
|
|
5
|
+
Author: videosdk
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Keywords: ai,audio,lmnt,tts,video,videosdk
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Topic :: Communications :: Conferencing
|
|
11
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
12
|
+
Classifier: Topic :: Multimedia :: Video
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
14
|
+
Requires-Python: >=3.11
|
|
15
|
+
Requires-Dist: httpx>=0.24.0
|
|
16
|
+
Requires-Dist: videosdk-agents>=0.0.19
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# VideoSDK LMNT AI Plugin
|
|
20
|
+
|
|
21
|
+
Agent Framework plugin for TTS services from LMNT.
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install videosdk-plugins-lmnt
|
|
27
|
+
```
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
videosdk/plugins/lmnt/__init__.py,sha256=JI72LJNs0cCg3S1lweyO-yiBLnViM6Dfp4zPvYJkDJQ,56
|
|
2
|
+
videosdk/plugins/lmnt/tts.py,sha256=V_7bjTJei5qrf7Djh79yydt9XBW8iNN5gBzXCAFAcVo,7178
|
|
3
|
+
videosdk/plugins/lmnt/version.py,sha256=t2ESZXMHiQN58q9Bo0UBQqSt6syeATooB2yMARxEW24,23
|
|
4
|
+
videosdk_plugins_lmnt-0.0.19.dist-info/METADATA,sha256=y_bR-Z1CASKSZBRQqKTgjluIqEUTKxJF9yp_OMZ1q8k,806
|
|
5
|
+
videosdk_plugins_lmnt-0.0.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
+
videosdk_plugins_lmnt-0.0.19.dist-info/RECORD,,
|