universal-mcp-applications 0.1.39rc8__py3-none-any.whl → 0.1.39rc16__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Potentially problematic release.
This version of universal-mcp-applications might be problematic. Click here for more details.
- universal_mcp/applications/BEST_PRACTICES.md +1 -1
- universal_mcp/applications/airtable/app.py +13 -13
- universal_mcp/applications/apollo/app.py +2 -2
- universal_mcp/applications/aws_s3/app.py +30 -19
- universal_mcp/applications/browser_use/app.py +10 -7
- universal_mcp/applications/contentful/app.py +4 -4
- universal_mcp/applications/crustdata/app.py +2 -2
- universal_mcp/applications/e2b/app.py +3 -4
- universal_mcp/applications/elevenlabs/README.md +27 -3
- universal_mcp/applications/elevenlabs/app.py +753 -48
- universal_mcp/applications/exa/app.py +18 -11
- universal_mcp/applications/falai/README.md +5 -7
- universal_mcp/applications/falai/app.py +160 -159
- universal_mcp/applications/firecrawl/app.py +14 -15
- universal_mcp/applications/ghost_content/app.py +4 -4
- universal_mcp/applications/github/app.py +2 -2
- universal_mcp/applications/gong/app.py +2 -2
- universal_mcp/applications/google_docs/README.md +15 -14
- universal_mcp/applications/google_docs/app.py +5 -4
- universal_mcp/applications/google_gemini/app.py +61 -17
- universal_mcp/applications/google_sheet/README.md +2 -1
- universal_mcp/applications/google_sheet/app.py +55 -0
- universal_mcp/applications/heygen/README.md +10 -32
- universal_mcp/applications/heygen/app.py +350 -744
- universal_mcp/applications/klaviyo/app.py +2 -2
- universal_mcp/applications/linkedin/README.md +14 -2
- universal_mcp/applications/linkedin/app.py +411 -38
- universal_mcp/applications/ms_teams/app.py +420 -1285
- universal_mcp/applications/notion/app.py +2 -2
- universal_mcp/applications/openai/app.py +1 -1
- universal_mcp/applications/perplexity/app.py +6 -7
- universal_mcp/applications/reddit/app.py +4 -4
- universal_mcp/applications/resend/app.py +31 -32
- universal_mcp/applications/rocketlane/app.py +2 -2
- universal_mcp/applications/scraper/app.py +51 -21
- universal_mcp/applications/semrush/app.py +1 -1
- universal_mcp/applications/serpapi/app.py +8 -7
- universal_mcp/applications/shopify/app.py +5 -7
- universal_mcp/applications/shortcut/app.py +3 -2
- universal_mcp/applications/slack/app.py +2 -2
- universal_mcp/applications/twilio/app.py +14 -13
- {universal_mcp_applications-0.1.39rc8.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/METADATA +1 -1
- {universal_mcp_applications-0.1.39rc8.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/RECORD +45 -45
- {universal_mcp_applications-0.1.39rc8.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/WHEEL +0 -0
- {universal_mcp_applications-0.1.39rc8.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,25 +1,29 @@
|
|
|
1
|
+
import base64
|
|
1
2
|
import uuid
|
|
3
|
+
import time
|
|
2
4
|
from io import BytesIO
|
|
5
|
+
from typing import Any, Dict, List, Optional
|
|
3
6
|
import requests
|
|
7
|
+
|
|
4
8
|
from universal_mcp.applications.application import APIApplication
|
|
5
9
|
from universal_mcp.exceptions import NotAuthorizedError
|
|
6
10
|
from universal_mcp.integrations import Integration
|
|
7
|
-
from elevenlabs import ElevenLabs
|
|
8
|
-
from
|
|
11
|
+
from elevenlabs.client import ElevenLabs
|
|
12
|
+
from elevenlabs import DialogueInput
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
class ElevenlabsApp(APIApplication):
|
|
12
16
|
def __init__(self, integration: Integration = None, **kwargs) -> None:
|
|
13
17
|
super().__init__(name="elevenlabs", integration=integration, **kwargs)
|
|
14
18
|
self.base_url = "https://api.elevenlabs.io"
|
|
19
|
+
self._client = None
|
|
15
20
|
|
|
16
|
-
|
|
17
|
-
def client(self) -> ElevenLabs:
|
|
21
|
+
async def get_client(self) -> ElevenLabs:
|
|
18
22
|
"""
|
|
19
23
|
Lazily initializes and returns an authenticated `ElevenLabs` SDK client. On the first call, it awaits the integration's credentials, reads the API key from them, and caches the client instance, raising a `NotAuthorizedError` if credentials are not found.
|
|
20
24
|
"""
|
|
21
25
|
if self._client is None:
|
|
22
|
-
credentials = self.integration.
|
|
26
|
+
credentials = await self.integration.get_credentials_async()
|
|
23
27
|
if not credentials:
|
|
24
28
|
raise NotAuthorizedError("No credentials found")
|
|
25
29
|
api_key = credentials.get("api_key") or credentials.get("API_KEY") or credentials.get("apiKey")
|
|
@@ -28,87 +32,788 @@ class ElevenlabsApp(APIApplication):
|
|
|
28
32
|
self._client = ElevenLabs(api_key=api_key)
|
|
29
33
|
return self._client
|
|
30
34
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
35
|
+
# --- Text to Speech ---
|
|
36
|
+
|
|
37
|
+
async def text_to_speech(
|
|
38
|
+
self,
|
|
39
|
+
text: str,
|
|
40
|
+
voice_id: str = "21m00Tcm4TlvDq8ikWAM",
|
|
41
|
+
model_id: str = "eleven_multilingual_v2",
|
|
42
|
+
) -> Dict[str, Any]:
|
|
34
43
|
"""
|
|
35
|
-
Converts
|
|
44
|
+
Converts text to speech and returns the generated audio data.
|
|
36
45
|
|
|
37
46
|
Args:
|
|
38
|
-
text
|
|
39
|
-
voice_id
|
|
40
|
-
model_id
|
|
41
|
-
stability (float, optional): The stability of the voice.
|
|
42
|
-
similarity_boost (float, optional): The similarity boost of the voice.
|
|
47
|
+
text: The text to convert to speech.
|
|
48
|
+
voice_id: The ID of the voice to use. Defaults to "21m00Tcm4TlvDq8ikWAM" (Rachel).
|
|
49
|
+
model_id: The model to use. Defaults to "eleven_multilingual_v2".
|
|
43
50
|
|
|
44
51
|
Returns:
|
|
45
|
-
|
|
52
|
+
dict: A dictionary containing:
|
|
53
|
+
- 'type' (str): "audio".
|
|
54
|
+
- 'data' (str): The base64 encoded audio data.
|
|
55
|
+
- 'mime_type' (str): "audio/mpeg".
|
|
56
|
+
- 'file_name' (str): A suggested file name.
|
|
46
57
|
|
|
47
58
|
Tags:
|
|
48
|
-
important
|
|
59
|
+
text-to-speech, speech-synthesis, audio-generation, elevenlabs, important
|
|
49
60
|
"""
|
|
50
|
-
|
|
61
|
+
client = await self.get_client()
|
|
62
|
+
|
|
63
|
+
audio_generator = client.text_to_speech.convert(
|
|
64
|
+
text=text,
|
|
65
|
+
voice_id=voice_id,
|
|
66
|
+
model_id=model_id,
|
|
67
|
+
output_format="mp3_44100_128",
|
|
68
|
+
)
|
|
69
|
+
|
|
51
70
|
audio_data = b""
|
|
52
71
|
for chunk in audio_generator:
|
|
53
72
|
audio_data += chunk
|
|
54
|
-
|
|
55
|
-
|
|
73
|
+
|
|
74
|
+
audio_base64 = base64.b64encode(audio_data).decode("utf-8")
|
|
75
|
+
file_name = f"{uuid.uuid4()}.mp3"
|
|
76
|
+
return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
|
|
77
|
+
|
|
78
|
+
# --- Speech to Text ---
|
|
56
79
|
|
|
57
80
|
async def speech_to_text(self, audio_file_path: str, language_code: str = "eng", diarize: bool = True) -> str:
|
|
58
81
|
"""
|
|
59
|
-
Transcribes an audio file into text
|
|
82
|
+
Transcribes an audio file into text.
|
|
60
83
|
|
|
61
84
|
Args:
|
|
62
85
|
audio_file_path (str): Local path or http(s) URL of the audio file.
|
|
86
|
+
language_code (str): Language code of the audio (ISO 639-1 or ISO 639-3). Defaults to "eng", the three-letter ISO 639-3 code for English.
|
|
87
|
+
diarize (bool): Whether to distinguish speakers. Defaults to True.
|
|
63
88
|
|
|
64
89
|
Returns:
|
|
65
90
|
str: The transcribed text.
|
|
66
91
|
|
|
67
92
|
Tags:
|
|
68
|
-
important
|
|
93
|
+
speech-to-text, transcription, audio-processing, elevenlabs, important
|
|
69
94
|
"""
|
|
70
|
-
|
|
71
|
-
|
|
95
|
+
client = await self.get_client()
|
|
96
|
+
if audio_file_path.startswith(("http://", "https://")):
|
|
97
|
+
response = requests.get(audio_file_path)
|
|
98
|
+
response.raise_for_status()
|
|
99
|
+
audio_data_io = BytesIO(response.content)
|
|
100
|
+
else:
|
|
101
|
+
with open(audio_file_path, "rb") as f:
|
|
102
|
+
audio_data_io = BytesIO(f.read())
|
|
103
|
+
transcription = client.speech_to_text.convert(
|
|
104
|
+
file=audio_data_io, model_id="scribe_v1", tag_audio_events=True, language_code=language_code, diarize=diarize
|
|
72
105
|
)
|
|
73
|
-
return transcription
|
|
106
|
+
return transcription.text
|
|
107
|
+
|
|
108
|
+
# --- Speech to Speech ---
|
|
74
109
|
|
|
75
110
|
async def speech_to_speech(
|
|
76
|
-
self,
|
|
77
|
-
) ->
|
|
111
|
+
self, audio_source: str, voice_id: str = "21m00Tcm4TlvDq8ikWAM", model_id: str = "eleven_multilingual_sts_v2"
|
|
112
|
+
) -> Dict[str, Any]:
|
|
78
113
|
"""
|
|
79
|
-
|
|
114
|
+
Converts speech from an audio source (URL or local path) to a different voice.
|
|
80
115
|
|
|
81
116
|
Args:
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
model_id
|
|
117
|
+
audio_source: URL or path of the source audio.
|
|
118
|
+
voice_id: Target voice ID.
|
|
119
|
+
model_id: Model ID. Defaults to "eleven_multilingual_sts_v2".
|
|
85
120
|
|
|
86
121
|
Returns:
|
|
87
|
-
|
|
122
|
+
dict: A dictionary containing:
|
|
123
|
+
- 'type' (str): "audio".
|
|
124
|
+
- 'data' (str): The base64 encoded audio data.
|
|
125
|
+
- 'mime_type' (str): "audio/mpeg".
|
|
126
|
+
- 'file_name' (str): A suggested file name.
|
|
88
127
|
|
|
89
128
|
Tags:
|
|
90
|
-
important
|
|
129
|
+
speech-to-speech, voice-conversion, audio-processing, elevenlabs, important
|
|
91
130
|
"""
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
131
|
+
if audio_source.startswith(("http://", "https://")):
|
|
132
|
+
response = requests.get(audio_source)
|
|
133
|
+
response.raise_for_status()
|
|
134
|
+
audio_data_io = BytesIO(response.content)
|
|
135
|
+
else:
|
|
136
|
+
with open(audio_source, "rb") as f:
|
|
137
|
+
audio_data_io = BytesIO(f.read())
|
|
138
|
+
|
|
139
|
+
client = await self.get_client()
|
|
140
|
+
audio_stream = client.speech_to_speech.convert(
|
|
141
|
+
voice_id=voice_id, audio=audio_data_io, model_id=model_id, output_format="mp3_44100_128"
|
|
96
142
|
)
|
|
97
|
-
return response.content
|
|
98
143
|
|
|
99
|
-
|
|
100
|
-
|
|
144
|
+
output_data = b""
|
|
145
|
+
for chunk in audio_stream:
|
|
146
|
+
output_data += chunk
|
|
147
|
+
|
|
148
|
+
audio_base64 = base64.b64encode(output_data).decode("utf-8")
|
|
149
|
+
file_name = f"{uuid.uuid4()}.mp3"
|
|
150
|
+
return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
|
|
151
|
+
|
|
152
|
+
# --- History ---
|
|
153
|
+
|
|
154
|
+
async def get_history_items(self, page_size: int = 100, start_after_history_item_id: Optional[str] = None) -> Dict[str, Any]:
|
|
155
|
+
"""
|
|
156
|
+
Returns a list of generated audio history items.
|
|
157
|
+
|
|
158
|
+
Args:
|
|
159
|
+
page_size: The number of items to return. Defaults to 100.
|
|
160
|
+
start_after_history_item_id: The ID of the item to start after for pagination.
|
|
161
|
+
|
|
162
|
+
Returns:
|
|
163
|
+
dict: The history response containing a list of history items.
|
|
164
|
+
|
|
165
|
+
Tags:
|
|
166
|
+
history, audio-logs, elevenlabs
|
|
167
|
+
"""
|
|
168
|
+
client = await self.get_client()
|
|
169
|
+
return client.history.list(page_size=page_size, start_after_history_item_id=start_after_history_item_id).dict()
|
|
170
|
+
|
|
171
|
+
async def get_history_item(self, history_item_id: str) -> Dict[str, Any]:
|
|
172
|
+
"""
|
|
173
|
+
Retrieves a specific history item by ID.
|
|
174
|
+
|
|
175
|
+
Args:
|
|
176
|
+
history_item_id: The ID of the history item to retrieve.
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
dict: The details of the history item.
|
|
180
|
+
|
|
181
|
+
Tags:
|
|
182
|
+
history, audio-logs, elevenlabs
|
|
183
|
+
"""
|
|
184
|
+
client = await self.get_client()
|
|
185
|
+
return client.history.get(history_item_id=history_item_id).dict()
|
|
186
|
+
|
|
187
|
+
async def delete_history_item(self, history_item_id: str) -> Dict[str, Any]:
|
|
188
|
+
"""
|
|
189
|
+
Deletes a history item by ID.
|
|
190
|
+
|
|
191
|
+
Args:
|
|
192
|
+
history_item_id: The ID of the history item to delete.
|
|
193
|
+
|
|
194
|
+
Returns:
|
|
195
|
+
dict: The deletion status.
|
|
196
|
+
|
|
197
|
+
Tags:
|
|
198
|
+
history, audio-logs, elevenlabs
|
|
199
|
+
"""
|
|
200
|
+
client = await self.get_client()
|
|
201
|
+
return client.history.delete(history_item_id=history_item_id)
|
|
202
|
+
|
|
203
|
+
async def get_history_item_audio(self, history_item_id: str) -> Dict[str, Any]:
|
|
204
|
+
"""
|
|
205
|
+
Gets the audio for a history item.
|
|
206
|
+
|
|
207
|
+
Args:
|
|
208
|
+
history_item_id: The ID of the history item.
|
|
209
|
+
|
|
210
|
+
Returns:
|
|
211
|
+
dict: A dictionary containing:
|
|
212
|
+
- 'type' (str): "audio".
|
|
213
|
+
- 'data' (str): The base64 encoded audio data.
|
|
214
|
+
- 'mime_type' (str): "audio/mpeg".
|
|
215
|
+
- 'file_name' (str): A suggested file name.
|
|
216
|
+
|
|
217
|
+
Tags:
|
|
218
|
+
history, audio-download, elevenlabs
|
|
219
|
+
"""
|
|
220
|
+
client = await self.get_client()
|
|
221
|
+
audio_generator = client.history.get_audio(history_item_id=history_item_id)
|
|
222
|
+
audio_data = b""
|
|
223
|
+
for chunk in audio_generator:
|
|
224
|
+
audio_data += chunk
|
|
225
|
+
|
|
226
|
+
audio_base64 = base64.b64encode(audio_data).decode("utf-8")
|
|
227
|
+
file_name = f"{history_item_id}.mp3"
|
|
228
|
+
return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
|
|
229
|
+
|
|
230
|
+
# --- Voices ---
|
|
231
|
+
|
|
232
|
+
async def get_voices(self) -> Dict[str, Any]:
|
|
233
|
+
"""
|
|
234
|
+
Lists all available voices.
|
|
235
|
+
|
|
236
|
+
Returns:
|
|
237
|
+
dict: A dictionary containing the list of voices.
|
|
238
|
+
|
|
239
|
+
Tags:
|
|
240
|
+
voices, list-voices, elevenlabs
|
|
241
|
+
"""
|
|
242
|
+
client = await self.get_client()
|
|
243
|
+
return client.voices.get_all().dict()
|
|
244
|
+
|
|
245
|
+
async def get_voice(self, voice_id: str) -> Dict[str, Any]:
|
|
246
|
+
"""
|
|
247
|
+
Gets details of a specific voice.
|
|
248
|
+
|
|
249
|
+
Args:
|
|
250
|
+
voice_id: The ID of the voice to retrieve.
|
|
251
|
+
|
|
252
|
+
Returns:
|
|
253
|
+
dict: The voice details.
|
|
254
|
+
|
|
255
|
+
Tags:
|
|
256
|
+
voices, voice-details, elevenlabs
|
|
257
|
+
"""
|
|
258
|
+
client = await self.get_client()
|
|
259
|
+
return client.voices.get(voice_id=voice_id).dict()
|
|
260
|
+
|
|
261
|
+
async def delete_voice(self, voice_id: str) -> Dict[str, Any]:
|
|
262
|
+
"""
|
|
263
|
+
Deletes a voice by ID.
|
|
264
|
+
|
|
265
|
+
Args:
|
|
266
|
+
voice_id: The ID of the voice to delete.
|
|
267
|
+
|
|
268
|
+
Returns:
|
|
269
|
+
dict: The deletion status.
|
|
270
|
+
|
|
271
|
+
Tags:
|
|
272
|
+
voices, delete-voice, elevenlabs
|
|
273
|
+
"""
|
|
274
|
+
client = await self.get_client()
|
|
275
|
+
return client.voices.delete(voice_id=voice_id).dict()
|
|
276
|
+
|
|
277
|
+
# --- Samples ---
|
|
278
|
+
|
|
279
|
+
async def get_voice_samples(self, voice_id: str) -> List[Dict[str, Any]]:
|
|
280
|
+
"""
|
|
281
|
+
Gets samples for a specific voice.
|
|
282
|
+
|
|
283
|
+
Args:
|
|
284
|
+
voice_id: The ID of the voice.
|
|
285
|
+
|
|
286
|
+
Returns:
|
|
287
|
+
list: A list of voice samples.
|
|
288
|
+
|
|
289
|
+
Tags:
|
|
290
|
+
samples, voice-samples, elevenlabs
|
|
291
|
+
"""
|
|
292
|
+
voice = await self.get_voice(voice_id)
|
|
293
|
+
# Check if voice is dict or object
|
|
294
|
+
if hasattr(voice, "samples"):
|
|
295
|
+
return [s.dict() for s in voice.samples]
|
|
296
|
+
# Pydantic .dict() might return 'samples' as None if items are missing
|
|
297
|
+
samples = voice.get("samples")
|
|
298
|
+
if samples is None:
|
|
299
|
+
return []
|
|
300
|
+
return samples
|
|
301
|
+
|
|
302
|
+
async def delete_sample(self, voice_id: str, sample_id: str) -> Dict[str, Any]:
|
|
303
|
+
"""
|
|
304
|
+
Deletes a sample.
|
|
305
|
+
|
|
306
|
+
Args:
|
|
307
|
+
voice_id: The ID of the voice.
|
|
308
|
+
sample_id: The ID of the sample to delete.
|
|
309
|
+
|
|
310
|
+
Returns:
|
|
311
|
+
dict: The deletion status.
|
|
312
|
+
|
|
313
|
+
Tags:
|
|
314
|
+
samples, delete-sample, elevenlabs
|
|
315
|
+
"""
|
|
316
|
+
client = await self.get_client()
|
|
317
|
+
return client.samples.delete(voice_id=voice_id, sample_id=sample_id).dict()
|
|
318
|
+
|
|
319
|
+
# --- Text to Sound Effects ---
|
|
320
|
+
|
|
321
|
+
async def convert_text_to_sound_effect(
|
|
322
|
+
self, text: str, duration_seconds: Optional[float] = None, prompt_influence: float = 0.3
|
|
323
|
+
) -> Dict[str, Any]:
|
|
324
|
+
"""
|
|
325
|
+
Converts text to sound effects.
|
|
326
|
+
|
|
327
|
+
Args:
|
|
328
|
+
text: A text description of the sound effect.
|
|
329
|
+
duration_seconds: The duration of the sound effect in seconds.
|
|
330
|
+
prompt_influence: The influence of the prompt on the generation (0.0 to 1.0). Defaults to 0.3.
|
|
331
|
+
|
|
332
|
+
Returns:
|
|
333
|
+
dict: A dictionary containing:
|
|
334
|
+
- 'type' (str): "audio".
|
|
335
|
+
- 'data' (str): The base64 encoded audio data.
|
|
336
|
+
- 'mime_type' (str): "audio/mpeg".
|
|
337
|
+
- 'file_name' (str): A suggested file name.
|
|
338
|
+
|
|
339
|
+
Tags:
|
|
340
|
+
sound-effects, audio-generation, elevenlabs
|
|
341
|
+
"""
|
|
342
|
+
client = await self.get_client()
|
|
343
|
+
audio_generator = client.text_to_sound_effects.convert(
|
|
344
|
+
text=text, duration_seconds=duration_seconds, prompt_influence=prompt_influence
|
|
345
|
+
)
|
|
346
|
+
audio_data = b""
|
|
347
|
+
for chunk in audio_generator:
|
|
348
|
+
audio_data += chunk
|
|
349
|
+
|
|
350
|
+
audio_base64 = base64.b64encode(audio_data).decode("utf-8")
|
|
351
|
+
file_name = f"{uuid.uuid4()}.mp3"
|
|
352
|
+
return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
|
|
353
|
+
|
|
354
|
+
# --- Text to Dialogue ---
|
|
355
|
+
|
|
356
|
+
async def convert_text_to_dialogue(
|
|
357
|
+
self,
|
|
358
|
+
dialogue_turns: List[Dict[str, str]],
|
|
359
|
+
model_id: str = "eleven_v3",
|
|
360
|
+
output_format: str = "mp3_44100_128",
|
|
361
|
+
) -> Dict[str, Any]:
|
|
362
|
+
"""
|
|
363
|
+
Converts a list of text and voice ID pairs into speech (dialogue) and returns synthesized audio.
|
|
364
|
+
|
|
365
|
+
Args:
|
|
366
|
+
dialogue_turns: A list of dictionaries, each containing:
|
|
367
|
+
- 'text' (str): The text to be spoken.
|
|
368
|
+
- 'voice_id' (str): The ID of the voice to use.
|
|
369
|
+
model_id: The model to use. Defaults to "eleven_v3".
|
|
370
|
+
output_format: The output format. Defaults to "mp3_44100_128".
|
|
371
|
+
|
|
372
|
+
Example:
|
|
373
|
+
dialogue_turns = [
|
|
374
|
+
{"text": "Hello there! How are you doing today?", "voice_id": "9BWtsMINqrJLrRacOk9x"},
|
|
375
|
+
{"text": "I'm doing great, thanks for asking! And you?", "voice_id": "IKne3meq5aSn9XLyUdCD"},
|
|
376
|
+
{"text": "I'm fantastic. Ready to test this dialogue feature.", "voice_id": "9BWtsMINqrJLrRacOk9x"}
|
|
377
|
+
]
|
|
378
|
+
Returns:
|
|
379
|
+
dict: A dictionary containing:
|
|
380
|
+
- 'type' (str): "audio".
|
|
381
|
+
- 'data' (str): The base64 encoded audio data.
|
|
382
|
+
- 'mime_type' (str): "audio/mpeg".
|
|
383
|
+
- 'file_name' (str): A suggested file name.
|
|
384
|
+
|
|
385
|
+
Raises:
|
|
386
|
+
ValueError: If the model ID is not supported.
|
|
387
|
+
|
|
388
|
+
Tags:
|
|
389
|
+
dialogue, conversational-ai, elevenlabs
|
|
390
|
+
"""
|
|
391
|
+
client = await self.get_client()
|
|
392
|
+
|
|
393
|
+
inputs = [DialogueInput(text=turn["text"], voice_id=turn["voice_id"]) for turn in dialogue_turns]
|
|
394
|
+
|
|
395
|
+
audio_generator = client.text_to_dialogue.convert(
|
|
396
|
+
inputs=inputs,
|
|
397
|
+
model_id=model_id,
|
|
398
|
+
output_format=output_format,
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
audio_data = b""
|
|
402
|
+
for chunk in audio_generator:
|
|
403
|
+
audio_data += chunk
|
|
404
|
+
|
|
405
|
+
audio_base64 = base64.b64encode(audio_data).decode("utf-8")
|
|
406
|
+
file_name = f"dialogue_{uuid.uuid4()}.mp3"
|
|
407
|
+
return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
|
|
408
|
+
|
|
409
|
+
async def remix_voice(
|
|
410
|
+
self,
|
|
411
|
+
voice_id: str,
|
|
412
|
+
voice_description: str,
|
|
413
|
+
text: Optional[str] = None,
|
|
414
|
+
) -> Dict[str, Any]:
|
|
415
|
+
"""
|
|
416
|
+
Remixes an existing voice to create a new one based on a description.
|
|
417
|
+
|
|
418
|
+
Args:
|
|
419
|
+
voice_id: The ID of the voice to remix.
|
|
420
|
+
voice_description: A description of how to change the voice (e.g., "Make the voice have a higher pitch").
|
|
421
|
+
text: Optional text for the voice to speak in the preview.
|
|
422
|
+
|
|
423
|
+
Returns:
|
|
424
|
+
dict: A dictionary containing:
|
|
425
|
+
- 'type' (str): "audio".
|
|
426
|
+
- 'data' (str): The base64 encoded audio data of the preview.
|
|
427
|
+
- 'mime_type' (str): "audio/mpeg".
|
|
428
|
+
- 'file_name' (str): A suggested file name.
|
|
429
|
+
- 'generated_voice_id' (str): The ID of the generated voice preview.
|
|
430
|
+
|
|
431
|
+
Tags:
|
|
432
|
+
voice-remixing, voice-modification, elevenlabs
|
|
433
|
+
"""
|
|
434
|
+
client = await self.get_client()
|
|
435
|
+
|
|
436
|
+
response = client.text_to_voice.remix(
|
|
437
|
+
voice_id=voice_id,
|
|
438
|
+
voice_description=voice_description,
|
|
439
|
+
text=text,
|
|
440
|
+
)
|
|
441
|
+
|
|
442
|
+
if not response.previews:
|
|
443
|
+
raise ValueError("No previews generated")
|
|
444
|
+
|
|
445
|
+
preview = response.previews[0]
|
|
446
|
+
file_name = f"remix_{preview.generated_voice_id}.mp3"
|
|
447
|
+
|
|
448
|
+
# preview.audio_base_64 is already a base64 string
|
|
449
|
+
return {
|
|
450
|
+
"type": "audio",
|
|
451
|
+
"data": preview.audio_base_64,
|
|
452
|
+
"mime_type": "audio/mpeg",
|
|
453
|
+
"file_name": file_name,
|
|
454
|
+
"generated_voice_id": preview.generated_voice_id,
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
# --- Forced Alignment ---
|
|
458
|
+
|
|
459
|
+
async def align_audio(
|
|
460
|
+
self,
|
|
461
|
+
audio_file_path: str,
|
|
462
|
+
text: str,
|
|
463
|
+
) -> Dict[str, Any]:
|
|
464
|
+
"""
|
|
465
|
+
Aligns text to an audio file, returning timing information for characters and words.
|
|
466
|
+
|
|
467
|
+
Args:
|
|
468
|
+
audio_file_path: The path to the audio file (local path or URL).
|
|
469
|
+
text: The transcript text corresponding to the audio.
|
|
470
|
+
|
|
471
|
+
Returns:
|
|
472
|
+
dict: The alignment result containing 'characters', 'words', and 'loss'.
|
|
473
|
+
|
|
474
|
+
Tags:
|
|
475
|
+
alignment, audio-sync, elevenlabs
|
|
476
|
+
"""
|
|
477
|
+
client = await self.get_client()
|
|
478
|
+
|
|
479
|
+
# Handle URL or local file
|
|
480
|
+
if audio_file_path.startswith("http://") or audio_file_path.startswith("https://"):
|
|
481
|
+
response = requests.get(audio_file_path)
|
|
482
|
+
response.raise_for_status()
|
|
483
|
+
audio_data = BytesIO(response.content)
|
|
484
|
+
else:
|
|
485
|
+
with open(audio_file_path, "rb") as f:
|
|
486
|
+
audio_data = BytesIO(f.read())
|
|
487
|
+
|
|
488
|
+
alignment = client.forced_alignment.create(file=audio_data, text=text)
|
|
489
|
+
|
|
490
|
+
return alignment.dict()
|
|
491
|
+
|
|
492
|
+
# --- Text to Music ---
|
|
493
|
+
|
|
494
|
+
async def convert_text_to_music(self, prompt: str, music_length_ms: Optional[int] = None) -> Dict[str, Any]:
|
|
495
|
+
"""
|
|
496
|
+
Generates music based on a text prompt.
|
|
497
|
+
|
|
498
|
+
Args:
|
|
499
|
+
prompt: A text description of the music to generate.
|
|
500
|
+
music_length_ms: Optional duration of the music in milliseconds.
|
|
501
|
+
|
|
502
|
+
Returns:
|
|
503
|
+
dict: The generated audio data including 'type', 'data' (base64), 'mime_type', and 'file_name'.
|
|
504
|
+
|
|
505
|
+
Tags:
|
|
506
|
+
music-generation, audio-generation, elevenlabs
|
|
507
|
+
"""
|
|
508
|
+
client = await self.get_client()
|
|
509
|
+
|
|
510
|
+
# The SDK returns a sync iterator of bytes (since client is sync)
|
|
511
|
+
audio_bytes = b""
|
|
512
|
+
for chunk in client.music.compose(prompt=prompt, music_length_ms=music_length_ms):
|
|
513
|
+
audio_bytes += chunk
|
|
514
|
+
|
|
515
|
+
file_name = f"music_{uuid.uuid4()}.mp3"
|
|
516
|
+
|
|
517
|
+
return {"type": "audio", "data": base64.b64encode(audio_bytes).decode("utf-8"), "mime_type": "audio/mpeg", "file_name": file_name}
|
|
518
|
+
|
|
519
|
+
# --- Voice Cloning ---
|
|
520
|
+
|
|
521
|
+
async def clone_voice(self, name: str, file_paths: List[str], description: Optional[str] = None) -> Dict[str, Any]:
|
|
522
|
+
"""
|
|
523
|
+
Clones a voice from provided audio samples (URLs or local paths).
|
|
524
|
+
|
|
525
|
+
Args:
|
|
526
|
+
name: Name of the cloned voice.
|
|
527
|
+
file_paths: List of absolute file paths or URLs to audio samples.
|
|
528
|
+
description: Optional description of the voice.
|
|
529
|
+
|
|
530
|
+
Returns:
|
|
531
|
+
dict: Metadata of the created voice, including 'voice_id'.
|
|
532
|
+
|
|
533
|
+
Tags:
|
|
534
|
+
voice-cloning, instant-cloning, elevenlabs
|
|
535
|
+
"""
|
|
536
|
+
client = await self.get_client()
|
|
537
|
+
files_data = []
|
|
538
|
+
|
|
539
|
+
for path in file_paths:
|
|
540
|
+
if path.startswith("http"):
|
|
541
|
+
response = requests.get(path)
|
|
542
|
+
response.raise_for_status()
|
|
543
|
+
files_data.append(BytesIO(response.content))
|
|
544
|
+
else:
|
|
545
|
+
# Read the whole file into memory so the file handle is closed as soon as the
|
|
546
|
+
# `with` block exits; the SDK is then given an in-memory BytesIO buffer.
|
|
547
|
+
with open(path, "rb") as f:
|
|
548
|
+
files_data.append(BytesIO(f.read()))
|
|
549
|
+
|
|
550
|
+
# client.voices.ivc.create returns AddVoiceIvcResponseModel which has voice_id
|
|
551
|
+
voice = client.voices.ivc.create(name=name, description=description, files=files_data)
|
|
552
|
+
|
|
553
|
+
return {"voice_id": voice.voice_id, "name": name, "status": "created"}
|
|
554
|
+
|
|
555
|
+
# --- Voice Design ---
|
|
556
|
+
|
|
557
|
+
async def design_voice(self, voice_description: str, text: Optional[str] = None) -> List[Dict[str, Any]]:
|
|
558
|
+
"""
|
|
559
|
+
Generates voice previews based on a text description.
|
|
560
|
+
|
|
561
|
+
Args:
|
|
562
|
+
voice_description: Description of the voice to generate (e.g., "A deep, resonant voice").
|
|
563
|
+
text: Optional text for the voice to speak. If not provided, it will be automatically generated.
|
|
564
|
+
|
|
565
|
+
Returns:
|
|
566
|
+
list: A list of voice previews, each containing 'generated_voice_id', 'audio_base_64', and 'duration_secs'.
|
|
567
|
+
|
|
568
|
+
Tags:
|
|
569
|
+
voice-design, voice-generation, elevenlabs
|
|
570
|
+
"""
|
|
571
|
+
client = await self.get_client()
|
|
572
|
+
|
|
573
|
+
# design() returns VoiceDesignPreviewResponse
|
|
574
|
+
# We need to access .previews which is a list of VoicePreviewResponseModel
|
|
575
|
+
response = client.text_to_voice.design(
|
|
576
|
+
voice_description=voice_description,
|
|
577
|
+
text=text,
|
|
578
|
+
# Using a default model that supports design
|
|
579
|
+
model_id="eleven_multilingual_ttv_v2",
|
|
580
|
+
)
|
|
581
|
+
|
|
582
|
+
previews = []
|
|
583
|
+
for preview in response.previews:
|
|
584
|
+
previews.append(
|
|
585
|
+
{
|
|
586
|
+
"generated_voice_id": preview.generated_voice_id,
|
|
587
|
+
"audio_base_64": preview.audio_base_64,
|
|
588
|
+
"duration_secs": preview.duration_secs,
|
|
589
|
+
"type": "audio",
|
|
590
|
+
"mime_type": "audio/mpeg",
|
|
591
|
+
}
|
|
592
|
+
)
|
|
593
|
+
|
|
594
|
+
return previews
|
|
595
|
+
|
|
596
|
+
# --- Audio Isolation ---
|
|
597
|
+
|
|
598
|
+
async def isolate_audio(self, audio_source: str) -> Dict[str, Any]:
|
|
599
|
+
"""
|
|
600
|
+
Removes background noise from audio.
|
|
601
|
+
|
|
602
|
+
Args:
|
|
603
|
+
audio_source: URL or path of the source audio.
|
|
604
|
+
|
|
605
|
+
Returns:
|
|
606
|
+
dict: A dictionary containing:
|
|
607
|
+
- 'type' (str): "audio".
|
|
608
|
+
- 'data' (str): The base64 encoded audio data.
|
|
609
|
+
- 'mime_type' (str): "audio/mpeg".
|
|
610
|
+
- 'file_name' (str): A suggested file name.
|
|
611
|
+
|
|
612
|
+
Tags:
|
|
613
|
+
audio-isolation, noise-removal, elevenlabs
|
|
614
|
+
"""
|
|
615
|
+
if audio_source.startswith(("http://", "https://")):
|
|
616
|
+
response = requests.get(audio_source)
|
|
617
|
+
response.raise_for_status()
|
|
618
|
+
audio_data_io = BytesIO(response.content)
|
|
619
|
+
else:
|
|
620
|
+
with open(audio_source, "rb") as f:
|
|
621
|
+
audio_data_io = BytesIO(f.read())
|
|
622
|
+
audio_data_io.name = "audio.mp3"
|
|
623
|
+
|
|
624
|
+
client = await self.get_client()
|
|
625
|
+
audio_generator = client.audio_isolation.convert(audio=audio_data_io)
|
|
626
|
+
|
|
627
|
+
output_data = b""
|
|
628
|
+
for chunk in audio_generator:
|
|
629
|
+
output_data += chunk
|
|
630
|
+
|
|
631
|
+
audio_base64 = base64.b64encode(output_data).decode("utf-8")
|
|
632
|
+
file_name = f"{uuid.uuid4()}.mp3"
|
|
633
|
+
return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
|
|
634
|
+
|
|
635
|
+
# --- Dubbing ---
|
|
636
|
+
|
|
637
|
+
async def dub_file(
|
|
638
|
+
self,
|
|
639
|
+
audio_source: str,
|
|
640
|
+
target_lang: str,
|
|
641
|
+
mode: str = "automatic",
|
|
642
|
+
source_lang: Optional[str] = None,
|
|
643
|
+
num_speakers: int = 0,
|
|
644
|
+
watermark: bool = False,
|
|
645
|
+
) -> Dict[str, Any]:
|
|
646
|
+
"""
|
|
647
|
+
Dubs an audio file into another language.
|
|
648
|
+
|
|
649
|
+
Args:
|
|
650
|
+
audio_source: URL or path of the source audio.
|
|
651
|
+
target_lang: The target language code.
|
|
652
|
+
mode: The dubbing mode. Defaults to "automatic".
|
|
653
|
+
source_lang: Optional source language code.
|
|
654
|
+
num_speakers: The number of speakers (0 for automatic detection). Defaults to 0.
|
|
655
|
+
watermark: Whether to add a watermark. Defaults to False.
|
|
101
656
|
|
|
657
|
+
Returns:
|
|
658
|
+
dict: The dubbing project metadata.
|
|
102
659
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
660
|
+
Tags:
|
|
661
|
+
dubbing, translation, elevenlabs
|
|
662
|
+
"""
|
|
663
|
+
if audio_source.startswith(("http://", "https://")):
|
|
664
|
+
response = requests.get(audio_source)
|
|
665
|
+
response.raise_for_status()
|
|
666
|
+
audio_data_io = BytesIO(response.content)
|
|
667
|
+
else:
|
|
668
|
+
with open(audio_source, "rb") as f:
|
|
669
|
+
audio_data_io = BytesIO(f.read())
|
|
109
670
|
|
|
671
|
+
client = await self.get_client()
|
|
672
|
+
return client.dubbing.create(
|
|
673
|
+
file=audio_data_io, target_lang=target_lang, mode=mode, source_lang=source_lang, num_speakers=num_speakers, watermark=watermark
|
|
674
|
+
).dict()
|
|
110
675
|
|
|
111
|
-
|
|
112
|
-
|
|
676
|
+
async def get_dubbing_project_metadata(self, dubbing_id: str) -> Dict[str, Any]:
|
|
677
|
+
"""
|
|
678
|
+
Gets metadata for a dubbing project.
|
|
679
|
+
|
|
680
|
+
Args:
|
|
681
|
+
dubbing_id: The ID of the dubbing project.
|
|
682
|
+
|
|
683
|
+
Returns:
|
|
684
|
+
dict: The project metadata.
|
|
113
685
|
|
|
114
|
-
|
|
686
|
+
Tags:
|
|
687
|
+
dubbing, project-metadata, elevenlabs
|
|
688
|
+
"""
|
|
689
|
+
client = await self.get_client()
|
|
690
|
+
return client.dubbing.get(dubbing_id=dubbing_id).dict()
|
|
691
|
+
|
|
692
|
+
async def get_dubbed_file(self, dubbing_id: str, language_code: str) -> Dict[str, Any]:
|
|
693
|
+
"""
|
|
694
|
+
Downloads a dubbed file.
|
|
695
|
+
|
|
696
|
+
Args:
|
|
697
|
+
dubbing_id: The ID of the dubbing project.
|
|
698
|
+
language_code: The language code of the dubbed file.
|
|
699
|
+
|
|
700
|
+
Returns:
|
|
701
|
+
dict: A dictionary containing:
|
|
702
|
+
- 'type' (str): "audio".
|
|
703
|
+
- 'data' (str): The base64 encoded audio data.
|
|
704
|
+
- 'mime_type' (str): "audio/mpeg".
|
|
705
|
+
- 'file_name' (str): A suggested file name.
|
|
706
|
+
|
|
707
|
+
Tags:
|
|
708
|
+
dubbing, file-download, elevenlabs
|
|
709
|
+
"""
|
|
710
|
+
client = await self.get_client()
|
|
711
|
+
audio_generator = client.dubbing.audio.get(dubbing_id=dubbing_id, language_code=language_code)
|
|
712
|
+
|
|
713
|
+
output_data = b""
|
|
714
|
+
for chunk in audio_generator:
|
|
715
|
+
output_data += chunk
|
|
716
|
+
|
|
717
|
+
audio_base64 = base64.b64encode(output_data).decode("utf-8")
|
|
718
|
+
file_name = f"{dubbing_id}_{language_code}.mp3"
|
|
719
|
+
return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
|
|
720
|
+
|
|
721
|
+
# --- Models ---
|
|
722
|
+
|
|
723
|
+
async def get_models(self) -> List[Dict[str, Any]]:
|
|
724
|
+
"""
|
|
725
|
+
Lists available models.
|
|
726
|
+
|
|
727
|
+
Returns:
|
|
728
|
+
list: A list of available models and their details.
|
|
729
|
+
|
|
730
|
+
Tags:
|
|
731
|
+
models, list-models, elevenlabs
|
|
732
|
+
"""
|
|
733
|
+
client = await self.get_client()
|
|
734
|
+
return [model.dict() for model in client.models.list()]
|
|
735
|
+
|
|
736
|
+
# --- User ---
|
|
737
|
+
|
|
738
|
+
async def get_user_info(self) -> Dict[str, Any]:
|
|
739
|
+
"""
|
|
740
|
+
Gets user information.
|
|
741
|
+
|
|
742
|
+
Returns:
|
|
743
|
+
dict: The user information.
|
|
744
|
+
|
|
745
|
+
Tags:
|
|
746
|
+
user, profile, elevenlabs
|
|
747
|
+
"""
|
|
748
|
+
client = await self.get_client()
|
|
749
|
+
return client.user.get().dict()
|
|
750
|
+
|
|
751
|
+
async def get_user_subscription(self) -> Dict[str, Any]:
|
|
752
|
+
"""
|
|
753
|
+
Gets user subscription details.
|
|
754
|
+
|
|
755
|
+
Returns:
|
|
756
|
+
dict: The subscription details.
|
|
757
|
+
|
|
758
|
+
Tags:
|
|
759
|
+
user, subscription, elevenlabs
|
|
760
|
+
"""
|
|
761
|
+
client = await self.get_client()
|
|
762
|
+
return client.user.subscription.get().dict()
|
|
763
|
+
|
|
764
|
+
# --- Usage ---
|
|
765
|
+
|
|
766
|
+
async def get_usage(self, start_unix: Optional[int] = None, end_unix: Optional[int] = None) -> Dict[str, Any]:
|
|
767
|
+
"""
|
|
768
|
+
Gets usage statistics. Defaults to the last 30 days if dates are not provided.
|
|
769
|
+
|
|
770
|
+
Args:
|
|
771
|
+
start_unix (Optional[int]): Start of the usage window as a Unix timestamp. Defaults to 30 days before end_unix.
|
|
772
|
+
end_unix (Optional[int]): End of the usage window as a Unix timestamp. Defaults to the current time.
|
|
773
|
+
|
|
774
|
+
Returns:
|
|
775
|
+
dict: Usage statistics.
|
|
776
|
+
|
|
777
|
+
Tags:
|
|
778
|
+
usage, statistics, elevenlabs
|
|
779
|
+
"""
|
|
780
|
+
client = await self.get_client()
|
|
781
|
+
if end_unix is None:
|
|
782
|
+
end_unix = int(time.time())
|
|
783
|
+
if start_unix is None:
|
|
784
|
+
start_unix = end_unix - 30 * 24 * 3600 # 30 days ago
|
|
785
|
+
|
|
786
|
+
return client.usage.get(start_unix=start_unix, end_unix=end_unix).dict()
|
|
787
|
+
|
|
788
|
+
# --- Tool Listing ---
|
|
789
|
+
|
|
790
|
+
def list_tools(self):
|
|
791
|
+
return [
|
|
792
|
+
self.text_to_speech,
|
|
793
|
+
self.speech_to_text,
|
|
794
|
+
self.speech_to_speech,
|
|
795
|
+
self.get_history_items,
|
|
796
|
+
self.get_history_item,
|
|
797
|
+
self.delete_history_item,
|
|
798
|
+
self.get_history_item_audio,
|
|
799
|
+
self.get_voices,
|
|
800
|
+
self.get_voice,
|
|
801
|
+
self.delete_voice,
|
|
802
|
+
self.get_voice_samples,
|
|
803
|
+
self.delete_sample,
|
|
804
|
+
self.convert_text_to_sound_effect,
|
|
805
|
+
self.convert_text_to_dialogue,
|
|
806
|
+
self.remix_voice,
|
|
807
|
+
self.convert_text_to_music,
|
|
808
|
+
self.clone_voice,
|
|
809
|
+
self.design_voice,
|
|
810
|
+
self.align_audio,
|
|
811
|
+
self.isolate_audio,
|
|
812
|
+
self.dub_file,
|
|
813
|
+
self.get_dubbing_project_metadata,
|
|
814
|
+
self.get_dubbed_file,
|
|
815
|
+
self.get_models,
|
|
816
|
+
self.get_user_info,
|
|
817
|
+
self.get_user_subscription,
|
|
818
|
+
self.get_usage,
|
|
819
|
+
]
|