universal-mcp-applications 0.1.39rc8__py3-none-any.whl → 0.1.39rc16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of universal-mcp-applications might be problematic; additional details are available in the registry's advisory listing for this release.

Files changed (45)
  1. universal_mcp/applications/BEST_PRACTICES.md +1 -1
  2. universal_mcp/applications/airtable/app.py +13 -13
  3. universal_mcp/applications/apollo/app.py +2 -2
  4. universal_mcp/applications/aws_s3/app.py +30 -19
  5. universal_mcp/applications/browser_use/app.py +10 -7
  6. universal_mcp/applications/contentful/app.py +4 -4
  7. universal_mcp/applications/crustdata/app.py +2 -2
  8. universal_mcp/applications/e2b/app.py +3 -4
  9. universal_mcp/applications/elevenlabs/README.md +27 -3
  10. universal_mcp/applications/elevenlabs/app.py +753 -48
  11. universal_mcp/applications/exa/app.py +18 -11
  12. universal_mcp/applications/falai/README.md +5 -7
  13. universal_mcp/applications/falai/app.py +160 -159
  14. universal_mcp/applications/firecrawl/app.py +14 -15
  15. universal_mcp/applications/ghost_content/app.py +4 -4
  16. universal_mcp/applications/github/app.py +2 -2
  17. universal_mcp/applications/gong/app.py +2 -2
  18. universal_mcp/applications/google_docs/README.md +15 -14
  19. universal_mcp/applications/google_docs/app.py +5 -4
  20. universal_mcp/applications/google_gemini/app.py +61 -17
  21. universal_mcp/applications/google_sheet/README.md +2 -1
  22. universal_mcp/applications/google_sheet/app.py +55 -0
  23. universal_mcp/applications/heygen/README.md +10 -32
  24. universal_mcp/applications/heygen/app.py +350 -744
  25. universal_mcp/applications/klaviyo/app.py +2 -2
  26. universal_mcp/applications/linkedin/README.md +14 -2
  27. universal_mcp/applications/linkedin/app.py +411 -38
  28. universal_mcp/applications/ms_teams/app.py +420 -1285
  29. universal_mcp/applications/notion/app.py +2 -2
  30. universal_mcp/applications/openai/app.py +1 -1
  31. universal_mcp/applications/perplexity/app.py +6 -7
  32. universal_mcp/applications/reddit/app.py +4 -4
  33. universal_mcp/applications/resend/app.py +31 -32
  34. universal_mcp/applications/rocketlane/app.py +2 -2
  35. universal_mcp/applications/scraper/app.py +51 -21
  36. universal_mcp/applications/semrush/app.py +1 -1
  37. universal_mcp/applications/serpapi/app.py +8 -7
  38. universal_mcp/applications/shopify/app.py +5 -7
  39. universal_mcp/applications/shortcut/app.py +3 -2
  40. universal_mcp/applications/slack/app.py +2 -2
  41. universal_mcp/applications/twilio/app.py +14 -13
  42. {universal_mcp_applications-0.1.39rc8.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/METADATA +1 -1
  43. {universal_mcp_applications-0.1.39rc8.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/RECORD +45 -45
  44. {universal_mcp_applications-0.1.39rc8.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/WHEEL +0 -0
  45. {universal_mcp_applications-0.1.39rc8.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/licenses/LICENSE +0 -0
@@ -1,25 +1,29 @@
1
+ import base64
1
2
  import uuid
3
+ import time
2
4
  from io import BytesIO
5
+ from typing import Any, Dict, List, Optional
3
6
  import requests
7
+
4
8
  from universal_mcp.applications.application import APIApplication
5
9
  from universal_mcp.exceptions import NotAuthorizedError
6
10
  from universal_mcp.integrations import Integration
7
- from elevenlabs import ElevenLabs
8
- from universal_mcp.applications.file_system.app import FileSystemApp
11
+ from elevenlabs.client import ElevenLabs
12
+ from elevenlabs import DialogueInput
9
13
 
10
14
 
11
15
  class ElevenlabsApp(APIApplication):
12
16
  def __init__(self, integration: Integration = None, **kwargs) -> None:
13
17
  super().__init__(name="elevenlabs", integration=integration, **kwargs)
14
18
  self.base_url = "https://api.elevenlabs.io"
19
+ self._client = None
15
20
 
16
- @property
17
- def client(self) -> ElevenLabs:
21
+ async def get_client(self) -> ElevenLabs:
18
22
  """
19
23
  A property that lazily initializes and returns an authenticated `ElevenLabs` SDK client. On first access, it retrieves the API key from integration credentials and caches the instance, raising a `NotAuthorizedError` if credentials are not found.
20
24
  """
21
25
  if self._client is None:
22
- credentials = self.integration.get_credentials()
26
+ credentials = await self.integration.get_credentials_async()
23
27
  if not credentials:
24
28
  raise NotAuthorizedError("No credentials found")
25
29
  api_key = credentials.get("api_key") or credentials.get("API_KEY") or credentials.get("apiKey")
@@ -28,87 +32,788 @@ class ElevenlabsApp(APIApplication):
28
32
  self._client = ElevenLabs(api_key=api_key)
29
33
  return self._client
30
34
 
31
- async def generate_speech_audio_url(
32
- self, text: str, voice_id: str = "21m00Tcm4TlvDq8ikWAM", model_id: str = "eleven_multilingual_v2"
33
- ) -> bytes:
35
+ # --- Text to Speech ---
36
+
37
+ async def text_to_speech(
38
+ self,
39
+ text: str,
40
+ voice_id: str = "21m00Tcm4TlvDq8ikWAM",
41
+ model_id: str = "eleven_multilingual_v2",
42
+ ) -> Dict[str, Any]:
34
43
  """
35
- Converts a text string into speech using the ElevenLabs API. The function then saves the generated audio to a temporary MP3 file and returns a public URL to access it, rather than the raw audio bytes.
44
+ Converts text to speech and returns the generated audio data.
36
45
 
37
46
  Args:
38
- text (str): The text to convert to speech.
39
- voice_id (str): The ID of the voice to use.
40
- model_id (str, optional): The model to use. Defaults to "eleven_multilingual_v2".
41
- stability (float, optional): The stability of the voice.
42
- similarity_boost (float, optional): The similarity boost of the voice.
47
+ text: The text to convert to speech.
48
+ voice_id: The ID of the voice to use. Defaults to "21m00Tcm4TlvDq8ikWAM" (Rachel).
49
+ model_id: The model to use. Defaults to "eleven_multilingual_v2".
43
50
 
44
51
  Returns:
45
- bytes: The audio data.
52
+ dict: A dictionary containing:
53
+ - 'type' (str): "audio".
54
+ - 'data' (str): The base64 encoded audio data.
55
+ - 'mime_type' (str): "audio/mpeg".
56
+ - 'file_name' (str): A suggested file name.
46
57
 
47
58
  Tags:
48
- important
59
+ text-to-speech, speech-synthesis, audio-generation, elevenlabs, important
49
60
  """
50
- audio_generator = self.client.text_to_speech.convert(text=text, voice_id=voice_id, model_id=model_id, output_format="mp3_44100_128")
61
+ client = await self.get_client()
62
+
63
+ audio_generator = client.text_to_speech.convert(
64
+ text=text,
65
+ voice_id=voice_id,
66
+ model_id=model_id,
67
+ output_format="mp3_44100_128",
68
+ )
69
+
51
70
  audio_data = b""
52
71
  for chunk in audio_generator:
53
72
  audio_data += chunk
54
- upload_result = await FileSystemApp.write_file(audio_data, f"/tmp/{uuid.uuid4()}.mp3")
55
- return upload_result["data"]["url"]
73
+
74
+ audio_base64 = base64.b64encode(audio_data).decode("utf-8")
75
+ file_name = f"{uuid.uuid4()}.mp3"
76
+ return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
77
+
78
+ # --- Speech to Text ---
56
79
 
57
80
  async def speech_to_text(self, audio_file_path: str, language_code: str = "eng", diarize: bool = True) -> str:
58
81
  """
59
- Transcribes an audio file into text using the ElevenLabs API. It supports language specification and speaker diarization, providing the inverse operation to the audio-generating `text_to_speech` method. Note: The docstring indicates this is a placeholder for an undocumented endpoint.
82
+ Transcribes an audio file into text.
60
83
 
61
84
  Args:
62
85
  audio_file_path (str): The path to the audio file.
86
+ language_code (str): Language code (ISO 639-1). Defaults to "eng".
87
+ diarize (bool): Whether to distinguish speakers. Defaults to True.
63
88
 
64
89
  Returns:
65
90
  str: The transcribed text.
66
91
 
67
92
  Tags:
68
- important
93
+ speech-to-text, transcription, audio-processing, elevenlabs, important
69
94
  """
70
- transcription = self.client.speech_to_text.convert(
71
- file=audio_file_path, model_id="scribe_v1", tag_audio_events=True, language_code=language_code, diarize=diarize
95
+ client = await self.get_client()
96
+ if audio_file_path.startswith(("http://", "https://")):
97
+ response = requests.get(audio_file_path)
98
+ response.raise_for_status()
99
+ audio_data_io = BytesIO(response.content)
100
+ else:
101
+ with open(audio_file_path, "rb") as f:
102
+ audio_data_io = BytesIO(f.read())
103
+ transcription = client.speech_to_text.convert(
104
+ file=audio_data_io, model_id="scribe_v1", tag_audio_events=True, language_code=language_code, diarize=diarize
72
105
  )
73
- return transcription
106
+ return transcription.text
107
+
108
+ # --- Speech to Speech ---
74
109
 
75
110
  async def speech_to_speech(
76
- self, audio_url: str, voice_id: str = "21m00Tcm4TlvDq8ikWAM", model_id: str = "eleven_multilingual_sts_v2"
77
- ) -> bytes:
111
+ self, audio_source: str, voice_id: str = "21m00Tcm4TlvDq8ikWAM", model_id: str = "eleven_multilingual_sts_v2"
112
+ ) -> Dict[str, Any]:
78
113
  """
79
- Downloads an audio file from a URL and converts the speech into a specified target voice using the ElevenLabs API. This function transforms the speaker's voice in an existing recording and returns the new audio data as bytes, distinct from creating audio from text.
114
+ Converts speech from an audio source (URL or local path) to a different voice.
80
115
 
81
116
  Args:
82
- voice_id (str): The ID of the voice to use for the conversion.
83
- audio_file_path (str): The path to the audio file to transform.
84
- model_id (str, optional): The model to use. Defaults to "eleven_multilingual_sts_v2".
117
+ audio_source: URL or path of the source audio.
118
+ voice_id: Target voice ID.
119
+ model_id: Model ID. Defaults to "eleven_multilingual_sts_v2".
85
120
 
86
121
  Returns:
87
- bytes: The transformed audio data.
122
+ dict: A dictionary containing:
123
+ - 'type' (str): "audio".
124
+ - 'data' (str): The base64 encoded audio data.
125
+ - 'mime_type' (str): "audio/mpeg".
126
+ - 'file_name' (str): A suggested file name.
88
127
 
89
128
  Tags:
90
- important
129
+ speech-to-speech, voice-conversion, audio-processing, elevenlabs, important
91
130
  """
92
- response = requests.get(audio_url)
93
- audio_data = BytesIO(response.content)
94
- response = self.client.speech_to_speech.convert(
95
- voice_id=voice_id, audio=audio_data, model_id=model_id, output_format="mp3_44100_128"
131
+ if audio_source.startswith(("http://", "https://")):
132
+ response = requests.get(audio_source)
133
+ response.raise_for_status()
134
+ audio_data_io = BytesIO(response.content)
135
+ else:
136
+ with open(audio_source, "rb") as f:
137
+ audio_data_io = BytesIO(f.read())
138
+
139
+ client = await self.get_client()
140
+ audio_stream = client.speech_to_speech.convert(
141
+ voice_id=voice_id, audio=audio_data_io, model_id=model_id, output_format="mp3_44100_128"
96
142
  )
97
- return response.content
98
143
 
99
- def list_tools(self):
100
- return [self.generate_speech_audio_url, self.speech_to_text, self.speech_to_speech]
144
+ output_data = b""
145
+ for chunk in audio_stream:
146
+ output_data += chunk
147
+
148
+ audio_base64 = base64.b64encode(output_data).decode("utf-8")
149
+ file_name = f"{uuid.uuid4()}.mp3"
150
+ return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
151
+
152
+ # --- History ---
153
+
154
+ async def get_history_items(self, page_size: int = 100, start_after_history_item_id: Optional[str] = None) -> Dict[str, Any]:
155
+ """
156
+ Returns a list of generated audio history items.
157
+
158
+ Args:
159
+ page_size: The number of items to return. Defaults to 100.
160
+ start_after_history_item_id: The ID of the item to start after for pagination.
161
+
162
+ Returns:
163
+ dict: The history response containing a list of history items.
164
+
165
+ Tags:
166
+ history, audio-logs, elevenlabs
167
+ """
168
+ client = await self.get_client()
169
+ return client.history.list(page_size=page_size, start_after_history_item_id=start_after_history_item_id).dict()
170
+
171
+ async def get_history_item(self, history_item_id: str) -> Dict[str, Any]:
172
+ """
173
+ Retrieves a specific history item by ID.
174
+
175
+ Args:
176
+ history_item_id: The ID of the history item to retrieve.
177
+
178
+ Returns:
179
+ dict: The details of the history item.
180
+
181
+ Tags:
182
+ history, audio-logs, elevenlabs
183
+ """
184
+ client = await self.get_client()
185
+ return client.history.get(history_item_id=history_item_id).dict()
186
+
187
+ async def delete_history_item(self, history_item_id: str) -> Dict[str, Any]:
188
+ """
189
+ Deletes a history item by ID.
190
+
191
+ Args:
192
+ history_item_id: The ID of the history item to delete.
193
+
194
+ Returns:
195
+ dict: The deletion status.
196
+
197
+ Tags:
198
+ history, audio-logs, elevenlabs
199
+ """
200
+ client = await self.get_client()
201
+ return client.history.delete(history_item_id=history_item_id)
202
+
203
+ async def get_history_item_audio(self, history_item_id: str) -> Dict[str, Any]:
204
+ """
205
+ Gets the audio for a history item.
206
+
207
+ Args:
208
+ history_item_id: The ID of the history item.
209
+
210
+ Returns:
211
+ dict: A dictionary containing:
212
+ - 'type' (str): "audio".
213
+ - 'data' (str): The base64 encoded audio data.
214
+ - 'mime_type' (str): "audio/mpeg".
215
+ - 'file_name' (str): A suggested file name.
216
+
217
+ Tags:
218
+ history, audio-download, elevenlabs
219
+ """
220
+ client = await self.get_client()
221
+ audio_generator = client.history.get_audio(history_item_id=history_item_id)
222
+ audio_data = b""
223
+ for chunk in audio_generator:
224
+ audio_data += chunk
225
+
226
+ audio_base64 = base64.b64encode(audio_data).decode("utf-8")
227
+ file_name = f"{history_item_id}.mp3"
228
+ return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
229
+
230
+ # --- Voices ---
231
+
232
+ async def get_voices(self) -> Dict[str, Any]:
233
+ """
234
+ Lists all available voices.
235
+
236
+ Returns:
237
+ dict: A dictionary containing the list of voices.
238
+
239
+ Tags:
240
+ voices, list-voices, elevenlabs
241
+ """
242
+ client = await self.get_client()
243
+ return client.voices.get_all().dict()
244
+
245
+ async def get_voice(self, voice_id: str) -> Dict[str, Any]:
246
+ """
247
+ Gets details of a specific voice.
248
+
249
+ Args:
250
+ voice_id: The ID of the voice to retrieve.
251
+
252
+ Returns:
253
+ dict: The voice details.
254
+
255
+ Tags:
256
+ voices, voice-details, elevenlabs
257
+ """
258
+ client = await self.get_client()
259
+ return client.voices.get(voice_id=voice_id).dict()
260
+
261
+ async def delete_voice(self, voice_id: str) -> Dict[str, Any]:
262
+ """
263
+ Deletes a voice by ID.
264
+
265
+ Args:
266
+ voice_id: The ID of the voice to delete.
267
+
268
+ Returns:
269
+ dict: The deletion status.
270
+
271
+ Tags:
272
+ voices, delete-voice, elevenlabs
273
+ """
274
+ client = await self.get_client()
275
+ return client.voices.delete(voice_id=voice_id).dict()
276
+
277
+ # --- Samples ---
278
+
279
+ async def get_voice_samples(self, voice_id: str) -> List[Dict[str, Any]]:
280
+ """
281
+ Gets samples for a specific voice.
282
+
283
+ Args:
284
+ voice_id: The ID of the voice.
285
+
286
+ Returns:
287
+ list: A list of voice samples.
288
+
289
+ Tags:
290
+ samples, voice-samples, elevenlabs
291
+ """
292
+ voice = await self.get_voice(voice_id)
293
+ # Check if voice is dict or object
294
+ if hasattr(voice, "samples"):
295
+ return [s.dict() for s in voice.samples]
296
+ # Pydantic .dict() might return 'samples' as None if items are missing
297
+ samples = voice.get("samples")
298
+ if samples is None:
299
+ return []
300
+ return samples
301
+
302
+ async def delete_sample(self, voice_id: str, sample_id: str) -> Dict[str, Any]:
303
+ """
304
+ Deletes a sample.
305
+
306
+ Args:
307
+ voice_id: The ID of the voice.
308
+ sample_id: The ID of the sample to delete.
309
+
310
+ Returns:
311
+ dict: The deletion status.
312
+
313
+ Tags:
314
+ samples, delete-sample, elevenlabs
315
+ """
316
+ client = await self.get_client()
317
+ return client.samples.delete(voice_id=voice_id, sample_id=sample_id).dict()
318
+
319
+ # --- Text to Sound Effects ---
320
+
321
+ async def convert_text_to_sound_effect(
322
+ self, text: str, duration_seconds: Optional[float] = None, prompt_influence: float = 0.3
323
+ ) -> Dict[str, Any]:
324
+ """
325
+ Converts text to sound effects.
326
+
327
+ Args:
328
+ text: A text description of the sound effect.
329
+ duration_seconds: The duration of the sound effect in seconds.
330
+ prompt_influence: The influence of the prompt on the generation (0.0 to 1.0). Defaults to 0.3.
331
+
332
+ Returns:
333
+ dict: A dictionary containing:
334
+ - 'type' (str): "audio".
335
+ - 'data' (str): The base64 encoded audio data.
336
+ - 'mime_type' (str): "audio/mpeg".
337
+ - 'file_name' (str): A suggested file name.
338
+
339
+ Tags:
340
+ sound-effects, audio-generation, elevenlabs
341
+ """
342
+ client = await self.get_client()
343
+ audio_generator = client.text_to_sound_effects.convert(
344
+ text=text, duration_seconds=duration_seconds, prompt_influence=prompt_influence
345
+ )
346
+ audio_data = b""
347
+ for chunk in audio_generator:
348
+ audio_data += chunk
349
+
350
+ audio_base64 = base64.b64encode(audio_data).decode("utf-8")
351
+ file_name = f"{uuid.uuid4()}.mp3"
352
+ return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
353
+
354
+ # --- Text to Dialogue ---
355
+
356
+ async def convert_text_to_dialogue(
357
+ self,
358
+ dialogue_turns: List[Dict[str, str]],
359
+ model_id: str = "eleven_v3",
360
+ output_format: str = "mp3_44100_128",
361
+ ) -> Dict[str, Any]:
362
+ """
363
+ Converts a list of text and voice ID pairs into speech (dialogue) and returns synthesized audio.
364
+
365
+ Args:
366
+ dialogue_turns: A list of dictionaries, each containing:
367
+ - 'text' (str): The text to be spoken.
368
+ - 'voice_id' (str): The ID of the voice to use.
369
+ model_id: The model to use. Defaults to "eleven_v3".
370
+ output_format: The output format. Defaults to "mp3_44100_128".
371
+
372
+ Example:
373
+ dialogue_turns = [
374
+ {"text": "Hello there! How are you doing today?", "voice_id": "9BWtsMINqrJLrRacOk9x"},
375
+ {"text": "I'm doing great, thanks for asking! And you?", "voice_id": "IKne3meq5aSn9XLyUdCD"},
376
+ {"text": "I'm fantastic. Ready to test this dialogue feature.", "voice_id": "9BWtsMINqrJLrRacOk9x"}
377
+ ]
378
+ Returns:
379
+ dict: A dictionary containing:
380
+ - 'type' (str): "audio".
381
+ - 'data' (str): The base64 encoded audio data.
382
+ - 'mime_type' (str): "audio/mpeg".
383
+ - 'file_name' (str): A suggested file name.
384
+
385
+ Raises:
386
+ ValueError: If the model ID is not supported.
387
+
388
+ Tags:
389
+ dialogue, conversational-ai, elevenlabs
390
+ """
391
+ client = await self.get_client()
392
+
393
+ inputs = [DialogueInput(text=turn["text"], voice_id=turn["voice_id"]) for turn in dialogue_turns]
394
+
395
+ audio_generator = client.text_to_dialogue.convert(
396
+ inputs=inputs,
397
+ model_id=model_id,
398
+ output_format=output_format,
399
+ )
400
+
401
+ audio_data = b""
402
+ for chunk in audio_generator:
403
+ audio_data += chunk
404
+
405
+ audio_base64 = base64.b64encode(audio_data).decode("utf-8")
406
+ file_name = f"dialogue_{uuid.uuid4()}.mp3"
407
+ return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
408
+
409
+ async def remix_voice(
410
+ self,
411
+ voice_id: str,
412
+ voice_description: str,
413
+ text: Optional[str] = None,
414
+ ) -> Dict[str, Any]:
415
+ """
416
+ Remixes an existing voice to create a new one based on a description.
417
+
418
+ Args:
419
+ voice_id: The ID of the voice to remix.
420
+ voice_description: A description of how to change the voice (e.g., "Make the voice have a higher pitch").
421
+ text: Optional text for the voice to speak in the preview.
422
+
423
+ Returns:
424
+ dict: A dictionary containing:
425
+ - 'type' (str): "audio".
426
+ - 'data' (str): The base64 encoded audio data of the preview.
427
+ - 'mime_type' (str): "audio/mpeg".
428
+ - 'file_name' (str): A suggested file name.
429
+ - 'generated_voice_id' (str): The ID of the generated voice preview.
430
+
431
+ Tags:
432
+ voice-remixing, voice-modification, elevenlabs
433
+ """
434
+ client = await self.get_client()
435
+
436
+ response = client.text_to_voice.remix(
437
+ voice_id=voice_id,
438
+ voice_description=voice_description,
439
+ text=text,
440
+ )
441
+
442
+ if not response.previews:
443
+ raise ValueError("No previews generated")
444
+
445
+ preview = response.previews[0]
446
+ file_name = f"remix_{preview.generated_voice_id}.mp3"
447
+
448
+ # preview.audio_base_64 is already a base64 string
449
+ return {
450
+ "type": "audio",
451
+ "data": preview.audio_base_64,
452
+ "mime_type": "audio/mpeg",
453
+ "file_name": file_name,
454
+ "generated_voice_id": preview.generated_voice_id,
455
+ }
456
+
457
+ # --- Forced Alignment ---
458
+
459
+ async def align_audio(
460
+ self,
461
+ audio_file_path: str,
462
+ text: str,
463
+ ) -> Dict[str, Any]:
464
+ """
465
+ Aligns text to an audio file, returning timing information for characters and words.
466
+
467
+ Args:
468
+ audio_file_path: The path to the audio file (local path or URL).
469
+ text: The transcript text corresponding to the audio.
470
+
471
+ Returns:
472
+ dict: The alignment result containing 'characters', 'words', and 'loss'.
473
+
474
+ Tags:
475
+ alignment, audio-sync, elevenlabs
476
+ """
477
+ client = await self.get_client()
478
+
479
+ # Handle URL or local file
480
+ if audio_file_path.startswith("http://") or audio_file_path.startswith("https://"):
481
+ response = requests.get(audio_file_path)
482
+ response.raise_for_status()
483
+ audio_data = BytesIO(response.content)
484
+ else:
485
+ with open(audio_file_path, "rb") as f:
486
+ audio_data = BytesIO(f.read())
487
+
488
+ alignment = client.forced_alignment.create(file=audio_data, text=text)
489
+
490
+ return alignment.dict()
491
+
492
+ # --- Text to Music ---
493
+
494
+ async def convert_text_to_music(self, prompt: str, music_length_ms: Optional[int] = None) -> Dict[str, Any]:
495
+ """
496
+ Generates music based on a text prompt.
497
+
498
+ Args:
499
+ prompt: A text description of the music to generate.
500
+ music_length_ms: Optional duration of the music in milliseconds.
501
+
502
+ Returns:
503
+ dict: The generated audio data including 'type', 'data' (base64), 'mime_type', and 'file_name'.
504
+
505
+ Tags:
506
+ music-generation, audio-generation, elevenlabs
507
+ """
508
+ client = await self.get_client()
509
+
510
+ # The SDK returns a sync iterator of bytes (since client is sync)
511
+ audio_bytes = b""
512
+ for chunk in client.music.compose(prompt=prompt, music_length_ms=music_length_ms):
513
+ audio_bytes += chunk
514
+
515
+ file_name = f"music_{uuid.uuid4()}.mp3"
516
+
517
+ return {"type": "audio", "data": base64.b64encode(audio_bytes).decode("utf-8"), "mime_type": "audio/mpeg", "file_name": file_name}
518
+
519
+ # --- Voice Cloning ---
520
+
521
+ async def clone_voice(self, name: str, file_paths: List[str], description: Optional[str] = None) -> Dict[str, Any]:
522
+ """
523
+ Clones a voice from provided audio samples (URLs or local paths).
524
+
525
+ Args:
526
+ name: Name of the cloned voice.
527
+ file_paths: List of absolute file paths or URLs to audio samples.
528
+ description: Optional description of the voice.
529
+
530
+ Returns:
531
+ dict: Metadata of the created voice, including 'voice_id'.
532
+
533
+ Tags:
534
+ voice-cloning, instant-cloning, elevenlabs
535
+ """
536
+ client = await self.get_client()
537
+ files_data = []
538
+
539
+ for path in file_paths:
540
+ if path.startswith("http"):
541
+ response = requests.get(path)
542
+ response.raise_for_status()
543
+ files_data.append(BytesIO(response.content))
544
+ else:
545
+ # Read into memory so we can close the file handle immediately if needed,
546
+ # though BytesIO is preferred for the SDK.
547
+ with open(path, "rb") as f:
548
+ files_data.append(BytesIO(f.read()))
549
+
550
+ # client.voices.ivc.create returns AddVoiceIvcResponseModel which has voice_id
551
+ voice = client.voices.ivc.create(name=name, description=description, files=files_data)
552
+
553
+ return {"voice_id": voice.voice_id, "name": name, "status": "created"}
554
+
555
+ # --- Voice Design ---
556
+
557
+ async def design_voice(self, voice_description: str, text: Optional[str] = None) -> List[Dict[str, Any]]:
558
+ """
559
+ Generates voice previews based on a text description.
560
+
561
+ Args:
562
+ voice_description: Description of the voice to generate (e.g., "A deep, resonant voice").
563
+ text: Optional text for the voice to speak. If not provided, it will be automatically generated.
564
+
565
+ Returns:
566
+ list: A list of voice previews, each containing 'generated_voice_id', 'audio_base_64', and 'duration_secs'.
567
+
568
+ Tags:
569
+ voice-design, voice-generation, elevenlabs
570
+ """
571
+ client = await self.get_client()
572
+
573
+ # design() returns VoiceDesignPreviewResponse
574
+ # We need to access .previews which is a list of VoicePreviewResponseModel
575
+ response = client.text_to_voice.design(
576
+ voice_description=voice_description,
577
+ text=text,
578
+ # Using a default model that supports design
579
+ model_id="eleven_multilingual_ttv_v2",
580
+ )
581
+
582
+ previews = []
583
+ for preview in response.previews:
584
+ previews.append(
585
+ {
586
+ "generated_voice_id": preview.generated_voice_id,
587
+ "audio_base_64": preview.audio_base_64,
588
+ "duration_secs": preview.duration_secs,
589
+ "type": "audio",
590
+ "mime_type": "audio/mpeg",
591
+ }
592
+ )
593
+
594
+ return previews
595
+
596
+ # --- Audio Isolation ---
597
+
598
+ async def isolate_audio(self, audio_source: str) -> Dict[str, Any]:
599
+ """
600
+ Removes background noise from audio.
601
+
602
+ Args:
603
+ audio_source: URL or path of the source audio.
604
+
605
+ Returns:
606
+ dict: A dictionary containing:
607
+ - 'type' (str): "audio".
608
+ - 'data' (str): The base64 encoded audio data.
609
+ - 'mime_type' (str): "audio/mpeg".
610
+ - 'file_name' (str): A suggested file name.
611
+
612
+ Tags:
613
+ audio-isolation, noise-removal, elevenlabs
614
+ """
615
+ if audio_source.startswith(("http://", "https://")):
616
+ response = requests.get(audio_source)
617
+ response.raise_for_status()
618
+ audio_data_io = BytesIO(response.content)
619
+ else:
620
+ with open(audio_source, "rb") as f:
621
+ audio_data_io = BytesIO(f.read())
622
+ audio_data_io.name = "audio.mp3"
623
+
624
+ client = await self.get_client()
625
+ audio_generator = client.audio_isolation.convert(audio=audio_data_io)
626
+
627
+ output_data = b""
628
+ for chunk in audio_generator:
629
+ output_data += chunk
630
+
631
+ audio_base64 = base64.b64encode(output_data).decode("utf-8")
632
+ file_name = f"{uuid.uuid4()}.mp3"
633
+ return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
634
+
635
+ # --- Dubbing ---
636
+
637
+ async def dub_file(
638
+ self,
639
+ audio_source: str,
640
+ target_lang: str,
641
+ mode: str = "automatic",
642
+ source_lang: Optional[str] = None,
643
+ num_speakers: int = 0,
644
+ watermark: bool = False,
645
+ ) -> Dict[str, Any]:
646
+ """
647
+ Dubs an audio file into another language.
648
+
649
+ Args:
650
+ audio_source: URL or path of the source audio.
651
+ target_lang: The target language code.
652
+ mode: The dubbing mode. Defaults to "automatic".
653
+ source_lang: Optional source language code.
654
+ num_speakers: The number of speakers (0 for automatic detection). Defaults to 0.
655
+ watermark: Whether to add a watermark. Defaults to False.
101
656
 
657
+ Returns:
658
+ dict: The dubbing project metadata.
102
659
 
103
- async def demo_text_to_speech():
104
- """
105
- A demonstration function that instantiates the `ElevenlabsApp` to test its `text_to_speech` method. It converts a sample string to audio and prints the resulting file URL to the console, serving as a basic usage example when the script is executed directly.
106
- """
107
- app = ElevenlabsApp()
108
- await app.generate_speech_audio_url("Hello, world!")
660
+ Tags:
661
+ dubbing, translation, elevenlabs
662
+ """
663
+ if audio_source.startswith(("http://", "https://")):
664
+ response = requests.get(audio_source)
665
+ response.raise_for_status()
666
+ audio_data_io = BytesIO(response.content)
667
+ else:
668
+ with open(audio_source, "rb") as f:
669
+ audio_data_io = BytesIO(f.read())
109
670
 
671
+ client = await self.get_client()
672
+ return client.dubbing.create(
673
+ file=audio_data_io, target_lang=target_lang, mode=mode, source_lang=source_lang, num_speakers=num_speakers, watermark=watermark
674
+ ).dict()
110
675
 
111
- if __name__ == "__main__":
112
- import asyncio
676
+ async def get_dubbing_project_metadata(self, dubbing_id: str) -> Dict[str, Any]:
677
+ """
678
+ Gets metadata for a dubbing project.
679
+
680
+ Args:
681
+ dubbing_id: The ID of the dubbing project.
682
+
683
+ Returns:
684
+ dict: The project metadata.
113
685
 
114
- asyncio.run(demo_text_to_speech())
686
+ Tags:
687
+ dubbing, project-metadata, elevenlabs
688
+ """
689
+ client = await self.get_client()
690
+ return client.dubbing.get(dubbing_id=dubbing_id).dict()
691
+
692
+ async def get_dubbed_file(self, dubbing_id: str, language_code: str) -> Dict[str, Any]:
693
+ """
694
+ Downloads a dubbed file.
695
+
696
+ Args:
697
+ dubbing_id: The ID of the dubbing project.
698
+ language_code: The language code of the dubbed file.
699
+
700
+ Returns:
701
+ dict: A dictionary containing:
702
+ - 'type' (str): "audio".
703
+ - 'data' (str): The base64 encoded audio data.
704
+ - 'mime_type' (str): "audio/mpeg".
705
+ - 'file_name' (str): A suggested file name.
706
+
707
+ Tags:
708
+ dubbing, file-download, elevenlabs
709
+ """
710
+ client = await self.get_client()
711
+ audio_generator = client.dubbing.audio.get(dubbing_id=dubbing_id, language_code=language_code)
712
+
713
+ output_data = b""
714
+ for chunk in audio_generator:
715
+ output_data += chunk
716
+
717
+ audio_base64 = base64.b64encode(output_data).decode("utf-8")
718
+ file_name = f"{dubbing_id}_{language_code}.mp3"
719
+ return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
720
+
721
+ # --- Models ---
722
+
723
+ async def get_models(self) -> List[Dict[str, Any]]:
724
+ """
725
+ Lists available models.
726
+
727
+ Returns:
728
+ list: A list of available models and their details.
729
+
730
+ Tags:
731
+ models, list-models, elevenlabs
732
+ """
733
+ client = await self.get_client()
734
+ return [model.dict() for model in client.models.list()]
735
+
736
+ # --- User ---
737
+
738
+ async def get_user_info(self) -> Dict[str, Any]:
739
+ """
740
+ Gets user information.
741
+
742
+ Returns:
743
+ dict: The user information.
744
+
745
+ Tags:
746
+ user, profile, elevenlabs
747
+ """
748
+ client = await self.get_client()
749
+ return client.user.get().dict()
750
+
751
+ async def get_user_subscription(self) -> Dict[str, Any]:
752
+ """
753
+ Gets user subscription details.
754
+
755
+ Returns:
756
+ dict: The subscription details.
757
+
758
+ Tags:
759
+ user, subscription, elevenlabs
760
+ """
761
+ client = await self.get_client()
762
+ return client.user.subscription.get().dict()
763
+
764
+ # --- Usage ---
765
+
766
+ async def get_usage(self, start_unix: Optional[int] = None, end_unix: Optional[int] = None) -> Dict[str, Any]:
767
+ """
768
+ Gets usage statistics. Defaults to the last 30 days if dates are not provided.
769
+
770
+ Args:
771
+ start_unix (Optional[int]): Start time in Unix timestamp.
772
+ end_unix (Optional[int]): End time in Unix timestamp.
773
+
774
+ Returns:
775
+ dict: Usage statistics.
776
+
777
+ Tags:
778
+ usage, statistics, elevenlabs
779
+ """
780
+ client = await self.get_client()
781
+ if end_unix is None:
782
+ end_unix = int(time.time())
783
+ if start_unix is None:
784
+ start_unix = end_unix - 30 * 24 * 3600 # 30 days ago
785
+
786
+ return client.usage.get(start_unix=start_unix, end_unix=end_unix).dict()
787
+
788
+ # --- Tool Listing ---
789
+
790
+ def list_tools(self):
791
+ return [
792
+ self.text_to_speech,
793
+ self.speech_to_text,
794
+ self.speech_to_speech,
795
+ self.get_history_items,
796
+ self.get_history_item,
797
+ self.delete_history_item,
798
+ self.get_history_item_audio,
799
+ self.get_voices,
800
+ self.get_voice,
801
+ self.delete_voice,
802
+ self.get_voice_samples,
803
+ self.delete_sample,
804
+ self.convert_text_to_sound_effect,
805
+ self.convert_text_to_dialogue,
806
+ self.remix_voice,
807
+ self.convert_text_to_music,
808
+ self.clone_voice,
809
+ self.design_voice,
810
+ self.align_audio,
811
+ self.isolate_audio,
812
+ self.dub_file,
813
+ self.get_dubbing_project_metadata,
814
+ self.get_dubbed_file,
815
+ self.get_models,
816
+ self.get_user_info,
817
+ self.get_user_subscription,
818
+ self.get_usage,
819
+ ]