universal-mcp-applications 0.1.33__py3-none-any.whl → 0.1.39rc16__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of universal-mcp-applications might be problematic.

Files changed (119)
  1. universal_mcp/applications/BEST_PRACTICES.md +1 -1
  2. universal_mcp/applications/ahrefs/app.py +92 -238
  3. universal_mcp/applications/airtable/app.py +36 -135
  4. universal_mcp/applications/apollo/app.py +124 -477
  5. universal_mcp/applications/asana/app.py +605 -1755
  6. universal_mcp/applications/aws_s3/app.py +63 -119
  7. universal_mcp/applications/bill/app.py +644 -2055
  8. universal_mcp/applications/box/app.py +1246 -4159
  9. universal_mcp/applications/braze/app.py +410 -1476
  10. universal_mcp/applications/browser_use/README.md +15 -1
  11. universal_mcp/applications/browser_use/__init__.py +1 -0
  12. universal_mcp/applications/browser_use/app.py +91 -26
  13. universal_mcp/applications/cal_com_v2/app.py +207 -625
  14. universal_mcp/applications/calendly/app.py +103 -242
  15. universal_mcp/applications/canva/app.py +75 -140
  16. universal_mcp/applications/clickup/app.py +331 -798
  17. universal_mcp/applications/coda/app.py +240 -520
  18. universal_mcp/applications/confluence/app.py +497 -1285
  19. universal_mcp/applications/contentful/app.py +40 -155
  20. universal_mcp/applications/crustdata/app.py +44 -123
  21. universal_mcp/applications/dialpad/app.py +451 -924
  22. universal_mcp/applications/digitalocean/app.py +2071 -6082
  23. universal_mcp/applications/domain_checker/app.py +3 -54
  24. universal_mcp/applications/e2b/app.py +17 -68
  25. universal_mcp/applications/elevenlabs/README.md +27 -3
  26. universal_mcp/applications/elevenlabs/app.py +741 -74
  27. universal_mcp/applications/exa/README.md +8 -4
  28. universal_mcp/applications/exa/app.py +415 -186
  29. universal_mcp/applications/falai/README.md +5 -7
  30. universal_mcp/applications/falai/app.py +156 -232
  31. universal_mcp/applications/figma/app.py +91 -175
  32. universal_mcp/applications/file_system/app.py +2 -13
  33. universal_mcp/applications/firecrawl/app.py +198 -176
  34. universal_mcp/applications/fireflies/app.py +59 -281
  35. universal_mcp/applications/fpl/app.py +92 -529
  36. universal_mcp/applications/fpl/utils/fixtures.py +15 -49
  37. universal_mcp/applications/fpl/utils/helper.py +25 -89
  38. universal_mcp/applications/fpl/utils/league_utils.py +20 -64
  39. universal_mcp/applications/ghost_content/app.py +70 -179
  40. universal_mcp/applications/github/app.py +30 -67
  41. universal_mcp/applications/gong/app.py +142 -302
  42. universal_mcp/applications/google_calendar/app.py +26 -78
  43. universal_mcp/applications/google_docs/README.md +15 -14
  44. universal_mcp/applications/google_docs/app.py +103 -206
  45. universal_mcp/applications/google_drive/app.py +194 -793
  46. universal_mcp/applications/google_gemini/app.py +68 -59
  47. universal_mcp/applications/google_mail/README.md +1 -0
  48. universal_mcp/applications/google_mail/app.py +93 -214
  49. universal_mcp/applications/google_searchconsole/app.py +25 -58
  50. universal_mcp/applications/google_sheet/README.md +2 -1
  51. universal_mcp/applications/google_sheet/app.py +226 -624
  52. universal_mcp/applications/google_sheet/helper.py +26 -53
  53. universal_mcp/applications/hashnode/app.py +57 -269
  54. universal_mcp/applications/heygen/README.md +10 -32
  55. universal_mcp/applications/heygen/app.py +339 -811
  56. universal_mcp/applications/http_tools/app.py +10 -32
  57. universal_mcp/applications/hubspot/README.md +1 -1
  58. universal_mcp/applications/hubspot/app.py +7508 -99
  59. universal_mcp/applications/jira/app.py +2419 -8334
  60. universal_mcp/applications/klaviyo/app.py +739 -1621
  61. universal_mcp/applications/linkedin/README.md +18 -1
  62. universal_mcp/applications/linkedin/app.py +729 -251
  63. universal_mcp/applications/mailchimp/app.py +696 -1851
  64. universal_mcp/applications/markitdown/app.py +8 -20
  65. universal_mcp/applications/miro/app.py +333 -815
  66. universal_mcp/applications/ms_teams/app.py +420 -1407
  67. universal_mcp/applications/neon/app.py +144 -250
  68. universal_mcp/applications/notion/app.py +38 -53
  69. universal_mcp/applications/onedrive/app.py +26 -48
  70. universal_mcp/applications/openai/app.py +43 -166
  71. universal_mcp/applications/outlook/README.md +22 -9
  72. universal_mcp/applications/outlook/app.py +403 -141
  73. universal_mcp/applications/perplexity/README.md +2 -1
  74. universal_mcp/applications/perplexity/app.py +161 -20
  75. universal_mcp/applications/pipedrive/app.py +1021 -3331
  76. universal_mcp/applications/posthog/app.py +272 -541
  77. universal_mcp/applications/reddit/app.py +65 -164
  78. universal_mcp/applications/resend/app.py +72 -139
  79. universal_mcp/applications/retell/app.py +23 -50
  80. universal_mcp/applications/rocketlane/app.py +252 -965
  81. universal_mcp/applications/scraper/app.py +114 -142
  82. universal_mcp/applications/semanticscholar/app.py +36 -78
  83. universal_mcp/applications/semrush/app.py +44 -78
  84. universal_mcp/applications/sendgrid/app.py +826 -1576
  85. universal_mcp/applications/sentry/app.py +444 -1079
  86. universal_mcp/applications/serpapi/app.py +44 -146
  87. universal_mcp/applications/sharepoint/app.py +27 -49
  88. universal_mcp/applications/shopify/app.py +1748 -4486
  89. universal_mcp/applications/shortcut/app.py +275 -536
  90. universal_mcp/applications/slack/app.py +43 -125
  91. universal_mcp/applications/spotify/app.py +206 -405
  92. universal_mcp/applications/supabase/app.py +174 -283
  93. universal_mcp/applications/tavily/app.py +2 -2
  94. universal_mcp/applications/trello/app.py +853 -2816
  95. universal_mcp/applications/twilio/app.py +27 -62
  96. universal_mcp/applications/twitter/api_segments/compliance_api.py +4 -14
  97. universal_mcp/applications/twitter/api_segments/dm_conversations_api.py +6 -18
  98. universal_mcp/applications/twitter/api_segments/likes_api.py +1 -3
  99. universal_mcp/applications/twitter/api_segments/lists_api.py +5 -15
  100. universal_mcp/applications/twitter/api_segments/trends_api.py +1 -3
  101. universal_mcp/applications/twitter/api_segments/tweets_api.py +9 -31
  102. universal_mcp/applications/twitter/api_segments/usage_api.py +1 -5
  103. universal_mcp/applications/twitter/api_segments/users_api.py +14 -42
  104. universal_mcp/applications/whatsapp/app.py +35 -186
  105. universal_mcp/applications/whatsapp/audio.py +2 -6
  106. universal_mcp/applications/whatsapp/whatsapp.py +17 -51
  107. universal_mcp/applications/whatsapp_business/app.py +86 -299
  108. universal_mcp/applications/wrike/app.py +80 -153
  109. universal_mcp/applications/yahoo_finance/app.py +19 -65
  110. universal_mcp/applications/youtube/app.py +120 -306
  111. universal_mcp/applications/zenquotes/app.py +3 -3
  112. {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/METADATA +4 -2
  113. {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/RECORD +115 -119
  114. {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/WHEEL +1 -1
  115. universal_mcp/applications/hubspot/api_segments/__init__.py +0 -0
  116. universal_mcp/applications/hubspot/api_segments/api_segment_base.py +0 -54
  117. universal_mcp/applications/hubspot/api_segments/crm_api.py +0 -7337
  118. universal_mcp/applications/hubspot/api_segments/marketing_api.py +0 -1467
  119. {universal_mcp_applications-0.1.33.dist-info → universal_mcp_applications-0.1.39rc16.dist-info}/licenses/LICENSE +0 -0
universal_mcp/applications/elevenlabs/app.py
@@ -1,152 +1,819 @@
+import base64
 import uuid
+import time
 from io import BytesIO
-
+from typing import Any, Dict, List, Optional
 import requests
+
 from universal_mcp.applications.application import APIApplication
 from universal_mcp.exceptions import NotAuthorizedError
 from universal_mcp.integrations import Integration
-
-from elevenlabs import ElevenLabs
-from universal_mcp.applications.file_system.app import FileSystemApp
+from elevenlabs.client import ElevenLabs
+from elevenlabs import DialogueInput
 
 
 class ElevenlabsApp(APIApplication):
     def __init__(self, integration: Integration = None, **kwargs) -> None:
         super().__init__(name="elevenlabs", integration=integration, **kwargs)
         self.base_url = "https://api.elevenlabs.io"
+        self._client = None
 
-    @property
-    def client(self) -> ElevenLabs:
+    async def get_client(self) -> ElevenLabs:
         """
         A property that lazily initializes and returns an authenticated `ElevenLabs` SDK client. On first access, it retrieves the API key from integration credentials and caches the instance, raising a `NotAuthorizedError` if credentials are not found.
         """
         if self._client is None:
-            credentials = self.integration.get_credentials()
+            credentials = await self.integration.get_credentials_async()
             if not credentials:
                 raise NotAuthorizedError("No credentials found")
-            api_key = (
-                credentials.get("api_key")
-                or credentials.get("API_KEY")
-                or credentials.get("apiKey")
-            )
+            api_key = credentials.get("api_key") or credentials.get("API_KEY") or credentials.get("apiKey")
             if not api_key:
                 raise NotAuthorizedError("No api key found")
             self._client = ElevenLabs(api_key=api_key)
         return self._client
 
-    # def get_voices(self):
-    # return self.client.voices.list_voices()
+    # --- Text to Speech ---
 
-    async def generate_speech_audio_url(
+    async def text_to_speech(
         self,
         text: str,
         voice_id: str = "21m00Tcm4TlvDq8ikWAM",
         model_id: str = "eleven_multilingual_v2",
-    ) -> bytes:
+    ) -> Dict[str, Any]:
         """
-        Converts a text string into speech using the ElevenLabs API. The function then saves the generated audio to a temporary MP3 file and returns a public URL to access it, rather than the raw audio bytes.
+        Converts text to speech and returns the generated audio data.
 
         Args:
-            text (str): The text to convert to speech.
-            voice_id (str): The ID of the voice to use.
-            model_id (str, optional): The model to use. Defaults to "eleven_multilingual_v2".
-            stability (float, optional): The stability of the voice.
-            similarity_boost (float, optional): The similarity boost of the voice.
+            text: The text to convert to speech.
+            voice_id: The ID of the voice to use. Defaults to "21m00Tcm4TlvDq8ikWAM" (Rachel).
+            model_id: The model to use. Defaults to "eleven_multilingual_v2".
 
         Returns:
-            bytes: The audio data.
+            dict: A dictionary containing:
+                - 'type' (str): "audio".
+                - 'data' (str): The base64 encoded audio data.
+                - 'mime_type' (str): "audio/mpeg".
+                - 'file_name' (str): A suggested file name.
 
         Tags:
-            important
+            text-to-speech, speech-synthesis, audio-generation, elevenlabs, important
         """
-        audio_generator = self.client.text_to_speech.convert(
+        client = await self.get_client()
+
+        audio_generator = client.text_to_speech.convert(
             text=text,
             voice_id=voice_id,
             model_id=model_id,
             output_format="mp3_44100_128",
         )
 
-        # Collect all audio chunks from the generator
         audio_data = b""
         for chunk in audio_generator:
            audio_data += chunk
 
-        upload_result = await FileSystemApp.write_file(
-            audio_data, f"/tmp/{uuid.uuid4()}.mp3"
-        )
-        return upload_result["data"]["url"]
+        audio_base64 = base64.b64encode(audio_data).decode("utf-8")
+        file_name = f"{uuid.uuid4()}.mp3"
+        return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
+
+    # --- Speech to Text ---
 
-    async def speech_to_text(
-        self, audio_file_path: str, language_code: str = "eng", diarize: bool = True
-    ) -> str:
+    async def speech_to_text(self, audio_file_path: str, language_code: str = "eng", diarize: bool = True) -> str:
         """
-        Transcribes an audio file into text using the ElevenLabs API. It supports language specification and speaker diarization, providing the inverse operation to the audio-generating `text_to_speech` method. Note: The docstring indicates this is a placeholder for an undocumented endpoint.
+        Transcribes an audio file into text.
 
         Args:
             audio_file_path (str): The path to the audio file.
+            language_code (str): Language code (ISO 639-1). Defaults to "eng".
+            diarize (bool): Whether to distinguish speakers. Defaults to True.
 
         Returns:
             str: The transcribed text.
 
         Tags:
-            important
+            speech-to-text, transcription, audio-processing, elevenlabs, important
         """
-        transcription = self.client.speech_to_text.convert(
-            file=audio_file_path,
-            model_id="scribe_v1",  # Model to use, for now only "scribe_v1" is supported
-            tag_audio_events=True,  # Tag audio events like laughter, applause, etc.
-            language_code=language_code,  # Language of the audio file. If set to None, the model will detect the language automatically.
-            diarize=diarize,  # Whether to annotate who is speaking
+        client = await self.get_client()
+        if audio_file_path.startswith(("http://", "https://")):
+            response = requests.get(audio_file_path)
+            response.raise_for_status()
+            audio_data_io = BytesIO(response.content)
+        else:
+            with open(audio_file_path, "rb") as f:
+                audio_data_io = BytesIO(f.read())
+        transcription = client.speech_to_text.convert(
+            file=audio_data_io, model_id="scribe_v1", tag_audio_events=True, language_code=language_code, diarize=diarize
         )
-        return transcription
+        return transcription.text
+
+    # --- Speech to Speech ---
 
     async def speech_to_speech(
+        self, audio_source: str, voice_id: str = "21m00Tcm4TlvDq8ikWAM", model_id: str = "eleven_multilingual_sts_v2"
+    ) -> Dict[str, Any]:
+        """
+        Converts speech from an audio source (URL or local path) to a different voice.
+
+        Args:
+            audio_source: URL or path of the source audio.
+            voice_id: Target voice ID.
+            model_id: Model ID. Defaults to "eleven_multilingual_sts_v2".
+
+        Returns:
+            dict: A dictionary containing:
+                - 'type' (str): "audio".
+                - 'data' (str): The base64 encoded audio data.
+                - 'mime_type' (str): "audio/mpeg".
+                - 'file_name' (str): A suggested file name.
+
+        Tags:
+            speech-to-speech, voice-conversion, audio-processing, elevenlabs, important
+        """
+        if audio_source.startswith(("http://", "https://")):
+            response = requests.get(audio_source)
+            response.raise_for_status()
+            audio_data_io = BytesIO(response.content)
+        else:
+            with open(audio_source, "rb") as f:
+                audio_data_io = BytesIO(f.read())
+
+        client = await self.get_client()
+        audio_stream = client.speech_to_speech.convert(
+            voice_id=voice_id, audio=audio_data_io, model_id=model_id, output_format="mp3_44100_128"
+        )
+
+        output_data = b""
+        for chunk in audio_stream:
+            output_data += chunk
+
+        audio_base64 = base64.b64encode(output_data).decode("utf-8")
+        file_name = f"{uuid.uuid4()}.mp3"
+        return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
+
+    # --- History ---
+
+    async def get_history_items(self, page_size: int = 100, start_after_history_item_id: Optional[str] = None) -> Dict[str, Any]:
+        """
+        Returns a list of generated audio history items.
+
+        Args:
+            page_size: The number of items to return. Defaults to 100.
+            start_after_history_item_id: The ID of the item to start after for pagination.
+
+        Returns:
+            dict: The history response containing a list of history items.
+
+        Tags:
+            history, audio-logs, elevenlabs
+        """
+        client = await self.get_client()
+        return client.history.list(page_size=page_size, start_after_history_item_id=start_after_history_item_id).dict()
+
+    async def get_history_item(self, history_item_id: str) -> Dict[str, Any]:
+        """
+        Retrieves a specific history item by ID.
+
+        Args:
+            history_item_id: The ID of the history item to retrieve.
+
+        Returns:
+            dict: The details of the history item.
+
+        Tags:
+            history, audio-logs, elevenlabs
+        """
+        client = await self.get_client()
+        return client.history.get(history_item_id=history_item_id).dict()
+
+    async def delete_history_item(self, history_item_id: str) -> Dict[str, Any]:
+        """
+        Deletes a history item by ID.
+
+        Args:
+            history_item_id: The ID of the history item to delete.
+
+        Returns:
+            dict: The deletion status.
+
+        Tags:
+            history, audio-logs, elevenlabs
+        """
+        client = await self.get_client()
+        return client.history.delete(history_item_id=history_item_id)
+
+    async def get_history_item_audio(self, history_item_id: str) -> Dict[str, Any]:
+        """
+        Gets the audio for a history item.
+
+        Args:
+            history_item_id: The ID of the history item.
+
+        Returns:
+            dict: A dictionary containing:
+                - 'type' (str): "audio".
+                - 'data' (str): The base64 encoded audio data.
+                - 'mime_type' (str): "audio/mpeg".
+                - 'file_name' (str): A suggested file name.
+
+        Tags:
+            history, audio-download, elevenlabs
+        """
+        client = await self.get_client()
+        audio_generator = client.history.get_audio(history_item_id=history_item_id)
+        audio_data = b""
+        for chunk in audio_generator:
+            audio_data += chunk
+
+        audio_base64 = base64.b64encode(audio_data).decode("utf-8")
+        file_name = f"{history_item_id}.mp3"
+        return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
+
+    # --- Voices ---
+
+    async def get_voices(self) -> Dict[str, Any]:
+        """
+        Lists all available voices.
+
+        Returns:
+            dict: A dictionary containing the list of voices.
+
+        Tags:
+            voices, list-voices, elevenlabs
+        """
+        client = await self.get_client()
+        return client.voices.get_all().dict()
+
+    async def get_voice(self, voice_id: str) -> Dict[str, Any]:
+        """
+        Gets details of a specific voice.
+
+        Args:
+            voice_id: The ID of the voice to retrieve.
+
+        Returns:
+            dict: The voice details.
+
+        Tags:
+            voices, voice-details, elevenlabs
+        """
+        client = await self.get_client()
+        return client.voices.get(voice_id=voice_id).dict()
+
+    async def delete_voice(self, voice_id: str) -> Dict[str, Any]:
+        """
+        Deletes a voice by ID.
+
+        Args:
+            voice_id: The ID of the voice to delete.
+
+        Returns:
+            dict: The deletion status.
+
+        Tags:
+            voices, delete-voice, elevenlabs
+        """
+        client = await self.get_client()
+        return client.voices.delete(voice_id=voice_id).dict()
+
+    # --- Samples ---
+
+    async def get_voice_samples(self, voice_id: str) -> List[Dict[str, Any]]:
+        """
+        Gets samples for a specific voice.
+
+        Args:
+            voice_id: The ID of the voice.
+
+        Returns:
+            list: A list of voice samples.
+
+        Tags:
+            samples, voice-samples, elevenlabs
+        """
+        voice = await self.get_voice(voice_id)
+        # Check if voice is dict or object
+        if hasattr(voice, "samples"):
+            return [s.dict() for s in voice.samples]
+        # Pydantic .dict() might return 'samples' as None if items are missing
+        samples = voice.get("samples")
+        if samples is None:
+            return []
+        return samples
+
+    async def delete_sample(self, voice_id: str, sample_id: str) -> Dict[str, Any]:
+        """
+        Deletes a sample.
+
+        Args:
+            voice_id: The ID of the voice.
+            sample_id: The ID of the sample to delete.
+
+        Returns:
+            dict: The deletion status.
+
+        Tags:
+            samples, delete-sample, elevenlabs
+        """
+        client = await self.get_client()
+        return client.samples.delete(voice_id=voice_id, sample_id=sample_id).dict()
+
+    # --- Text to Sound Effects ---
+
+    async def convert_text_to_sound_effect(
+        self, text: str, duration_seconds: Optional[float] = None, prompt_influence: float = 0.3
+    ) -> Dict[str, Any]:
+        """
+        Converts text to sound effects.
+
+        Args:
+            text: A text description of the sound effect.
+            duration_seconds: The duration of the sound effect in seconds.
+            prompt_influence: The influence of the prompt on the generation (0.0 to 1.0). Defaults to 0.3.
+
+        Returns:
+            dict: A dictionary containing:
+                - 'type' (str): "audio".
+                - 'data' (str): The base64 encoded audio data.
+                - 'mime_type' (str): "audio/mpeg".
+                - 'file_name' (str): A suggested file name.
+
+        Tags:
+            sound-effects, audio-generation, elevenlabs
+        """
+        client = await self.get_client()
+        audio_generator = client.text_to_sound_effects.convert(
+            text=text, duration_seconds=duration_seconds, prompt_influence=prompt_influence
+        )
+        audio_data = b""
+        for chunk in audio_generator:
+            audio_data += chunk
+
+        audio_base64 = base64.b64encode(audio_data).decode("utf-8")
+        file_name = f"{uuid.uuid4()}.mp3"
+        return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
+
+    # --- Text to Dialogue ---
+
+    async def convert_text_to_dialogue(
         self,
-        audio_url: str,
-        voice_id: str = "21m00Tcm4TlvDq8ikWAM",
-        model_id: str = "eleven_multilingual_sts_v2",
-    ) -> bytes:
+        dialogue_turns: List[Dict[str, str]],
+        model_id: str = "eleven_v3",
+        output_format: str = "mp3_44100_128",
+    ) -> Dict[str, Any]:
         """
-        Downloads an audio file from a URL and converts the speech into a specified target voice using the ElevenLabs API. This function transforms the speaker's voice in an existing recording and returns the new audio data as bytes, distinct from creating audio from text.
+        Converts a list of text and voice ID pairs into speech (dialogue) and returns synthesized audio.
 
         Args:
-            voice_id (str): The ID of the voice to use for the conversion.
-            audio_file_path (str): The path to the audio file to transform.
-            model_id (str, optional): The model to use. Defaults to "eleven_multilingual_sts_v2".
+            dialogue_turns: A list of dictionaries, each containing:
+                - 'text' (str): The text to be spoken.
+                - 'voice_id' (str): The ID of the voice to use.
+            model_id: The model to use. Defaults to "eleven_v3".
+            output_format: The output format. Defaults to "mp3_44100_128".
 
+        Example:
+            dialogue_turns = [
+                {"text": "Hello there! How are you doing today?", "voice_id": "9BWtsMINqrJLrRacOk9x"},
+                {"text": "I'm doing great, thanks for asking! And you?", "voice_id": "IKne3meq5aSn9XLyUdCD"},
+                {"text": "I'm fantastic. Ready to test this dialogue feature.", "voice_id": "9BWtsMINqrJLrRacOk9x"}
+            ]
         Returns:
-            bytes: The transformed audio data.
+            dict: A dictionary containing:
+                - 'type' (str): "audio".
+                - 'data' (str): The base64 encoded audio data.
+                - 'mime_type' (str): "audio/mpeg".
+                - 'file_name' (str): A suggested file name.
+
+        Raises:
+            ValueError: If the model ID is not supported.
 
         Tags:
-            important
+            dialogue, conversational-ai, elevenlabs
         """
-        response = requests.get(audio_url)
-        audio_data = BytesIO(response.content)
-        response = self.client.speech_to_speech.convert(
-            voice_id=voice_id,
-            audio=audio_data,
+        client = await self.get_client()
+
+        inputs = [DialogueInput(text=turn["text"], voice_id=turn["voice_id"]) for turn in dialogue_turns]
+
+        audio_generator = client.text_to_dialogue.convert(
+            inputs=inputs,
             model_id=model_id,
-            output_format="mp3_44100_128",
+            output_format=output_format,
         )
-        return response.content
 
-    def list_tools(self):
-        return [
-            self.generate_speech_audio_url,
-            self.speech_to_text,
-            self.speech_to_speech,
-        ]
+        audio_data = b""
+        for chunk in audio_generator:
+            audio_data += chunk
+
+        audio_base64 = base64.b64encode(audio_data).decode("utf-8")
+        file_name = f"dialogue_{uuid.uuid4()}.mp3"
+        return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
+
+    async def remix_voice(
+        self,
+        voice_id: str,
+        voice_description: str,
+        text: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Remixes an existing voice to create a new one based on a description.
 
+        Args:
+            voice_id: The ID of the voice to remix.
+            voice_description: A description of how to change the voice (e.g., "Make the voice have a higher pitch").
+            text: Optional text for the voice to speak in the preview.
 
-async def demo_text_to_speech():
-    """
-    A demonstration function that instantiates the `ElevenlabsApp` to test its `text_to_speech` method. It converts a sample string to audio and prints the resulting file URL to the console, serving as a basic usage example when the script is executed directly.
-    """
-    app = ElevenlabsApp()
-    await app.generate_speech_audio_url("Hello, world!")
+        Returns:
+            dict: A dictionary containing:
+                - 'type' (str): "audio".
+                - 'data' (str): The base64 encoded audio data of the preview.
+                - 'mime_type' (str): "audio/mpeg".
+                - 'file_name' (str): A suggested file name.
+                - 'generated_voice_id' (str): The ID of the generated voice preview.
 
+        Tags:
+            voice-remixing, voice-modification, elevenlabs
+        """
+        client = await self.get_client()
 
-if __name__ == "__main__":
-    import asyncio
+        response = client.text_to_voice.remix(
+            voice_id=voice_id,
+            voice_description=voice_description,
+            text=text,
+        )
 
-    asyncio.run(demo_text_to_speech())
+        if not response.previews:
+            raise ValueError("No previews generated")
+
+        preview = response.previews[0]
+        file_name = f"remix_{preview.generated_voice_id}.mp3"
+
+        # preview.audio_base_64 is already a base64 string
+        return {
+            "type": "audio",
+            "data": preview.audio_base_64,
+            "mime_type": "audio/mpeg",
+            "file_name": file_name,
+            "generated_voice_id": preview.generated_voice_id,
+        }
+
+    # --- Forced Alignment ---
+
+    async def align_audio(
+        self,
+        audio_file_path: str,
+        text: str,
+    ) -> Dict[str, Any]:
+        """
+        Aligns text to an audio file, returning timing information for characters and words.
+
+        Args:
+            audio_file_path: The path to the audio file (local path or URL).
+            text: The transcript text corresponding to the audio.
+
+        Returns:
+            dict: The alignment result containing 'characters', 'words', and 'loss'.
+
+        Tags:
+            alignment, audio-sync, elevenlabs
+        """
+        client = await self.get_client()
+
+        # Handle URL or local file
+        if audio_file_path.startswith("http://") or audio_file_path.startswith("https://"):
+            response = requests.get(audio_file_path)
+            response.raise_for_status()
+            audio_data = BytesIO(response.content)
+        else:
+            with open(audio_file_path, "rb") as f:
+                audio_data = BytesIO(f.read())
+
+        alignment = client.forced_alignment.create(file=audio_data, text=text)
+
+        return alignment.dict()
+
+    # --- Text to Music ---
+
+    async def convert_text_to_music(self, prompt: str, music_length_ms: Optional[int] = None) -> Dict[str, Any]:
+        """
+        Generates music based on a text prompt.
+
+        Args:
+            prompt: A text description of the music to generate.
+            music_length_ms: Optional duration of the music in milliseconds.
+
+        Returns:
+            dict: The generated audio data including 'type', 'data' (base64), 'mime_type', and 'file_name'.
+
+        Tags:
+            music-generation, audio-generation, elevenlabs
+        """
+        client = await self.get_client()
+
+        # The SDK returns a sync iterator of bytes (since client is sync)
+        audio_bytes = b""
+        for chunk in client.music.compose(prompt=prompt, music_length_ms=music_length_ms):
+            audio_bytes += chunk
+
+        file_name = f"music_{uuid.uuid4()}.mp3"
+
+        return {"type": "audio", "data": base64.b64encode(audio_bytes).decode("utf-8"), "mime_type": "audio/mpeg", "file_name": file_name}
+
+    # --- Voice Cloning ---
+
+    async def clone_voice(self, name: str, file_paths: List[str], description: Optional[str] = None) -> Dict[str, Any]:
+        """
+        Clones a voice from provided audio samples (URLs or local paths).
+
+        Args:
+            name: Name of the cloned voice.
+            file_paths: List of absolute file paths or URLs to audio samples.
+            description: Optional description of the voice.
+
+        Returns:
+            dict: Metadata of the created voice, including 'voice_id'.
+
+        Tags:
+            voice-cloning, instant-cloning, elevenlabs
+        """
+        client = await self.get_client()
+        files_data = []
+
+        for path in file_paths:
+            if path.startswith("http"):
+                response = requests.get(path)
+                response.raise_for_status()
+                files_data.append(BytesIO(response.content))
+            else:
+                # Read into memory so we can close the file handle immediately if needed,
+                # though BytesIO is preferred for the SDK.
+                with open(path, "rb") as f:
+                    files_data.append(BytesIO(f.read()))
+
+        # client.voices.ivc.create returns AddVoiceIvcResponseModel which has voice_id
+        voice = client.voices.ivc.create(name=name, description=description, files=files_data)
+
+        return {"voice_id": voice.voice_id, "name": name, "status": "created"}
+
+    # --- Voice Design ---
+
+    async def design_voice(self, voice_description: str, text: Optional[str] = None) -> List[Dict[str, Any]]:
+        """
+        Generates voice previews based on a text description.
+
+        Args:
+            voice_description: Description of the voice to generate (e.g., "A deep, resonant voice").
+            text: Optional text for the voice to speak. If not provided, it will be automatically generated.
+
+        Returns:
+            list: A list of voice previews, each containing 'generated_voice_id', 'audio_base_64', and 'duration_secs'.
+
+        Tags:
+            voice-design, voice-generation, elevenlabs
+        """
+        client = await self.get_client()
+
+        # design() returns VoiceDesignPreviewResponse
+        # We need to access .previews which is a list of VoicePreviewResponseModel
+        response = client.text_to_voice.design(
+            voice_description=voice_description,
+            text=text,
+            # Using a default model that supports design
+            model_id="eleven_multilingual_ttv_v2",
+        )
+
+        previews = []
+        for preview in response.previews:
+            previews.append(
+                {
+                    "generated_voice_id": preview.generated_voice_id,
+                    "audio_base_64": preview.audio_base_64,
+                    "duration_secs": preview.duration_secs,
+                    "type": "audio",
+                    "mime_type": "audio/mpeg",
+                }
+            )
+
+        return previews
+
+    # --- Audio Isolation ---
+
+    async def isolate_audio(self, audio_source: str) -> Dict[str, Any]:
+        """
+        Removes background noise from audio.
+
+        Args:
+            audio_source: URL or path of the source audio.
+
+        Returns:
+            dict: A dictionary containing:
+                - 'type' (str): "audio".
+                - 'data' (str): The base64 encoded audio data.
+                - 'mime_type' (str): "audio/mpeg".
+                - 'file_name' (str): A suggested file name.
+
+        Tags:
+            audio-isolation, noise-removal, elevenlabs
+        """
+        if audio_source.startswith(("http://", "https://")):
+            response = requests.get(audio_source)
+            response.raise_for_status()
+            audio_data_io = BytesIO(response.content)
+        else:
+            with open(audio_source, "rb") as f:
+                audio_data_io = BytesIO(f.read())
+        audio_data_io.name = "audio.mp3"
+
+        client = await self.get_client()
+        audio_generator = client.audio_isolation.convert(audio=audio_data_io)
+
+        output_data = b""
+        for chunk in audio_generator:
+            output_data += chunk
+
+        audio_base64 = base64.b64encode(output_data).decode("utf-8")
+        file_name = f"{uuid.uuid4()}.mp3"
+        return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
+
+    # --- Dubbing ---
+
+    async def dub_file(
+        self,
+        audio_source: str,
+        target_lang: str,
+        mode: str = "automatic",
+        source_lang: Optional[str] = None,
+        num_speakers: int = 0,
+        watermark: bool = False,
+    ) -> Dict[str, Any]:
+        """
+        Dubs an audio file into another language.
+
+        Args:
+            audio_source: URL or path of the source audio.
+            target_lang: The target language code.
+            mode: The dubbing mode. Defaults to "automatic".
+            source_lang: Optional source language code.
+            num_speakers: The number of speakers (0 for automatic detection). Defaults to 0.
+            watermark: Whether to add a watermark. Defaults to False.
+
+        Returns:
+            dict: The dubbing project metadata.
+
+        Tags:
+            dubbing, translation, elevenlabs
+        """
+        if audio_source.startswith(("http://", "https://")):
+            response = requests.get(audio_source)
+            response.raise_for_status()
+            audio_data_io = BytesIO(response.content)
+        else:
+            with open(audio_source, "rb") as f:
+                audio_data_io = BytesIO(f.read())
+
+        client = await self.get_client()
+        return client.dubbing.create(
+            file=audio_data_io, target_lang=target_lang, mode=mode, source_lang=source_lang, num_speakers=num_speakers, watermark=watermark
+        ).dict()
+
+    async def get_dubbing_project_metadata(self, dubbing_id: str) -> Dict[str, Any]:
+        """
+        Gets metadata for a dubbing project.
+
+        Args:
+            dubbing_id: The ID of the dubbing project.
+
+        Returns:
+            dict: The project metadata.
+
+        Tags:
+            dubbing, project-metadata, elevenlabs
+        """
+        client = await self.get_client()
+        return client.dubbing.get(dubbing_id=dubbing_id).dict()
+
+    async def get_dubbed_file(self, dubbing_id: str, language_code: str) -> Dict[str, Any]:
+        """
+        Downloads a dubbed file.
+
+        Args:
+            dubbing_id: The ID of the dubbing project.
+            language_code: The language code of the dubbed file.
+
+        Returns:
+            dict: A dictionary containing:
+                - 'type' (str): "audio".
+                - 'data' (str): The base64 encoded audio data.
+                - 'mime_type' (str): "audio/mpeg".
+                - 'file_name' (str): A suggested file name.
+
+        Tags:
+            dubbing, file-download, elevenlabs
+        """
+        client = await self.get_client()
+        audio_generator = client.dubbing.audio.get(dubbing_id=dubbing_id, language_code=language_code)
+
+        output_data = b""
+        for chunk in audio_generator:
+            output_data += chunk
+
+        audio_base64 = base64.b64encode(output_data).decode("utf-8")
+        file_name = f"{dubbing_id}_{language_code}.mp3"
+        return {"type": "audio", "data": audio_base64, "mime_type": "audio/mpeg", "file_name": file_name}
+
+    # --- Models ---
+
+    async def get_models(self) -> List[Dict[str, Any]]:
+        """
+        Lists available models.
+
+        Returns:
+            list: A list of available models and their details.
+
+        Tags:
+            models, list-models, elevenlabs
+        """
+        client = await self.get_client()
+        return [model.dict() for model in client.models.list()]
+
+    # --- User ---
+
+    async def get_user_info(self) -> Dict[str, Any]:
+        """
+        Gets user information.
+
+        Returns:
+            dict: The user information.
+
+        Tags:
+            user, profile, elevenlabs
+        """
+        client = await self.get_client()
+        return client.user.get().dict()
+
+    async def get_user_subscription(self) -> Dict[str, Any]:
+        """
+        Gets user subscription details.
+
+        Returns:
+            dict: The subscription details.
+
+        Tags:
+            user, subscription, elevenlabs
+        """
+        client = await self.get_client()
+        return client.user.subscription.get().dict()
+
+    # --- Usage ---
+
+    async def get_usage(self, start_unix: Optional[int] = None, end_unix: Optional[int] = None) -> Dict[str, Any]:
+        """
+        Gets usage statistics. Defaults to the last 30 days if dates are not provided.
+
+        Args:
+            start_unix (Optional[int]): Start time in Unix timestamp.
+            end_unix (Optional[int]): End time in Unix timestamp.
+
+        Returns:
+            dict: Usage statistics.
+
+        Tags:
+            usage, statistics, elevenlabs
+        """
+        client = await self.get_client()
+        if end_unix is None:
+            end_unix = int(time.time())
+        if start_unix is None:
+            start_unix = end_unix - 30 * 24 * 3600  # 30 days ago
+
+        return client.usage.get(start_unix=start_unix, end_unix=end_unix).dict()
+
+    # --- Tool Listing ---
+
+    def list_tools(self):
+        return [
+            self.text_to_speech,
+            self.speech_to_text,
+            self.speech_to_speech,
+            self.get_history_items,
+            self.get_history_item,
+            self.delete_history_item,
+            self.get_history_item_audio,
+            self.get_voices,
+            self.get_voice,
+            self.delete_voice,
+            self.get_voice_samples,
+            self.delete_sample,
+            self.convert_text_to_sound_effect,
+            self.convert_text_to_dialogue,
+            self.remix_voice,
+            self.convert_text_to_music,
+            self.clone_voice,
+            self.design_voice,
+            self.align_audio,
+            self.isolate_audio,
+            self.dub_file,
+            self.get_dubbing_project_metadata,
+            self.get_dubbed_file,
+            self.get_models,
+            self.get_user_info,
+            self.get_user_subscription,
+            self.get_usage,
+        ]
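
For orientation only, here is a minimal, hypothetical sketch of how the rewritten tools might be consumed after this change. The StubIntegration class and the placeholder API key are assumptions standing in for a real universal_mcp Integration; the only behavior relied on is what the diff itself shows (get_credentials_async() returning an api_key, and text_to_speech returning base64-encoded audio instead of a /tmp file URL).

import asyncio
import base64

from universal_mcp.applications.elevenlabs.app import ElevenlabsApp


class StubIntegration:
    # Hypothetical stand-in for a real universal_mcp Integration object.
    async def get_credentials_async(self):
        return {"api_key": "YOUR_ELEVENLABS_API_KEY"}


async def main():
    app = ElevenlabsApp(integration=StubIntegration())
    result = await app.text_to_speech("Hello, world!")
    # The tool now returns base64-encoded audio rather than a /tmp file URL,
    # so the caller decodes and persists it.
    with open(result["file_name"], "wb") as out:
        out.write(base64.b64decode(result["data"]))


if __name__ == "__main__":
    asyncio.run(main())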