vibe-aigc 0.3.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vibe_aigc-0.3.0/vibe_aigc.egg-info → vibe_aigc-0.5.0}/PKG-INFO +1 -1
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/pyproject.toml +3 -1
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/__init__.py +9 -0
- vibe_aigc-0.5.0/vibe_aigc/audio.py +405 -0
- vibe_aigc-0.5.0/vibe_aigc/character.py +457 -0
- vibe_aigc-0.5.0/vibe_aigc/mv_pipeline.py +448 -0
- vibe_aigc-0.5.0/vibe_aigc/video.py +388 -0
- vibe_aigc-0.5.0/vibe_aigc/workflows.py +391 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0/vibe_aigc.egg-info}/PKG-INFO +1 -1
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc.egg-info/SOURCES.txt +5 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/LICENSE +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/README.md +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/setup.cfg +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_adaptive_replanning.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_agents.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_assets.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_auto_checkpoint.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_automatic_checkpoints.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_checkpoint_serialization.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_error_handling.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_executor.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_feedback_system.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_integration.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_knowledge_base.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_metaplanner_resume.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_metaplanner_visualization.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_models.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_parallel_execution.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_planner.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_progress_callbacks.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_tools.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_visualization.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/tests/test_workflow_resume.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/agents.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/assets.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/cli.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/comfyui.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/executor.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/knowledge.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/llm.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/models.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/persistence.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/planner.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/tools.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/tools_multimodal.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc/visualization.py +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc.egg-info/dependency_links.txt +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc.egg-info/entry_points.txt +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc.egg-info/requires.txt +0 -0
- {vibe_aigc-0.3.0 → vibe_aigc-0.5.0}/vibe_aigc.egg-info/top_level.txt +0 -0
--- vibe_aigc-0.3.0/pyproject.toml
+++ vibe_aigc-0.5.0/pyproject.toml
@@ -8,7 +8,7 @@ exclude = ["tests*", "docs*", "examples*", "landing*"]

 [project]
 name = "vibe-aigc"
-version = "0.3.0"
+version = "0.5.0"
 description = "A New Paradigm for Content Generation via Agentic Orchestration"
 authors = [{name = "Vibe AIGC Contributors"}]
 license = "MIT"
@@ -68,3 +68,5 @@ warn_unused_configs = true
 ignore_missing_imports = true


+
+
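For orientation only (this snippet is not part of the diff): after upgrading, the version bump above can be confirmed from Python with importlib.metadata.

# Illustrative check of the locally installed release; assumes vibe-aigc 0.5.0 is installed.
from importlib.metadata import version

print(version("vibe-aigc"))  # expected: 0.5.0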
--- vibe_aigc-0.3.0/vibe_aigc/__init__.py
+++ vibe_aigc-0.5.0/vibe_aigc/__init__.py
@@ -101,3 +101,12 @@ __all__ = [
 ]
 # ComfyUI backend for actual image generation
 from .comfyui import ComfyUIBackend, ComfyUIConfig, ComfyUIImageTool, create_comfyui_registry
+
+# Workflow templates
+from .workflows import WorkflowLibrary, WorkflowTemplate, create_workflow_library
+
+# Audio generation
+from .audio import MusicGenBackend, RiffusionBackend, ElevenLabsBackend, MusicGenerationTool, TTSTool
+
+# MV Pipeline
+from .mv_pipeline import MVPipeline, Shot, Storyboard, create_mv
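Taken together, the hunk above re-exports the new 0.5.0 modules from the package root. A minimal import sketch (the names come from the hunk; the snippet itself is not part of the release):

# New top-level names re-exported by vibe_aigc/__init__.py in 0.5.0.
from vibe_aigc import (
    WorkflowLibrary, WorkflowTemplate, create_workflow_library,  # workflow templates
    MusicGenBackend, RiffusionBackend, ElevenLabsBackend,        # audio backends
    MusicGenerationTool, TTSTool,                                # audio tools
    MVPipeline, Shot, Storyboard, create_mv,                     # MV pipeline
)

The audio names map onto the new vibe_aigc/audio.py shown below; character.py and video.py, also added in this release, are not re-exported in this hunk.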
--- /dev/null
+++ vibe_aigc-0.5.0/vibe_aigc/audio.py
@@ -0,0 +1,405 @@
+"""Audio generation for music videos.
+
+Supports:
+- Music generation (Riffusion, MusicGen)
+- Voice/TTS (ElevenLabs, local TTS)
+- Sound effects
+"""
+
+import asyncio
+import aiohttp
+import base64
+from typing import Any, Dict, List, Optional
+from dataclasses import dataclass
+from pathlib import Path
+
+from .tools import BaseTool, ToolResult, ToolSpec, ToolCategory
+
+
+@dataclass
+class AudioConfig:
+    """Configuration for audio generation."""
+    provider: str = "riffusion"  # riffusion, musicgen, elevenlabs
+    api_key: Optional[str] = None
+    output_dir: str = "./audio_output"
+
+
+class RiffusionBackend:
+    """Music generation using Riffusion (via Replicate)."""
+
+    def __init__(self, api_token: Optional[str] = None):
+        self.api_token = api_token
+        self.base_url = "https://api.replicate.com/v1"
+
+    async def generate_music(
+        self,
+        prompt: str,
+        duration: float = 8.0,  # seconds
+        seed: Optional[int] = None
+    ) -> Dict[str, Any]:
+        """Generate music from a text prompt.
+
+        Args:
+            prompt: Description of the music (e.g., "upbeat electronic cyberpunk")
+            duration: Length in seconds
+            seed: Random seed for reproducibility
+
+        Returns:
+            Dict with audio URL or error
+        """
+        if not self.api_token:
+            return {"error": "No Replicate API token. Set REPLICATE_API_TOKEN."}
+
+        headers = {
+            "Authorization": f"Token {self.api_token}",
+            "Content-Type": "application/json"
+        }
+
+        payload = {
+            "version": "8cf61ea6c56afd61d8f5b9ffd14d7c216c0a93844ce2d82ac1c9ecc9c7f24e05",
+            "input": {
+                "prompt_a": prompt,
+                "denoising": 0.75,
+                "prompt_b": prompt,  # Same prompt for consistency
+                "alpha": 0.5,
+                "num_inference_steps": 50,
+                "seed_image_id": "vibes"
+            }
+        }
+
+        if seed is not None:
+            payload["input"]["seed"] = seed
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                # Start prediction
+                async with session.post(
+                    f"{self.base_url}/predictions",
+                    headers=headers,
+                    json=payload
+                ) as resp:
+                    if resp.status != 201:
+                        error = await resp.text()
+                        return {"error": f"Failed to start: {error}"}
+                    result = await resp.json()
+                    prediction_id = result.get("id")
+
+                # Poll for completion
+                for _ in range(60):  # Max 60 seconds
+                    async with session.get(
+                        f"{self.base_url}/predictions/{prediction_id}",
+                        headers=headers
+                    ) as resp:
+                        result = await resp.json()
+                        status = result.get("status")
+
+                        if status == "succeeded":
+                            output = result.get("output", {})
+                            return {
+                                "audio_url": output.get("audio"),
+                                "spectrogram_url": output.get("spectrogram"),
+                                "prompt": prompt
+                            }
+                        elif status == "failed":
+                            return {"error": result.get("error", "Generation failed")}
+
+                    await asyncio.sleep(1)
+
+            return {"error": "Timeout waiting for generation"}
+
+        except Exception as e:
+            return {"error": str(e)}
+
+
+class MusicGenBackend:
+    """Music generation using Meta's MusicGen (via Replicate)."""
+
+    def __init__(self, api_token: Optional[str] = None):
+        self.api_token = api_token
+        self.base_url = "https://api.replicate.com/v1"
+
+    async def generate_music(
+        self,
+        prompt: str,
+        duration: int = 8,
+        model_version: str = "melody",  # small, medium, melody, large
+        continuation: bool = False,
+        input_audio: Optional[str] = None
+    ) -> Dict[str, Any]:
+        """Generate music using MusicGen.
+
+        Args:
+            prompt: Text description of desired music
+            duration: Length in seconds (max 30)
+            model_version: Model size/type
+            continuation: Whether to continue from input_audio
+            input_audio: URL of audio to continue from
+
+        Returns:
+            Dict with audio URL or error
+        """
+        if not self.api_token:
+            return {"error": "No Replicate API token"}
+
+        headers = {
+            "Authorization": f"Token {self.api_token}",
+            "Content-Type": "application/json"
+        }
+
+        # MusicGen model on Replicate
+        payload = {
+            "version": "b05b1dff1d8c6dc63d14b0cdb42135378dcb87f6373b0d3d341ede46e59e2b38",
+            "input": {
+                "prompt": prompt,
+                "duration": min(duration, 30),
+                "model_version": model_version,
+                "output_format": "mp3",
+                "normalization_strategy": "peak"
+            }
+        }
+
+        if continuation and input_audio:
+            payload["input"]["continuation"] = True
+            payload["input"]["input_audio"] = input_audio
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    f"{self.base_url}/predictions",
+                    headers=headers,
+                    json=payload
+                ) as resp:
+                    if resp.status != 201:
+                        return {"error": await resp.text()}
+                    result = await resp.json()
+                    prediction_id = result.get("id")
+
+                # Poll for completion
+                for _ in range(120):  # MusicGen can take longer
+                    async with session.get(
+                        f"{self.base_url}/predictions/{prediction_id}",
+                        headers=headers
+                    ) as resp:
+                        result = await resp.json()
+                        status = result.get("status")
+
+                        if status == "succeeded":
+                            return {
+                                "audio_url": result.get("output"),
+                                "prompt": prompt,
+                                "duration": duration
+                            }
+                        elif status == "failed":
+                            return {"error": result.get("error")}
+
+                    await asyncio.sleep(1)
+
+            return {"error": "Timeout"}
+
+        except Exception as e:
+            return {"error": str(e)}
+
+
+class ElevenLabsBackend:
+    """Voice and speech synthesis using ElevenLabs."""
+
+    def __init__(self, api_key: Optional[str] = None):
+        self.api_key = api_key
+        self.base_url = "https://api.elevenlabs.io/v1"
+
+    async def text_to_speech(
+        self,
+        text: str,
+        voice_id: str = "21m00Tcm4TlvDq8ikWAM",  # Rachel (default)
+        model_id: str = "eleven_monolingual_v1",
+        stability: float = 0.5,
+        similarity_boost: float = 0.75
+    ) -> Dict[str, Any]:
+        """Generate speech from text.
+
+        Args:
+            text: Text to speak
+            voice_id: ElevenLabs voice ID
+            model_id: Model to use
+            stability: Voice stability (0-1)
+            similarity_boost: Voice similarity (0-1)
+
+        Returns:
+            Dict with audio data or error
+        """
+        if not self.api_key:
+            return {"error": "No ElevenLabs API key"}
+
+        headers = {
+            "xi-api-key": self.api_key,
+            "Content-Type": "application/json"
+        }
+
+        payload = {
+            "text": text,
+            "model_id": model_id,
+            "voice_settings": {
+                "stability": stability,
+                "similarity_boost": similarity_boost
+            }
+        }
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    f"{self.base_url}/text-to-speech/{voice_id}",
+                    headers=headers,
+                    json=payload
+                ) as resp:
+                    if resp.status != 200:
+                        return {"error": await resp.text()}
+
+                    audio_data = await resp.read()
+                    return {
+                        "audio_data": base64.b64encode(audio_data).decode(),
+                        "format": "mp3",
+                        "text": text
+                    }
+
+        except Exception as e:
+            return {"error": str(e)}
+
+    async def list_voices(self) -> List[Dict[str, str]]:
+        """List available voices."""
+        if not self.api_key:
+            return []
+
+        headers = {"xi-api-key": self.api_key}
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(
+                    f"{self.base_url}/voices",
+                    headers=headers
+                ) as resp:
+                    if resp.status == 200:
+                        data = await resp.json()
+                        return [
+                            {"id": v["voice_id"], "name": v["name"]}
+                            for v in data.get("voices", [])
+                        ]
+        except:
+            pass
+        return []
+
+
+class MusicGenerationTool(BaseTool):
+    """Tool for generating music."""
+
+    def __init__(self, api_token: Optional[str] = None, backend: str = "musicgen"):
+        self.api_token = api_token
+        self.backend_name = backend
+
+        if backend == "riffusion":
+            self.backend = RiffusionBackend(api_token)
+        else:
+            self.backend = MusicGenBackend(api_token)
+
+        self._spec = ToolSpec(
+            name="music_generation",
+            description="Generate music from text description",
+            category=ToolCategory.AUDIO,
+            input_schema={
+                "type": "object",
+                "required": ["prompt"],
+                "properties": {
+                    "prompt": {"type": "string", "description": "Music description"},
+                    "duration": {"type": "integer", "default": 8},
+                    "seed": {"type": "integer"}
+                }
+            },
+            output_schema={
+                "type": "object",
+                "properties": {
+                    "audio_url": {"type": "string"},
+                    "prompt": {"type": "string"}
+                }
+            }
+        )
+
+    @property
+    def spec(self) -> ToolSpec:
+        return self._spec
+
+    async def execute(
+        self,
+        inputs: Dict[str, Any],
+        context: Optional[Dict[str, Any]] = None
+    ) -> ToolResult:
+        prompt = inputs.get("prompt", "")
+        if not prompt:
+            return ToolResult(success=False, output=None, error="No prompt")
+
+        result = await self.backend.generate_music(
+            prompt=prompt,
+            duration=inputs.get("duration", 8),
+            seed=inputs.get("seed")
+        )
+
+        if "error" in result:
+            return ToolResult(success=False, output=None, error=result["error"])
+
+        return ToolResult(
+            success=True,
+            output=result,
+            metadata={"backend": self.backend_name}
+        )
+
+
+class TTSTool(BaseTool):
+    """Tool for text-to-speech."""
+
+    def __init__(self, api_key: Optional[str] = None):
+        self.backend = ElevenLabsBackend(api_key)
+        self._spec = ToolSpec(
+            name="text_to_speech",
+            description="Convert text to speech audio",
+            category=ToolCategory.AUDIO,
+            input_schema={
+                "type": "object",
+                "required": ["text"],
+                "properties": {
+                    "text": {"type": "string"},
+                    "voice_id": {"type": "string"},
+                    "stability": {"type": "number", "default": 0.5},
+                    "similarity_boost": {"type": "number", "default": 0.75}
+                }
+            },
+            output_schema={
+                "type": "object",
+                "properties": {
+                    "audio_data": {"type": "string", "description": "Base64 encoded audio"},
+                    "format": {"type": "string"}
+                }
+            }
+        )
+
+    @property
+    def spec(self) -> ToolSpec:
+        return self._spec
+
+    async def execute(
+        self,
+        inputs: Dict[str, Any],
+        context: Optional[Dict[str, Any]] = None
+    ) -> ToolResult:
+        text = inputs.get("text", "")
+        if not text:
+            return ToolResult(success=False, output=None, error="No text")
+
+        result = await self.backend.text_to_speech(
+            text=text,
+            voice_id=inputs.get("voice_id", "21m00Tcm4TlvDq8ikWAM"),
+            stability=inputs.get("stability", 0.5),
+            similarity_boost=inputs.get("similarity_boost", 0.75)
+        )
+
+        if "error" in result:
+            return ToolResult(success=False, output=None, error=result["error"])
+
+        return ToolResult(success=True, output=result)
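A usage sketch for the new audio tools (illustrative only, not part of the package). It assumes REPLICATE_API_TOKEN and ELEVENLABS_API_KEY are set in the environment and that ToolResult exposes the success/output/error fields it is constructed with above. Note that MusicGenerationTool.execute always forwards a seed keyword argument, which RiffusionBackend.generate_music accepts but MusicGenBackend.generate_music does not, so the sketch selects the riffusion backend; with the default musicgen backend the call as written would raise a TypeError.

# Illustrative driver for MusicGenerationTool and TTSTool from vibe_aigc/audio.py.
# Assumptions: REPLICATE_API_TOKEN and ELEVENLABS_API_KEY are set, and network
# access to Replicate and ElevenLabs is available.
import asyncio
import base64
import os

from vibe_aigc import MusicGenerationTool, TTSTool


async def main() -> None:
    # "riffusion" is chosen because execute() forwards seed=..., which only
    # RiffusionBackend.generate_music accepts (see note above).
    music = MusicGenerationTool(
        api_token=os.environ["REPLICATE_API_TOKEN"],
        backend="riffusion",
    )
    music_result = await music.execute(
        {"prompt": "upbeat electronic cyberpunk", "duration": 8}
    )
    if music_result.success:
        print("music audio URL:", music_result.output["audio_url"])
    else:
        print("music generation failed:", music_result.error)

    tts = TTSTool(api_key=os.environ["ELEVENLABS_API_KEY"])
    tts_result = await tts.execute({"text": "Welcome to the show."})
    if tts_result.success:
        # audio_data is base64-encoded MP3 bytes, per ElevenLabsBackend.text_to_speech.
        with open("voiceover.mp3", "wb") as f:
            f.write(base64.b64decode(tts_result.output["audio_data"]))
        print("wrote voiceover.mp3")
    else:
        print("text-to-speech failed:", tts_result.error)


if __name__ == "__main__":
    asyncio.run(main())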