vibe-aigc 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. {vibe_aigc-0.4.0/vibe_aigc.egg-info → vibe_aigc-0.5.0}/PKG-INFO +1 -1
  2. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/pyproject.toml +2 -1
  3. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/__init__.py +9 -0
  4. vibe_aigc-0.5.0/vibe_aigc/audio.py +405 -0
  5. vibe_aigc-0.5.0/vibe_aigc/mv_pipeline.py +448 -0
  6. vibe_aigc-0.5.0/vibe_aigc/workflows.py +391 -0
  7. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0/vibe_aigc.egg-info}/PKG-INFO +1 -1
  8. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc.egg-info/SOURCES.txt +3 -0
  9. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/LICENSE +0 -0
  10. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/README.md +0 -0
  11. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/setup.cfg +0 -0
  12. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_adaptive_replanning.py +0 -0
  13. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_agents.py +0 -0
  14. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_assets.py +0 -0
  15. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_auto_checkpoint.py +0 -0
  16. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_automatic_checkpoints.py +0 -0
  17. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_checkpoint_serialization.py +0 -0
  18. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_error_handling.py +0 -0
  19. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_executor.py +0 -0
  20. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_feedback_system.py +0 -0
  21. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_integration.py +0 -0
  22. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_knowledge_base.py +0 -0
  23. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_metaplanner_resume.py +0 -0
  24. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_metaplanner_visualization.py +0 -0
  25. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_models.py +0 -0
  26. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_parallel_execution.py +0 -0
  27. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_planner.py +0 -0
  28. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_progress_callbacks.py +0 -0
  29. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_tools.py +0 -0
  30. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_visualization.py +0 -0
  31. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/tests/test_workflow_resume.py +0 -0
  32. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/agents.py +0 -0
  33. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/assets.py +0 -0
  34. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/character.py +0 -0
  35. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/cli.py +0 -0
  36. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/comfyui.py +0 -0
  37. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/executor.py +0 -0
  38. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/knowledge.py +0 -0
  39. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/llm.py +0 -0
  40. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/models.py +0 -0
  41. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/persistence.py +0 -0
  42. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/planner.py +0 -0
  43. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/tools.py +0 -0
  44. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/tools_multimodal.py +0 -0
  45. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/video.py +0 -0
  46. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc/visualization.py +0 -0
  47. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc.egg-info/dependency_links.txt +0 -0
  48. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc.egg-info/entry_points.txt +0 -0
  49. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc.egg-info/requires.txt +0 -0
  50. {vibe_aigc-0.4.0 → vibe_aigc-0.5.0}/vibe_aigc.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vibe-aigc
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: A New Paradigm for Content Generation via Agentic Orchestration
5
5
  Author: Vibe AIGC Contributors
6
6
  License-Expression: MIT
@@ -8,7 +8,7 @@ exclude = ["tests*", "docs*", "examples*", "landing*"]
8
8
 
9
9
  [project]
10
10
  name = "vibe-aigc"
11
- version = "0.4.0"
11
+ version = "0.5.0"
12
12
  description = "A New Paradigm for Content Generation via Agentic Orchestration"
13
13
  authors = [{name = "Vibe AIGC Contributors"}]
14
14
  license = "MIT"
@@ -69,3 +69,4 @@ ignore_missing_imports = true
69
69
 
70
70
 
71
71
 
72
+
@@ -101,3 +101,12 @@ __all__ = [
101
101
  ]
102
102
  # ComfyUI backend for actual image generation
103
103
  from .comfyui import ComfyUIBackend, ComfyUIConfig, ComfyUIImageTool, create_comfyui_registry
104
+
105
+ # Workflow templates
106
+ from .workflows import WorkflowLibrary, WorkflowTemplate, create_workflow_library
107
+
108
+ # Audio generation
109
+ from .audio import MusicGenBackend, RiffusionBackend, ElevenLabsBackend, MusicGenerationTool, TTSTool
110
+
111
+ # MV Pipeline
112
+ from .mv_pipeline import MVPipeline, Shot, Storyboard, create_mv
@@ -0,0 +1,405 @@
1
+ """Audio generation for music videos.
2
+
3
+ Supports:
4
+ - Music generation (Riffusion, MusicGen)
5
+ - Voice/TTS (ElevenLabs, local TTS)
6
+ - Sound effects
7
+ """
8
+
9
+ import asyncio
10
+ import aiohttp
11
+ import base64
12
+ from typing import Any, Dict, List, Optional
13
+ from dataclasses import dataclass
14
+ from pathlib import Path
15
+
16
+ from .tools import BaseTool, ToolResult, ToolSpec, ToolCategory
17
+
18
+
19
@dataclass
class AudioConfig:
    """Configuration for audio generation.

    Attributes:
        provider: Name of the audio provider; the recognised values listed
            by the original author are "riffusion", "musicgen" and
            "elevenlabs".
        api_key: Optional credential string; ``None`` when not supplied.
        output_dir: Output directory path, "./audio_output" by default.
            NOTE(review): nothing visible in this module reads this field --
            confirm how callers use it.
    """

    provider: str = "riffusion"
    api_key: Optional[str] = None
    output_dir: str = "./audio_output"
25
+
26
+
27
class RiffusionBackend:
    """Music generation using Riffusion (via Replicate).

    A prediction is started through Replicate's HTTP API and then polled
    until it reaches a terminal state.  Every failure is reported as an
    ``{"error": ...}`` dictionary instead of being raised, so callers only
    need to check for the ``"error"`` key.
    """

    # Terminal prediction states that never produce usable output.
    _FAILURE_STATES = ("failed", "canceled")
    # Number of status polls and the pause between them (roughly one minute).
    _POLL_ATTEMPTS = 60
    _POLL_INTERVAL = 1.0

    def __init__(self, api_token: Optional[str] = None):
        """Store the Replicate API token and the API base URL."""
        self.api_token = api_token
        self.base_url = "https://api.replicate.com/v1"

    async def generate_music(
        self,
        prompt: str,
        duration: float = 8.0,  # seconds
        seed: Optional[int] = None
    ) -> Dict[str, Any]:
        """Generate music from a text prompt.

        Args:
            prompt: Description of the music (e.g., "upbeat electronic cyberpunk")
            duration: Length in seconds.  Accepted for interface parity with
                the other backends; it is not forwarded in the request payload.
            seed: Random seed for reproducibility; only sent when not ``None``.

        Returns:
            On success a dict with ``audio_url``, ``spectrogram_url`` and
            ``prompt``; otherwise a dict with a single ``error`` entry.
        """
        if not self.api_token:
            return {"error": "No Replicate API token. Set REPLICATE_API_TOKEN."}

        headers = {
            "Authorization": f"Token {self.api_token}",
            "Content-Type": "application/json"
        }

        payload = {
            "version": "8cf61ea6c56afd61d8f5b9ffd14d7c216c0a93844ce2d82ac1c9ecc9c7f24e05",
            "input": {
                "prompt_a": prompt,
                "denoising": 0.75,
                "prompt_b": prompt,  # Same prompt for consistency
                "alpha": 0.5,
                "num_inference_steps": 50,
                "seed_image_id": "vibes"
            }
        }

        if seed is not None:
            payload["input"]["seed"] = seed

        try:
            async with aiohttp.ClientSession() as session:
                # Start prediction
                async with session.post(
                    f"{self.base_url}/predictions",
                    headers=headers,
                    json=payload
                ) as resp:
                    if resp.status != 201:
                        error = await resp.text()
                        return {"error": f"Failed to start: {error}"}
                    result = await resp.json()

                prediction_id = result.get("id")
                if not prediction_id:
                    # Without an id the poll URL would be ".../predictions/None".
                    return {"error": "Failed to start: response contained no prediction id"}

                poll_url = f"{self.base_url}/predictions/{prediction_id}"

                # Poll for completion
                for _ in range(self._POLL_ATTEMPTS):
                    async with session.get(poll_url, headers=headers) as resp:
                        if resp.status == 200:
                            result = await resp.json()
                        elif 400 <= resp.status < 500 and resp.status != 429:
                            # Client errors will not fix themselves; stop early.
                            error = await resp.text()
                            return {"error": f"Failed to poll: {error}"}
                        else:
                            # Rate limiting / server hiccup: try again next round.
                            result = {}

                    status = result.get("status")

                    if status == "succeeded":
                        output = result.get("output")
                        if not isinstance(output, dict):
                            return {"error": "Generation returned no output"}
                        return {
                            "audio_url": output.get("audio"),
                            "spectrogram_url": output.get("spectrogram"),
                            "prompt": prompt
                        }
                    if status in self._FAILURE_STATES:
                        # "canceled" is terminal too; do not wait for the timeout.
                        return {"error": result.get("error") or f"Generation {status}"}

                    await asyncio.sleep(self._POLL_INTERVAL)

                return {"error": "Timeout waiting for generation"}

        except Exception as e:
            # Deliberate boundary: network/parsing problems become error dicts.
            return {"error": str(e)}
112
+
113
+
114
class MusicGenBackend:
    """Music generation using Meta's MusicGen (via Replicate).

    A prediction is started through Replicate's HTTP API and then polled
    until it reaches a terminal state.  Every failure is reported as an
    ``{"error": ...}`` dictionary instead of being raised.
    """

    # Terminal prediction states that never produce usable output.
    _FAILURE_STATES = ("failed", "canceled")
    # MusicGen can take longer, hence twice as many polls as Riffusion.
    _POLL_ATTEMPTS = 120
    _POLL_INTERVAL = 1.0
    # Bounds (in seconds) applied to the requested duration.
    _MIN_DURATION = 1
    _MAX_DURATION = 30

    def __init__(self, api_token: Optional[str] = None):
        """Store the Replicate API token and the API base URL."""
        self.api_token = api_token
        self.base_url = "https://api.replicate.com/v1"

    @classmethod
    def _clamp_duration(cls, duration):
        """Limit *duration* to the inclusive range of supported lengths."""
        return max(cls._MIN_DURATION, min(duration, cls._MAX_DURATION))

    async def generate_music(
        self,
        prompt: str,
        duration: int = 8,
        model_version: str = "melody",  # small, medium, melody, large
        continuation: bool = False,
        input_audio: Optional[str] = None,
        seed: Optional[int] = None
    ) -> Dict[str, Any]:
        """Generate music using MusicGen.

        Args:
            prompt: Text description of desired music
            duration: Length in seconds; clamped to the range 1-30
            model_version: Model size/type
            continuation: Whether to continue from input_audio
            input_audio: URL of audio to continue from
            seed: Optional random seed; only sent when not ``None``.  Accepted
                so this backend can be called like ``RiffusionBackend``.

        Returns:
            On success a dict with ``audio_url``, ``prompt`` and the
            ``duration`` actually requested from the model (after clamping);
            otherwise a dict with a single ``error`` entry.
        """
        if not self.api_token:
            return {"error": "No Replicate API token"}

        headers = {
            "Authorization": f"Token {self.api_token}",
            "Content-Type": "application/json"
        }

        effective_duration = self._clamp_duration(duration)

        # MusicGen model on Replicate
        payload = {
            "version": "b05b1dff1d8c6dc63d14b0cdb42135378dcb87f6373b0d3d341ede46e59e2b38",
            "input": {
                "prompt": prompt,
                "duration": effective_duration,
                "model_version": model_version,
                "output_format": "mp3",
                "normalization_strategy": "peak"
            }
        }

        if continuation and input_audio:
            payload["input"]["continuation"] = True
            payload["input"]["input_audio"] = input_audio

        if seed is not None:
            payload["input"]["seed"] = seed

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    f"{self.base_url}/predictions",
                    headers=headers,
                    json=payload
                ) as resp:
                    if resp.status != 201:
                        return {"error": await resp.text()}
                    result = await resp.json()

                prediction_id = result.get("id")
                if not prediction_id:
                    # Without an id the poll URL would be ".../predictions/None".
                    return {"error": "Response contained no prediction id"}

                poll_url = f"{self.base_url}/predictions/{prediction_id}"

                # Poll for completion
                for _ in range(self._POLL_ATTEMPTS):
                    async with session.get(poll_url, headers=headers) as resp:
                        if resp.status == 200:
                            result = await resp.json()
                        elif 400 <= resp.status < 500 and resp.status != 429:
                            # Client errors will not fix themselves; stop early.
                            return {"error": await resp.text()}
                        else:
                            # Rate limiting / server hiccup: try again next round.
                            result = {}

                    status = result.get("status")

                    if status == "succeeded":
                        output = result.get("output")
                        if output is None:
                            return {"error": "Generation returned no output"}
                        return {
                            "audio_url": output,
                            "prompt": prompt,
                            "duration": effective_duration
                        }
                    if status in self._FAILURE_STATES:
                        # "canceled" is terminal too; do not wait for the timeout.
                        return {"error": result.get("error") or f"Generation {status}"}

                    await asyncio.sleep(self._POLL_INTERVAL)

                return {"error": "Timeout"}

        except Exception as e:
            # Deliberate boundary: network/parsing problems become error dicts.
            return {"error": str(e)}
201
+
202
+
203
class ElevenLabsBackend:
    """Voice and speech synthesis using ElevenLabs.

    Both methods are best-effort: problems are reported through the return
    value (an ``{"error": ...}`` dict or an empty list) rather than raised.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Store the ElevenLabs API key and the API base URL."""
        self.api_key = api_key
        self.base_url = "https://api.elevenlabs.io/v1"

    async def text_to_speech(
        self,
        text: str,
        voice_id: str = "21m00Tcm4TlvDq8ikWAM",  # Rachel (default)
        model_id: str = "eleven_monolingual_v1",
        stability: float = 0.5,
        similarity_boost: float = 0.75
    ) -> Dict[str, Any]:
        """Generate speech from text.

        Args:
            text: Text to speak
            voice_id: ElevenLabs voice ID
            model_id: Model to use
            stability: Voice stability (0-1)
            similarity_boost: Voice similarity (0-1)

        Returns:
            On success a dict with ``audio_data`` (base64-encoded response
            body), ``format`` and ``text``; otherwise a dict with a single
            ``error`` entry.
        """
        if not self.api_key:
            return {"error": "No ElevenLabs API key"}

        from urllib.parse import quote

        headers = {
            "xi-api-key": self.api_key,
            "Content-Type": "application/json"
        }

        payload = {
            "text": text,
            "model_id": model_id,
            "voice_settings": {
                "stability": stability,
                "similarity_boost": similarity_boost
            }
        }

        # voice_id may come from tool inputs; escape it so it cannot alter
        # the request path (e.g. via "/" or "?").
        url = f"{self.base_url}/text-to-speech/{quote(str(voice_id), safe='')}"

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    url,
                    headers=headers,
                    json=payload
                ) as resp:
                    if resp.status != 200:
                        return {"error": await resp.text()}

                    audio_data = await resp.read()
                    return {
                        "audio_data": base64.b64encode(audio_data).decode(),
                        "format": "mp3",
                        "text": text
                    }

        except Exception as e:
            # Deliberate boundary: network problems become error dicts.
            return {"error": str(e)}

    async def list_voices(self) -> List[Dict[str, str]]:
        """List available voices.

        Returns:
            A list of ``{"id": ..., "name": ...}`` dicts, or an empty list
            when no API key is configured, the request does not return
            HTTP 200, or the request/parsing fails.
        """
        if not self.api_key:
            return []

        headers = {"xi-api-key": self.api_key}

        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    f"{self.base_url}/voices",
                    headers=headers
                ) as resp:
                    if resp.status == 200:
                        data = await resp.json()
                        return [
                            {"id": v["voice_id"], "name": v["name"]}
                            for v in data.get("voices", [])
                        ]
        except Exception:
            # Best-effort lookup.  ``Exception`` (not a bare ``except``) so
            # task cancellation and interpreter exit still propagate.
            pass
        return []
289
+
290
+
291
class MusicGenerationTool(BaseTool):
    """Tool for generating music.

    Wraps either ``RiffusionBackend`` (``backend="riffusion"``) or
    ``MusicGenBackend`` (any other value) behind the tool interface.
    """

    def __init__(self, api_token: Optional[str] = None, backend: str = "musicgen"):
        """Select the backend and build the tool specification.

        Args:
            api_token: Token handed to the selected backend.
            backend: "riffusion" selects Riffusion; every other value falls
                back to MusicGen.
        """
        self.api_token = api_token
        self.backend_name = backend

        if backend == "riffusion":
            self.backend = RiffusionBackend(api_token)
        else:
            self.backend = MusicGenBackend(api_token)

        self._spec = ToolSpec(
            name="music_generation",
            description="Generate music from text description",
            category=ToolCategory.AUDIO,
            input_schema={
                "type": "object",
                "required": ["prompt"],
                "properties": {
                    "prompt": {"type": "string", "description": "Music description"},
                    "duration": {"type": "integer", "default": 8},
                    "seed": {"type": "integer"}
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "audio_url": {"type": "string"},
                    "prompt": {"type": "string"}
                }
            }
        )

    @property
    def spec(self) -> ToolSpec:
        """Return the tool specification built in ``__init__``."""
        return self._spec

    def _backend_accepts_seed(self) -> bool:
        """Tell whether the backend's ``generate_music`` has a ``seed`` parameter."""
        import inspect

        try:
            parameters = inspect.signature(self.backend.generate_music).parameters
        except (TypeError, ValueError):
            return False
        return "seed" in parameters

    async def execute(
        self,
        inputs: Dict[str, Any],
        context: Optional[Dict[str, Any]] = None
    ) -> ToolResult:
        """Generate music for ``inputs["prompt"]``.

        Args:
            inputs: Mapping with a required ``prompt`` and optional
                ``duration`` (default 8) and ``seed``.
            context: Unused by this tool.

        Returns:
            A failed ``ToolResult`` when the prompt is empty or the backend
            reports an error; otherwise a successful one whose output is the
            backend's result dict and whose metadata names the backend.
        """
        prompt = inputs.get("prompt", "")
        if not prompt:
            return ToolResult(success=False, output=None, error="No prompt")

        duration = inputs.get("duration")
        if duration is None:
            # Covers both a missing key and an explicit null value.
            duration = 8

        call_args: Dict[str, Any] = {"prompt": prompt, "duration": duration}

        # Not every backend takes a seed; passing it unconditionally raises
        # TypeError for backends whose generate_music() lacks the parameter.
        seed = inputs.get("seed")
        if seed is not None and self._backend_accepts_seed():
            call_args["seed"] = seed

        result = await self.backend.generate_music(**call_args)

        if "error" in result:
            return ToolResult(success=False, output=None, error=result["error"])

        return ToolResult(
            success=True,
            output=result,
            metadata={"backend": self.backend_name}
        )
352
+
353
+
354
class TTSTool(BaseTool):
    """Text-to-speech tool that delegates to ``ElevenLabsBackend``."""

    def __init__(self, api_key: Optional[str] = None):
        """Create the backend and describe the tool's inputs and outputs."""
        request_schema = {
            "type": "object",
            "required": ["text"],
            "properties": {
                "text": {"type": "string"},
                "voice_id": {"type": "string"},
                "stability": {"type": "number", "default": 0.5},
                "similarity_boost": {"type": "number", "default": 0.75}
            }
        }
        response_schema = {
            "type": "object",
            "properties": {
                "audio_data": {"type": "string", "description": "Base64 encoded audio"},
                "format": {"type": "string"}
            }
        }

        self.backend = ElevenLabsBackend(api_key)
        self._spec = ToolSpec(
            name="text_to_speech",
            description="Convert text to speech audio",
            category=ToolCategory.AUDIO,
            input_schema=request_schema,
            output_schema=response_schema
        )

    @property
    def spec(self) -> ToolSpec:
        """Return the tool specification built in ``__init__``."""
        return self._spec

    async def execute(
        self,
        inputs: Dict[str, Any],
        context: Optional[Dict[str, Any]] = None
    ) -> ToolResult:
        """Synthesize speech for ``inputs["text"]``.

        Args:
            inputs: Mapping with a required ``text`` and optional
                ``voice_id``, ``stability`` and ``similarity_boost``.
            context: Unused by this tool.

        Returns:
            A failed ``ToolResult`` when the text is empty or the backend
            reports an error; otherwise a successful one carrying the
            backend's result dict.
        """
        text = inputs.get("text", "")
        if not text:
            return ToolResult(success=False, output=None, error="No text")

        # Fall back to the same defaults the backend itself uses.
        voice_id = inputs.get("voice_id", "21m00Tcm4TlvDq8ikWAM")
        stability = inputs.get("stability", 0.5)
        similarity_boost = inputs.get("similarity_boost", 0.75)

        speech = await self.backend.text_to_speech(
            text=text,
            voice_id=voice_id,
            stability=stability,
            similarity_boost=similarity_boost
        )

        if "error" not in speech:
            return ToolResult(success=True, output=speech)

        return ToolResult(success=False, output=None, error=speech["error"])