vibe-aigc 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vibe_aigc/fidelity.py ADDED
@@ -0,0 +1,401 @@
+ """Fidelity Measurement — Creative Unit Tests for vibe-aigc.
+
+ Paper Section 6: "The Verification Crisis... no universal unit test for a 'cinematic atmosphere'"
+ Paper Section 7: "We need 'Creative Unit Tests'"
+
+ This module measures how well vibe-aigc achieves user intent:
+ 1. Intent Alignment: Does output match the vibe?
+ 2. Consistency: Same prompt → similar results?
+ 3. Quality Distribution: What's the score spread?
+ 4. Refinement Efficacy: Does feedback improve scores?
+ """
+
+ import asyncio
+ import statistics
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, List, Optional
+ from datetime import datetime
+ from pathlib import Path
+ import json
+
+ from .vibe_backend import VibeBackend, GenerationRequest, GenerationResult
+ from .discovery import Capability
+ from .vlm_feedback import VLMFeedback, FeedbackResult
+
+
+ @dataclass
+ class FidelityScore:
+ """Score for a single generation."""
+ prompt: str
+ output_url: str
+ quality_score: float
+ feedback: str
+ strengths: List[str]
+ weaknesses: List[str]
+ attempt_number: int
+ timestamp: str
+
+ def to_dict(self) -> Dict[str, Any]:
+ return {
+ "prompt": self.prompt,
+ "output_url": self.output_url,
+ "quality_score": self.quality_score,
+ "feedback": self.feedback,
+ "strengths": self.strengths,
+ "weaknesses": self.weaknesses,
+ "attempt_number": self.attempt_number,
+ "timestamp": self.timestamp
+ }
+
+
+ @dataclass
+ class FidelityReport:
+ """Complete fidelity report for a prompt."""
+ prompt: str
+ capability: str
+ num_runs: int
+ scores: List[FidelityScore]
+
+ # Statistics
+ mean_score: float = 0.0
+ std_dev: float = 0.0
+ min_score: float = 0.0
+ max_score: float = 0.0
+
+ # Refinement analysis
+ first_attempt_mean: float = 0.0
+ refined_attempts_mean: float = 0.0
+ refinement_improvement: float = 0.0
+
+ # Common patterns
+ common_strengths: List[str] = field(default_factory=list)
+ common_weaknesses: List[str] = field(default_factory=list)
+
+ def compute_statistics(self) -> None:
+ """Compute statistics from scores."""
+ if not self.scores:
+ return
+
+ quality_scores = [s.quality_score for s in self.scores]
+
+ self.mean_score = statistics.mean(quality_scores)
+ self.std_dev = statistics.stdev(quality_scores) if len(quality_scores) > 1 else 0.0
+ self.min_score = min(quality_scores)
+ self.max_score = max(quality_scores)
+
+ # Refinement analysis
+ first_attempts = [s.quality_score for s in self.scores if s.attempt_number == 1]
+ refined_attempts = [s.quality_score for s in self.scores if s.attempt_number > 1]
+
+ if first_attempts:
+ self.first_attempt_mean = statistics.mean(first_attempts)
+ if refined_attempts:
+ self.refined_attempts_mean = statistics.mean(refined_attempts)
+ self.refinement_improvement = self.refined_attempts_mean - self.first_attempt_mean
+
+ # Common patterns
+ all_strengths = []
+ all_weaknesses = []
+ for s in self.scores:
+ all_strengths.extend(s.strengths)
+ all_weaknesses.extend(s.weaknesses)
+
+ # Count frequency
+ from collections import Counter
+ strength_counts = Counter(all_strengths)
+ weakness_counts = Counter(all_weaknesses)
+
+ self.common_strengths = [s for s, _ in strength_counts.most_common(5)]
+ self.common_weaknesses = [w for w, _ in weakness_counts.most_common(5)]
+
+ def summary(self) -> str:
+ """Human-readable summary."""
+ lines = [
+ "=" * 60,
+ "FIDELITY REPORT",
+ "=" * 60,
+ "",
+ f"Prompt: {self.prompt[:50]}...",
+ f"Capability: {self.capability}",
+ f"Runs: {self.num_runs}",
+ "",
+ "QUALITY SCORES:",
+ f" Mean: {self.mean_score:.2f}/10",
+ f" Std Dev: {self.std_dev:.2f}",
+ f" Range: {self.min_score:.1f} - {self.max_score:.1f}",
+ "",
+ "REFINEMENT EFFICACY:",
+ f" First attempt mean: {self.first_attempt_mean:.2f}",
+ f" Refined attempts mean: {self.refined_attempts_mean:.2f}",
+ f" Improvement: {self.refinement_improvement:+.2f}",
+ "",
+ "COMMON STRENGTHS:",
+ ]
+ for s in self.common_strengths[:3]:
+ lines.append(f" + {s}")
+
+ lines.append("")
+ lines.append("COMMON WEAKNESSES:")
+ for w in self.common_weaknesses[:3]:
+ lines.append(f" - {w}")
+
+ lines.append("")
+ lines.append("=" * 60)
+
+ # Verdict
+ if self.mean_score >= 7.0:
+ lines.append("VERDICT: HIGH FIDELITY - System achieves intent well")
+ elif self.mean_score >= 5.0:
+ lines.append("VERDICT: MODERATE FIDELITY - Room for improvement")
+ else:
+ lines.append("VERDICT: LOW FIDELITY - Significant gap from intent")
+
+ if self.refinement_improvement > 0.5:
+ lines.append(f"REFINEMENT: EFFECTIVE (+{self.refinement_improvement:.1f} improvement)")
+ elif self.refinement_improvement < -0.5:
+ lines.append(f"REFINEMENT: COUNTERPRODUCTIVE ({self.refinement_improvement:.1f})")
+ else:
+ lines.append("REFINEMENT: MARGINAL EFFECT")
+
+ lines.append("=" * 60)
+
+ return "\n".join(lines)
+
+ def to_dict(self) -> Dict[str, Any]:
+ return {
+ "prompt": self.prompt,
+ "capability": self.capability,
+ "num_runs": self.num_runs,
+ "scores": [s.to_dict() for s in self.scores],
+ "statistics": {
+ "mean": self.mean_score,
+ "std_dev": self.std_dev,
+ "min": self.min_score,
+ "max": self.max_score,
+ },
+ "refinement": {
+ "first_attempt_mean": self.first_attempt_mean,
+ "refined_mean": self.refined_attempts_mean,
+ "improvement": self.refinement_improvement,
+ },
+ "patterns": {
+ "common_strengths": self.common_strengths,
+ "common_weaknesses": self.common_weaknesses,
+ }
+ }
+
+
+ class FidelityBenchmark:
+ """Benchmark for measuring vibe-aigc fidelity.
+
+ Usage:
+ benchmark = FidelityBenchmark(comfyui_url="http://192.168.1.143:8188")
+ await benchmark.initialize()
+
+ report = await benchmark.run(
+ prompt="cyberpunk samurai in neon rain",
+ capability=Capability.TEXT_TO_IMAGE,
+ num_runs=5
+ )
+
+ print(report.summary())
+ """
+
+ def __init__(
+ self,
+ comfyui_url: str = "http://127.0.0.1:8188",
+ max_attempts_per_run: int = 2,
+ quality_threshold: float = 7.0
+ ):
+ self.backend = VibeBackend(
+ comfyui_url=comfyui_url,
+ enable_vlm=True,
+ max_attempts=max_attempts_per_run,
+ quality_threshold=quality_threshold
+ )
+ self._initialized = False
+
+ async def initialize(self) -> None:
+ """Initialize the benchmark."""
+ await self.backend.initialize()
+ self._initialized = True
+
+ async def run(
+ self,
+ prompt: str,
+ capability: Capability = Capability.TEXT_TO_IMAGE,
+ num_runs: int = 5,
+ **kwargs
+ ) -> FidelityReport:
+ """Run the fidelity benchmark.
+
+ Args:
+ prompt: The prompt to test
+ capability: What to generate
+ num_runs: How many times to run
+ **kwargs: Additional generation parameters
+
+ Returns:
+ FidelityReport with scores and statistics
+ """
+ if not self._initialized:
+ await self.initialize()
+
+ print(f"Running fidelity benchmark: {num_runs} runs")
+ print(f"Prompt: {prompt[:50]}...")
+ print()
+
+ scores = []
+
+ for i in range(num_runs):
+ print(f"Run {i+1}/{num_runs}...")
+
+ request = GenerationRequest(
+ prompt=prompt,
+ capability=capability,
+ **kwargs
+ )
+
+ result = await self.backend.generate(request)
+
+ if result.success:
+ score = FidelityScore(
+ prompt=prompt,
+ output_url=result.output_url or "",
+ quality_score=result.quality_score or 5.0,
+ feedback=result.feedback or "",
+ strengths=result.strengths or [],
+ weaknesses=result.weaknesses or [],
+ attempt_number=result.attempts,
+ timestamp=datetime.now().isoformat()
+ )
+ scores.append(score)
+ print(f" Score: {score.quality_score}/10 (attempt {score.attempt_number})")
+ if score.strengths:
+ print(f" Strengths: {', '.join(score.strengths[:2])}")
+ if score.weaknesses:
+ print(f" Weaknesses: {', '.join(score.weaknesses[:2])}")
+ else:
+ print(f" Failed: {result.error}")
+
+ # Build report
+ report = FidelityReport(
+ prompt=prompt,
+ capability=capability.value,
+ num_runs=num_runs,
+ scores=scores
+ )
+ report.compute_statistics()
+
+ return report
+
+ async def compare_prompts(
+ self,
+ prompts: List[str],
+ capability: Capability = Capability.TEXT_TO_IMAGE,
+ runs_per_prompt: int = 3
+ ) -> List[FidelityReport]:
+ """Compare fidelity across multiple prompts."""
+ reports = []
+
+ for prompt in prompts:
+ report = await self.run(prompt, capability, runs_per_prompt)
+ reports.append(report)
+
+ return reports
+
+ async def test_refinement_efficacy(
+ self,
+ prompt: str,
+ capability: Capability = Capability.TEXT_TO_IMAGE,
+ num_runs: int = 5
+ ) -> Dict[str, Any]:
+ """Specifically test if VLM refinement improves quality.
+
+ Runs with max_attempts=1 (no refinement) vs max_attempts=3 (with refinement)
+ """
+ print("Testing refinement efficacy...")
+ print()
+
+ # Without refinement
+ print("Phase 1: Without refinement (max_attempts=1)")
+ self.backend.max_attempts = 1
+ no_refine_scores = []
+
+ for i in range(num_runs):
+ result = await self.backend.generate(GenerationRequest(
+ prompt=prompt,
+ capability=capability
+ ))
+ if result.success:
+ no_refine_scores.append(result.quality_score or 5.0)
+ print(f" Run {i+1}: {result.quality_score}/10")
+
+ # With refinement
+ print()
+ print("Phase 2: With refinement (max_attempts=3)")
+ self.backend.max_attempts = 3
+ with_refine_scores = []
+
+ for i in range(num_runs):
+ result = await self.backend.generate(GenerationRequest(
+ prompt=prompt,
+ capability=capability
+ ))
+ if result.success:
+ with_refine_scores.append(result.quality_score or 5.0)
+ print(f" Run {i+1}: {result.quality_score}/10 (attempts: {result.attempts})")
+
+ # Analysis
+ no_refine_mean = statistics.mean(no_refine_scores) if no_refine_scores else 0
+ with_refine_mean = statistics.mean(with_refine_scores) if with_refine_scores else 0
+ improvement = with_refine_mean - no_refine_mean
+
+ return {
+ "prompt": prompt,
+ "without_refinement": {
+ "scores": no_refine_scores,
+ "mean": no_refine_mean,
+ },
+ "with_refinement": {
+ "scores": with_refine_scores,
+ "mean": with_refine_mean,
+ },
+ "improvement": improvement,
+ "refinement_effective": improvement > 0.5
+ }
+
+
+ # =============================================================================
+ # CONVENIENCE FUNCTIONS
+ # =============================================================================
+
+ async def measure_fidelity(
+ prompt: str,
+ comfyui_url: str = "http://127.0.0.1:8188",
+ num_runs: int = 5
+ ) -> FidelityReport:
+ """Quick fidelity measurement."""
+ benchmark = FidelityBenchmark(comfyui_url=comfyui_url)
+ await benchmark.initialize()
+ return await benchmark.run(prompt, num_runs=num_runs)
+
+
+ async def run_creative_unit_test(
+ prompt: str,
+ expected_min_score: float = 6.0,
+ comfyui_url: str = "http://127.0.0.1:8188",
+ num_runs: int = 3
+ ) -> bool:
+ """Run a creative unit test — does the system achieve minimum quality?
+
+ Returns True if mean score >= expected_min_score
+ """
+ report = await measure_fidelity(prompt, comfyui_url, num_runs)
+ passed = report.mean_score >= expected_min_score
+
+ print(f"Creative Unit Test: {'PASSED' if passed else 'FAILED'}")
+ print(f" Expected: >= {expected_min_score}")
+ print(f" Actual: {report.mean_score:.2f}")
+
+ return passed
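
For orientation, the convenience helpers above are the entry points a caller would normally use. A minimal caller-side sketch (illustrative only, not part of the package) might look like this, assuming vibe-aigc 0.6.2 is installed and a ComfyUI server with VLM feedback is reachable at the placeholder URL:

    # Illustrative sketch; prompt, threshold, URL, and run counts are placeholders.
    import asyncio

    from vibe_aigc.fidelity import measure_fidelity, run_creative_unit_test

    async def main() -> None:
        # Pass/fail gate: mean VLM score over 3 runs must reach 6.0/10.
        passed = await run_creative_unit_test(
            prompt="cyberpunk samurai in neon rain",
            expected_min_score=6.0,
            comfyui_url="http://127.0.0.1:8188",
            num_runs=3,
        )

        # Full report with mean, spread, and refinement statistics.
        report = await measure_fidelity(
            "cyberpunk samurai in neon rain",
            comfyui_url="http://127.0.0.1:8188",
            num_runs=5,
        )
        print(report.summary())
        assert passed

    if __name__ == "__main__":
        asyncio.run(main())
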
vibe_aigc/vibe_backend.py CHANGED
@@ -14,7 +14,7 @@ This works with ANY ComfyUI setup — no hardcoded models or patterns.
  import asyncio
  import aiohttp
  from typing import Any, Dict, List, Optional
- from dataclasses import dataclass
+ from dataclasses import dataclass, field
  from pathlib import Path

  from .discovery import (
@@ -51,6 +51,9 @@ class GenerationResult:
  output_path: Optional[str] = None
  quality_score: float = 0.0
  feedback: Optional[str] = None
+ strengths: List[str] = field(default_factory=list)
+ weaknesses: List[str] = field(default_factory=list)
+ prompt_improvements: List[str] = field(default_factory=list)
  error: Optional[str] = None
  workflow_used: Optional[str] = None
  model_used: Optional[str] = None
@@ -147,6 +150,10 @@
  import random
  request.seed = random.randint(0, 2**32 - 1)

+ # Special handling for TEXT_TO_VIDEO: use I2V pipeline
+ if request.capability == Capability.TEXT_TO_VIDEO:
+ return await self._generate_video_via_i2v(request)
+
  # Try to get workflow
  workflow = await self._get_workflow(request)
  if not workflow:
@@ -188,16 +195,25 @@

  # Compose from available nodes
  print(f"Composing workflow for {request.capability.value}...")
+
+ # Build kwargs based on capability
+ kwargs = {
+ "negative_prompt": request.negative_prompt,
+ "width": request.width,
+ "height": request.height,
+ "steps": request.steps,
+ "cfg": request.cfg,
+ "seed": request.seed
+ }
+
+ # Add frames only for video capabilities
+ if request.capability in [Capability.TEXT_TO_VIDEO, Capability.IMAGE_TO_VIDEO]:
+ kwargs["frames"] = request.frames
+
  return self.composer.compose_for_capability(
  capability=request.capability,
  prompt=request.prompt,
- negative_prompt=request.negative_prompt,
- width=request.width,
- height=request.height,
- frames=request.frames,
- steps=request.steps,
- cfg=request.cfg,
- seed=request.seed
+ **kwargs
  )

  async def _execute_with_feedback(
@@ -230,30 +246,65 @@ class VibeBackend:
  return result

  # VLM feedback
- if self.vlm and self.vlm.available and result.output_path:
- feedback = self.vlm.analyze_media(
- Path(result.output_path),
- current_prompt
- )
-
- result.quality_score = feedback.quality_score
- result.feedback = feedback.description
-
- if feedback.quality_score > best_score:
- best_score = feedback.quality_score
- best_result = result
+ if self.vlm and self.vlm.available and result.output_url:
+ # Download image for VLM analysis
+ feedback = None
+ temp_path = None
+ try:
+ import tempfile
+ import os
+ async with aiohttp.ClientSession() as session:
+ async with session.get(result.output_url) as resp:
+ if resp.status == 200:
+ content = await resp.read()
+ # Save to temp file (won't auto-delete)
+ suffix = '.png' if 'png' in result.output_url else '.webp'
+ fd, temp_path = tempfile.mkstemp(suffix=suffix)
+ os.write(fd, content)
+ os.close(fd)
+
+ feedback = self.vlm.analyze_media(
+ Path(temp_path),
+ current_prompt
+ )
+ except Exception as e:
+ print(f"VLM feedback failed: {e}")
+ feedback = None
+ finally:
+ # Clean up temp file (ignore errors on Windows)
+ if temp_path:
+ try:
+ import os
+ os.unlink(temp_path)
+ except:
+ pass # Windows file locking, will be cleaned up by OS

- if feedback.quality_score >= self.quality_threshold:
- print(f"Quality threshold met: {feedback.quality_score}/10")
+ if feedback:
+ result.quality_score = feedback.quality_score
+ result.feedback = feedback.description
+ result.strengths = feedback.strengths
+ result.weaknesses = feedback.weaknesses
+ result.prompt_improvements = feedback.prompt_improvements
+
+ if feedback.quality_score > best_score:
+ best_score = feedback.quality_score
+ best_result = result
+
+ if feedback.quality_score >= self.quality_threshold:
+ print(f"Quality threshold met: {feedback.quality_score}/10")
+ result.attempts = attempt + 1
+ return result
+
+ # Refine prompt for next attempt
+ if attempt < self.max_attempts - 1:
+ current_prompt = self.vlm.suggest_improvements(feedback, current_prompt)
+ print(f"Refined prompt: {current_prompt[:50]}...")
+ else:
+ # VLM failed, return successful result
  result.attempts = attempt + 1
  return result
-
- # Refine prompt for next attempt
- if attempt < self.max_attempts - 1:
- current_prompt = self.vlm.suggest_improvements(feedback, current_prompt)
- print(f"Refined prompt: {current_prompt[:50]}...")
  else:
- # No VLM, return first successful result
+ # No VLM configured, return first successful result
  result.attempts = attempt + 1
  return result

@@ -341,6 +392,199 @@ class VibeBackend:
  except Exception as e:
  return GenerationResult(success=False, error=str(e))

+ async def _generate_video_via_i2v(self, request: GenerationRequest) -> GenerationResult:
+ """Generate video via Image-to-Video pipeline.
+
+ Two-step process:
+ 1. Generate base image with TEXT_TO_IMAGE
+ 2. Animate with IMAGE_TO_VIDEO
+ """
+ print("\n[1/2] Generating base image...")
+
+ # Step 1: Generate image
+ image_workflow = self._create_flux_image_workflow(
+ prompt=request.prompt,
+ negative=request.negative_prompt,
+ width=request.width,
+ height=request.height,
+ seed=request.seed
+ )
+
+ image_result = await self._execute_workflow(image_workflow)
+ if not image_result.success:
+ return GenerationResult(
+ success=False,
+ error=f"Image generation failed: {image_result.error}"
+ )
+
+ print(f" Base image: {image_result.output_path}")
+
+ # Step 2: Upload image and animate
+ print("\n[2/2] Animating with I2V...")
+
+ # Download image
+ async with aiohttp.ClientSession() as session:
+ async with session.get(image_result.output_url) as resp:
+ image_data = await resp.read()
+
+ # Upload to ComfyUI
+ form = aiohttp.FormData()
+ form.add_field('image', image_data, filename='input.png', content_type='image/png')
+
+ async with session.post(f"{self.url}/upload/image", data=form) as resp:
+ upload_result = await resp.json()
+ uploaded_name = upload_result.get("name", "input.png")
+ print(f" Uploaded: {uploaded_name}")
+
+ # Create I2V workflow
+ i2v_workflow = self._create_wan_i2v_workflow(
+ uploaded_image=uploaded_name,
+ prompt=request.prompt,
+ negative=request.negative_prompt,
+ width=request.width,
+ height=request.height,
+ frames=request.frames,
+ seed=request.seed
+ )
+
+ video_result = await self._execute_workflow(i2v_workflow)
+ if not video_result.success:
+ return GenerationResult(
+ success=False,
+ error=f"Animation failed: {video_result.error}"
+ )
+
+ print(f" Video: {video_result.output_path}")
+ return video_result
+
+ def _create_flux_image_workflow(
+ self, prompt: str, negative: str, width: int, height: int, seed: int
+ ) -> Dict[str, Any]:
+ """Create FLUX image generation workflow."""
+ return {
+ "1": {
+ "class_type": "CheckpointLoaderSimple",
+ "inputs": {"ckpt_name": "flux1-dev-fp8.safetensors"}
+ },
+ "2": {
+ "class_type": "CLIPTextEncode",
+ "inputs": {"text": prompt, "clip": ["1", 1]}
+ },
+ "3": {
+ "class_type": "CLIPTextEncode",
+ "inputs": {"text": negative or "blurry, distorted, ugly", "clip": ["1", 1]}
+ },
+ "4": {
+ "class_type": "EmptyLatentImage",
+ "inputs": {"width": width, "height": height, "batch_size": 1}
+ },
+ "5": {
+ "class_type": "KSampler",
+ "inputs": {
+ "seed": seed,
+ "steps": 20,
+ "cfg": 3.5,
+ "sampler_name": "euler",
+ "scheduler": "simple",
+ "denoise": 1.0,
+ "model": ["1", 0],
+ "positive": ["2", 0],
+ "negative": ["3", 0],
+ "latent_image": ["4", 0]
+ }
+ },
+ "6": {
+ "class_type": "VAEDecode",
+ "inputs": {"samples": ["5", 0], "vae": ["1", 2]}
+ },
+ "7": {
+ "class_type": "SaveImage",
+ "inputs": {"images": ["6", 0], "filename_prefix": "vibe_base"}
+ }
+ }
+
+ def _create_wan_i2v_workflow(
+ self, uploaded_image: str, prompt: str, negative: str,
+ width: int, height: int, frames: int, seed: int
+ ) -> Dict[str, Any]:
+ """Create Wan 2.1 I2V workflow."""
+ return {
+ "1": {
+ "class_type": "UNETLoader",
+ "inputs": {
+ "unet_name": "I2V/Wan2_1-I2V-14B-480p_fp8_e4m3fn_scaled_KJ.safetensors",
+ "weight_dtype": "fp8_e4m3fn"
+ }
+ },
+ "2": {
+ "class_type": "CLIPLoader",
+ "inputs": {
+ "clip_name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors",
+ "type": "wan"
+ }
+ },
+ "3": {
+ "class_type": "VAELoader",
+ "inputs": {"vae_name": "wan_2.1_vae.safetensors"}
+ },
+ "4": {
+ "class_type": "LoadImage",
+ "inputs": {"image": uploaded_image}
+ },
+ "5": {
+ "class_type": "CLIPTextEncode",
+ "inputs": {"text": prompt + ", smooth motion, cinematic", "clip": ["2", 0]}
+ },
+ "6": {
+ "class_type": "CLIPTextEncode",
+ "inputs": {"text": negative or "static, frozen, blurry, distorted", "clip": ["2", 0]}
+ },
+ "7": {
+ "class_type": "WanImageToVideo",
+ "inputs": {
+ "positive": ["5", 0],
+ "negative": ["6", 0],
+ "vae": ["3", 0],
+ "width": width,
+ "height": height,
+ "length": frames,
+ "batch_size": 1,
+ "start_image": ["4", 0]
+ }
+ },
+ "8": {
+ "class_type": "KSampler",
+ "inputs": {
+ "seed": seed,
+ "steps": 30,
+ "cfg": 5.0,
+ "sampler_name": "euler",
+ "scheduler": "normal",
+ "denoise": 1.0,
+ "model": ["1", 0],
+ "positive": ["7", 0],
+ "negative": ["7", 1],
+ "latent_image": ["7", 2]
+ }
+ },
+ "9": {
+ "class_type": "VAEDecode",
+ "inputs": {"samples": ["8", 0], "vae": ["3", 0]}
+ },
+ "10": {
+ "class_type": "VHS_VideoCombine",
+ "inputs": {
+ "images": ["9", 0],
+ "frame_rate": 16,
+ "loop_count": 0,
+ "filename_prefix": "vibe_i2v",
+ "format": "image/webp",
+ "pingpong": False,
+ "save_output": True
+ }
+ }
+ }
+
  def status(self) -> str:
  """Get backend status."""
  if not self._initialized:
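
In short, the TEXT_TO_VIDEO branch added above turns one request into a two-step job: a FLUX text-to-image workflow renders a base frame, which is uploaded to ComfyUI and then animated by the Wan 2.1 image-to-video workflow. A caller-side sketch of that path is below (illustrative only; the URL is a placeholder, and only names that appear in this diff are used):

    # Illustrative sketch; assumes a ComfyUI server at the placeholder URL with the
    # FLUX and Wan 2.1 checkpoints named in the workflows above available to it.
    import asyncio

    from vibe_aigc.discovery import Capability
    from vibe_aigc.vibe_backend import GenerationRequest, VibeBackend

    async def main() -> None:
        backend = VibeBackend(comfyui_url="http://127.0.0.1:8188", enable_vlm=True)
        await backend.initialize()

        # TEXT_TO_VIDEO is routed through _generate_video_via_i2v:
        # step 1 renders a base image, step 2 animates it with Wan 2.1 I2V.
        # The request attributes referenced in this diff (width, height, frames,
        # seed, negative_prompt) control the composed workflows.
        result = await backend.generate(GenerationRequest(
            prompt="cyberpunk samurai in neon rain",
            capability=Capability.TEXT_TO_VIDEO,
        ))

        if result.success:
            print(result.output_url, result.quality_score)
            # New in 0.6.2: structured VLM feedback carried on the result.
            print("strengths:", result.strengths)
            print("weaknesses:", result.weaknesses)
        else:
            print("generation failed:", result.error)

    asyncio.run(main())
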
vibe_aigc/vlm_feedback.py CHANGED
@@ -118,25 +118,36 @@ class VLMFeedback:

  img = Image.open(image_path)

- prompt = f"""You are an AI art director analyzing generated images.
+ prompt = f"""You are an expert AI art director analyzing AI-generated images for quality.

- Context: {context}
+ Original prompt: {context}
+
+ IMPORTANT: You MUST provide specific, actionable prompt improvements.

- Analyze this image and respond in JSON format:
+ Analyze this image and respond ONLY with valid JSON (no markdown):
  {{
- "quality_score": <1-10>,
- "description": "<what you see>",
- "strengths": ["<strength1>", "<strength2>"],
- "weaknesses": ["<weakness1>", "<weakness2>"],
- "prompt_improvements": ["<specific prompt addition>", ...],
+ "quality_score": <1-10 based on: composition, detail, prompt adherence, aesthetic quality>,
+ "description": "<brief description of what you see>",
+ "strengths": ["<specific strength 1>", "<specific strength 2>"],
+ "weaknesses": ["<specific weakness 1>", "<specific weakness 2>"],
+ "prompt_improvements": [
+ "<SPECIFIC phrase to ADD to prompt to fix weakness 1>",
+ "<SPECIFIC phrase to ADD to prompt to fix weakness 2>",
+ "<SPECIFIC quality modifier to add>"
+ ],
  "parameter_changes": {{
- "cfg": <suggested cfg or null>,
- "steps": <suggested steps or null>,
- "sampler": "<suggested sampler or null>"
+ "cfg": <suggest higher/lower cfg if needed, or null>,
+ "steps": <suggest more/fewer steps if needed, or null>
  }}
  }}

- Be specific about what to ADD to the prompt to fix issues."""
+ REQUIRED: prompt_improvements must have at least 2 specific suggestions like:
+ - "add sharp focus" if blurry
+ - "add dramatic shadows" if flat lighting
+ - "add intricate details" if lacking detail
+ - "add correct anatomy" if distorted
+
+ Score guide: 1-3 poor, 4-5 mediocre, 6-7 good, 8-9 excellent, 10 perfect."""

  try:
  response = self.vlm.generate_content([prompt, img])
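
The tightened prompt above asks the VLM for bare JSON with no markdown wrapper. As an illustration of why that matters (this is not the package's own parsing code), a defensive parse of such a reply typically strips stray code fences before json.loads and falls back to neutral defaults when the reply is malformed:

    # Illustrative, defensive parsing of a "JSON only" VLM reply; not taken from vibe-aigc.
    import json
    from typing import Any, Dict

    def parse_vlm_reply(text: str) -> Dict[str, Any]:
        cleaned = text.strip()
        # Models sometimes wrap JSON in ```json ... ``` fences despite instructions.
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`").strip()
            if cleaned.lower().startswith("json"):
                cleaned = cleaned[4:]
        try:
            data = json.loads(cleaned)
        except json.JSONDecodeError:
            data = {}
        if not isinstance(data, dict):
            data = {}
        # Neutral defaults mirror the fields requested in the prompt above.
        return {
            "quality_score": float(data.get("quality_score", 5.0)),
            "description": data.get("description", ""),
            "strengths": data.get("strengths", []),
            "weaknesses": data.get("weaknesses", []),
            "prompt_improvements": data.get("prompt_improvements", []),
            "parameter_changes": data.get("parameter_changes", {}),
        }
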
vibe_aigc-0.6.0.dist-info/METADATA → vibe_aigc-0.6.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: vibe-aigc
- Version: 0.6.0
+ Version: 0.6.2
  Summary: A New Paradigm for Content Generation via Agentic Orchestration
  Author: Vibe AIGC Contributors
  License-Expression: MIT
vibe_aigc-0.6.0.dist-info/RECORD → vibe_aigc-0.6.2.dist-info/RECORD CHANGED
@@ -8,6 +8,7 @@ vibe_aigc/comfyui.py,sha256=Do7kOaeiuJ0SVYsib2I5prOn5O0M_u0gPbK32cFHYg4,15663
  vibe_aigc/composer_general.py,sha256=r2aqVyjqimTnbBODnKxJs4fQBOp6JEHegqnvxeel9Zk,16358
  vibe_aigc/discovery.py,sha256=PrrSy25jLz44O_mq1BgnAgR2AkZHwjJxSqn_ODZm_do,14660
  vibe_aigc/executor.py,sha256=AWTqa7hoDljhXjp6gCekzbTGROtD8kptb0IU3mi03lw,38977
+ vibe_aigc/fidelity.py,sha256=z3MWAvNa554LyaK8JF6Rb-xiH1o9_NW_nA3CSoMHB8s,13542
  vibe_aigc/knowledge.py,sha256=c1gC44rbebE_FjvKyLqPbJ-_9iJQlHoXr6yBcriZTe0,40728
  vibe_aigc/llm.py,sha256=MoFrSNnHT90hZEq_pXh1rEtIZfs1RDDVshliK9wyMbw,7879
  vibe_aigc/model_registry.py,sha256=sgvNy7RK_DSBo9W0DznlpOS3J1DrL4_vfrLAO-tSa6Y,27689
@@ -17,19 +18,19 @@ vibe_aigc/persistence.py,sha256=inrJQjmCK4LighxQSmJorR6c7OvRzx-cmEb5HCQS9PY,1061
  vibe_aigc/planner.py,sha256=hmnASmofpahNuF9ei_0DxzHxm23vYjF67u-SB_G5EcU,33129
  vibe_aigc/tools.py,sha256=Tm_NA53yJjjvCrUuZ7YVtdLAdfUgxOLm5zZzIcJYvHI,15572
  vibe_aigc/tools_multimodal.py,sha256=asSJJqF0hrD9uNiYpuieVY-lbgEXjbK3UjT20nX2Lig,20405
- vibe_aigc/vibe_backend.py,sha256=BnGxIyNcnqngRpVw_T5qoeKnsFrsh0z20x7oP9PNwkw,14304
+ vibe_aigc/vibe_backend.py,sha256=LUm9t3JeGfezJTjau9XAQeRN_DmHrPX2PCjNRhGE4lQ,23808
  vibe_aigc/video.py,sha256=0fg8RUpEsaJqDskAPiGP8yuyQDVCUvIy-uLScq_BOwg,14111
  vibe_aigc/visualization.py,sha256=jDs2f1vj4k8ZnJTA_niKLBH2NMahTgWneiADlNmW24s,7143
- vibe_aigc/vlm_feedback.py,sha256=-4_QAp1uCoHEzQKYln1Zk25V-jyr-Gb4dptdlJVUBUA,10137
+ vibe_aigc/vlm_feedback.py,sha256=Da26q5qmJr-vwdsstum8CTAjbedeLWAGxZfla2BS0Ko,10781
  vibe_aigc/workflow_backend.py,sha256=kMIgZgyg7O7txniBSjRzEYN6Aal9zEjfRJAO8FnW07o,12136
  vibe_aigc/workflow_composer.py,sha256=mNdrlyhq-Fi0H02_iB1mpCwL9k71gv1ST10Ftx99vW0,22586
  vibe_aigc/workflow_executor.py,sha256=mfYLOTfPmI7Upooxy07nPmlbZ-HZAfC18IaNW80G31E,20734
  vibe_aigc/workflow_registry.py,sha256=Z6gB1cA366LXqHcfqBF1od_8ySxAOt5RpKKaaZPqqUo,22359
  vibe_aigc/workflow_strategies.py,sha256=i_qqUrn-2F6lT9dNyFdTdy0NzE8ZnRNxAMl6zrOAtD8,26148
  vibe_aigc/workflows.py,sha256=uk7RjNVow6eimEdqfVQFDtLgHSkg0LUjSoa2N7C47u0,13886
- vibe_aigc-0.6.0.dist-info/licenses/LICENSE,sha256=Ir4dCTvOsbfoiOh9vYbhIKDH59S7J6qhJYZmHHICoKY,1079
- vibe_aigc-0.6.0.dist-info/METADATA,sha256=p0HacVM5u17rSaNYh5SxilbNIeFE4VFqMU1A9m_0coA,6604
- vibe_aigc-0.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
- vibe_aigc-0.6.0.dist-info/entry_points.txt,sha256=2htp4yXJMvCAQXTB39XWWwbBPP3MYUYXsqlwMeQsd7o,49
- vibe_aigc-0.6.0.dist-info/top_level.txt,sha256=Cpjz8X0WEhnhaigqxmsZSl9VxduaDspj7WuVUGGLeao,10
- vibe_aigc-0.6.0.dist-info/RECORD,,
+ vibe_aigc-0.6.2.dist-info/licenses/LICENSE,sha256=Ir4dCTvOsbfoiOh9vYbhIKDH59S7J6qhJYZmHHICoKY,1079
+ vibe_aigc-0.6.2.dist-info/METADATA,sha256=Tdhh5kFrhVvnvWNe3MkcXwrTW7FL_0w4rZ15rtmar34,6604
+ vibe_aigc-0.6.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+ vibe_aigc-0.6.2.dist-info/entry_points.txt,sha256=2htp4yXJMvCAQXTB39XWWwbBPP3MYUYXsqlwMeQsd7o,49
+ vibe_aigc-0.6.2.dist-info/top_level.txt,sha256=Cpjz8X0WEhnhaigqxmsZSl9VxduaDspj7WuVUGGLeao,10
+ vibe_aigc-0.6.2.dist-info/RECORD,,