vibe-aigc 0.6.3__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vibe_aigc/__init__.py +46 -4
- vibe_aigc/composer_general.py +408 -1
- vibe_aigc/discovery.py +107 -2
- vibe_aigc/knowledge.py +512 -24
- vibe_aigc/llm.py +305 -62
- vibe_aigc/models.py +98 -1
- vibe_aigc/pipeline.py +565 -0
- vibe_aigc/planner.py +145 -0
- vibe_aigc/tools.py +32 -0
- vibe_aigc/tools_audio.py +746 -0
- vibe_aigc/tools_comfyui.py +708 -3
- vibe_aigc/tools_utility.py +997 -0
- vibe_aigc/tools_video.py +799 -0
- vibe_aigc/tools_vision.py +1187 -0
- vibe_aigc/vibe_backend.py +11 -1
- vibe_aigc/vlm_feedback.py +186 -7
- {vibe_aigc-0.6.3.dist-info → vibe_aigc-0.7.0.dist-info}/METADATA +29 -1
- {vibe_aigc-0.6.3.dist-info → vibe_aigc-0.7.0.dist-info}/RECORD +22 -17
- {vibe_aigc-0.6.3.dist-info → vibe_aigc-0.7.0.dist-info}/WHEEL +0 -0
- {vibe_aigc-0.6.3.dist-info → vibe_aigc-0.7.0.dist-info}/entry_points.txt +0 -0
- {vibe_aigc-0.6.3.dist-info → vibe_aigc-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {vibe_aigc-0.6.3.dist-info → vibe_aigc-0.7.0.dist-info}/top_level.txt +0 -0
vibe_aigc/tools_comfyui.py
CHANGED
|
@@ -6,9 +6,18 @@ Integrates the VibeBackend into the Paper's tool architecture (Section 5.4):
|
|
|
6
6
|
the optimal ensemble of components."
|
|
7
7
|
|
|
8
8
|
This makes ComfyUI-based generation available to MetaPlanner as atomic tools.
|
|
9
|
+
|
|
10
|
+
Image Manipulation Tools:
|
|
11
|
+
- UpscaleTool: Upscale images using RealESRGAN, etc.
|
|
12
|
+
- InpaintTool: Mask-based inpainting
|
|
13
|
+
- Img2ImgTool: Image variation/transformation
|
|
14
|
+
- RemoveBackgroundTool: Background removal with alpha
|
|
15
|
+
- FaceRestoreTool: Face enhancement (CodeFormer, GFPGAN)
|
|
9
16
|
"""
|
|
10
17
|
|
|
11
|
-
|
|
18
|
+
import asyncio
|
|
19
|
+
import aiohttp
|
|
20
|
+
from typing import Dict, Any, Optional, List
|
|
12
21
|
from dataclasses import dataclass
|
|
13
22
|
|
|
14
23
|
from .tools import BaseTool, ToolSpec, ToolResult, ToolCategory
|
|
@@ -16,6 +25,87 @@ from .vibe_backend import VibeBackend, GenerationRequest, GenerationResult
|
|
|
16
25
|
from .discovery import Capability
|
|
17
26
|
|
|
18
27
|
|
|
28
|
+
class ComfyUIExecutor:
    """Lightweight async executor for ComfyUI workflows.

    Talks to a ComfyUI server over its HTTP API:
    uploads input images (``POST /upload/image``), queues workflow graphs
    (``POST /prompt``), polls ``GET /history/<prompt_id>`` for completion,
    and returns result images as ``/view`` URLs.
    """

    def __init__(self, comfyui_url: str = "http://127.0.0.1:8188"):
        # Strip a trailing slash so path segments can be appended safely.
        self.url = comfyui_url.rstrip('/')

    async def upload_image(self, image_data: bytes, filename: str = "input.png") -> str:
        """Upload an image to ComfyUI and return the server-side filename.

        ComfyUI may rename the file (e.g. on name collision); we return the
        name from the response when present, falling back to the requested one.
        """
        async with aiohttp.ClientSession() as session:
            form = aiohttp.FormData()
            # Only .png is sent as image/png; everything else is assumed JPEG.
            content_type = 'image/png' if filename.endswith('.png') else 'image/jpeg'
            form.add_field('image', image_data, filename=filename, content_type=content_type)

            async with session.post(f"{self.url}/upload/image", data=form) as resp:
                result = await resp.json()
                return result.get("name", filename)

    async def download_image(self, url: str) -> bytes:
        """Download an image from a URL; raises on any non-200 response."""
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=aiohttp.ClientTimeout(total=60)) as resp:
                if resp.status == 200:
                    return await resp.read()
                raise Exception(f"Failed to download image: {resp.status}")

    async def execute(self, workflow: Dict[str, Any], timeout: int = 600) -> Dict[str, Any]:
        """Execute a workflow and return a result dict.

        Args:
            workflow: ComfyUI API-format node graph ({"<id>": {"class_type", "inputs"}}).
            timeout: Overall polling budget in seconds (polled every 2s).

        Returns:
            {"success": True, "output_url": ..., "filename": ...} on success,
            or {"success": False, "error": ...} on any failure. Never raises.
        """
        try:
            async with aiohttp.ClientSession() as session:
                # Queue the workflow on the server.
                async with session.post(
                    f"{self.url}/prompt",
                    json={"prompt": workflow},
                    timeout=aiohttp.ClientTimeout(total=30)
                ) as resp:
                    data = await resp.json()

                if "error" in data:
                    return {"success": False, "error": str(data["error"])}

                prompt_id = data["prompt_id"]

                # Poll for completion every 2s until the timeout budget is spent.
                for _ in range(timeout // 2):
                    await asyncio.sleep(2)

                    async with session.get(f"{self.url}/history/{prompt_id}") as resp:
                        history = await resp.json()

                    if prompt_id in history:
                        status = history[prompt_id].get("status", {})

                        if status.get("completed") or status.get("status_str") == "success":
                            outputs = history[prompt_id].get("outputs", {})
                            # Return the first image produced by any output node.
                            for node_output in outputs.values():
                                if "images" in node_output:
                                    img = node_output["images"][0]
                                    filename = img.get("filename", "")
                                    subfolder = img.get("subfolder", "")

                                    # BUG FIX: the filename was not interpolated
                                    # into the /view URL, so the returned link
                                    # never pointed at the generated image.
                                    url = f"{self.url}/view?filename={filename}"
                                    if subfolder:
                                        url += f"&subfolder={subfolder}"

                                    return {
                                        "success": True,
                                        "output_url": url,
                                        "filename": filename
                                    }

                            return {"success": False, "error": "No image output found"}

                        if status.get("status_str") == "error":
                            return {"success": False, "error": "Workflow execution failed"}

                return {"success": False, "error": "Timeout waiting for result"}

        except Exception as e:
            return {"success": False, "error": str(e)}
|
|
107
|
+
|
|
108
|
+
|
|
19
109
|
class ImageGenerationTool(BaseTool):
|
|
20
110
|
"""
|
|
21
111
|
Image generation tool using local ComfyUI.
|
|
@@ -263,9 +353,624 @@ class VideoGenerationTool(BaseTool):
|
|
|
263
353
|
)
|
|
264
354
|
|
|
265
355
|
|
|
356
|
+
class UpscaleTool(BaseTool):
    """
    Image upscaling tool using ComfyUI.

    Uses upscale models like RealESRGAN, 4x-UltraSharp, etc.
    """

    def __init__(self, comfyui_url: str = "http://127.0.0.1:8188"):
        self.comfyui_url = comfyui_url
        self._executor = ComfyUIExecutor(comfyui_url)

    @property
    def spec(self) -> ToolSpec:
        # Declarative contract consumed by the planner when selecting tools.
        return ToolSpec(
            name="upscale",
            description="Upscale images using AI models (RealESRGAN, 4x-UltraSharp, etc.)",
            category=ToolCategory.IMAGE,
            input_schema={
                "type": "object",
                "required": ["image_url"],
                "properties": {
                    "image_url": {"type": "string", "description": "URL of the image to upscale"},
                    "scale": {"type": "integer", "enum": [2, 4], "default": 2, "description": "Upscale factor (2x or 4x)"},
                    "model": {"type": "string", "default": "RealESRGAN_x4plus.pth", "description": "Upscale model to use"}
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "upscaled_url": {"type": "string", "description": "URL of the upscaled image"}
                }
            },
            examples=[
                {
                    "input": {"image_url": "http://...", "scale": 4},
                    "output": {"upscaled_url": "http://..."}
                }
            ]
        )

    async def execute(
        self,
        inputs: Dict[str, Any],
        context: Optional[Dict[str, Any]] = None
    ) -> ToolResult:
        """Upscale an image through a 4-node ComfyUI graph
        (LoadImage -> UpscaleModelLoader -> ImageUpscaleWithModel -> SaveImage)."""
        try:
            source_url = inputs.get("image_url")
            factor = inputs.get("scale", 2)
            # Pick a scale-matched default model unless the caller named one.
            if factor == 4:
                fallback_model = "RealESRGAN_x4plus.pth"
            else:
                fallback_model = "RealESRGAN_x2plus.pth"
            model = inputs.get("model", fallback_model)

            # Fetch the source image, then push it onto the ComfyUI server.
            raw_bytes = await self._executor.download_image(source_url)
            server_name = await self._executor.upload_image(raw_bytes, "upscale_input.png")

            # Node references are ["<node id>", <output index>].
            load_node = {"class_type": "LoadImage", "inputs": {"image": server_name}}
            loader_node = {"class_type": "UpscaleModelLoader", "inputs": {"model_name": model}}
            upscale_node = {
                "class_type": "ImageUpscaleWithModel",
                "inputs": {
                    "upscale_model": ["2", 0],
                    "image": ["1", 0]
                }
            }
            save_node = {
                "class_type": "SaveImage",
                "inputs": {
                    "images": ["3", 0],
                    "filename_prefix": "upscaled"
                }
            }
            workflow = {"1": load_node, "2": loader_node, "3": upscale_node, "4": save_node}

            outcome = await self._executor.execute(workflow)

            if not outcome.get("success"):
                return ToolResult(success=False, output=None, error=outcome.get("error"))

            return ToolResult(
                success=True,
                output={"upscaled_url": outcome["output_url"]},
                metadata={"model": model, "scale": factor}
            )

        except Exception as e:
            return ToolResult(success=False, output=None, error=str(e))
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
class InpaintTool(BaseTool):
    """
    Mask-based inpainting tool using ComfyUI.

    Uses SD inpainting to fill masked regions based on a prompt.
    White pixels in the mask mark the region to be regenerated.
    """

    def __init__(self, comfyui_url: str = "http://127.0.0.1:8188"):
        # comfyui_url: base URL of the ComfyUI server (no trailing path).
        self.comfyui_url = comfyui_url
        self._executor = ComfyUIExecutor(comfyui_url)

    @property
    def spec(self) -> ToolSpec:
        """Declarative tool contract (name, category, I/O schemas, examples)."""
        return ToolSpec(
            name="inpaint",
            description="Inpaint masked regions of an image based on a prompt",
            category=ToolCategory.IMAGE,
            input_schema={
                "type": "object",
                "required": ["image_url", "mask_url", "prompt"],
                "properties": {
                    "image_url": {"type": "string", "description": "URL of the image to inpaint"},
                    "mask_url": {"type": "string", "description": "URL of the mask (white = inpaint area)"},
                    "prompt": {"type": "string", "description": "What to generate in the masked area"},
                    "negative_prompt": {"type": "string", "description": "What to avoid"},
                    "strength": {"type": "number", "default": 1.0, "description": "Denoising strength (0-1)"}
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "inpainted_url": {"type": "string", "description": "URL of the inpainted image"}
                }
            },
            examples=[
                {
                    "input": {"image_url": "http://...", "mask_url": "http://...", "prompt": "a red rose"},
                    "output": {"inpainted_url": "http://..."}
                }
            ]
        )

    async def execute(
        self,
        inputs: Dict[str, Any],
        context: Optional[Dict[str, Any]] = None
    ) -> ToolResult:
        """Inpaint an image region.

        Downloads the image and mask, uploads both to ComfyUI, and runs an
        11-node latent-mask inpainting graph:
        LoadImage -> VAEEncode -> SetLatentNoiseMask -> KSampler -> VAEDecode -> SaveImage.
        Returns a ToolResult with {"inpainted_url": ...} on success; never raises.
        """
        try:
            image_url = inputs.get("image_url")
            mask_url = inputs.get("mask_url")
            prompt = inputs.get("prompt", "")
            negative_prompt = inputs.get("negative_prompt", "blurry, distorted, ugly")
            # strength maps directly onto KSampler "denoise": 1.0 fully
            # regenerates the masked region; lower values keep more original.
            strength = inputs.get("strength", 1.0)

            # Download and upload images
            image_data = await self._executor.download_image(image_url)
            mask_data = await self._executor.download_image(mask_url)

            uploaded_image = await self._executor.upload_image(image_data, "inpaint_image.png")
            uploaded_mask = await self._executor.upload_image(mask_data, "inpaint_mask.png")

            # Build inpaint workflow.
            # Node refs are ["<node id>", <output index>]; checkpoint node "1"
            # provides model (out 0), CLIP (out 1) and VAE (out 2).
            workflow = {
                "1": {
                    "class_type": "CheckpointLoaderSimple",
                    "inputs": {"ckpt_name": "sd_xl_base_1.0.safetensors"}
                },
                # "2": source image, "3": mask image.
                "2": {
                    "class_type": "LoadImage",
                    "inputs": {"image": uploaded_image}
                },
                "3": {
                    "class_type": "LoadImage",
                    "inputs": {"image": uploaded_mask}
                },
                # Convert the mask image's red channel into a MASK tensor.
                "4": {
                    "class_type": "ImageToMask",
                    "inputs": {"image": ["3", 0], "channel": "red"}
                },
                # Encode the source into latent space, then restrict the
                # sampler's noise to the masked region only.
                "5": {
                    "class_type": "VAEEncode",
                    "inputs": {"pixels": ["2", 0], "vae": ["1", 2]}
                },
                "6": {
                    "class_type": "SetLatentNoiseMask",
                    "inputs": {"samples": ["5", 0], "mask": ["4", 0]}
                },
                # Positive ("7") and negative ("8") prompt conditioning.
                "7": {
                    "class_type": "CLIPTextEncode",
                    "inputs": {"text": prompt, "clip": ["1", 1]}
                },
                "8": {
                    "class_type": "CLIPTextEncode",
                    "inputs": {"text": negative_prompt, "clip": ["1", 1]}
                },
                "9": {
                    "class_type": "KSampler",
                    "inputs": {
                        "seed": 0,
                        "steps": 25,
                        "cfg": 7.0,
                        "sampler_name": "euler_ancestral",
                        "scheduler": "normal",
                        "denoise": strength,
                        "model": ["1", 0],
                        "positive": ["7", 0],
                        "negative": ["8", 0],
                        "latent_image": ["6", 0]
                    }
                },
                # Decode back to pixels and persist the result image.
                "10": {
                    "class_type": "VAEDecode",
                    "inputs": {"samples": ["9", 0], "vae": ["1", 2]}
                },
                "11": {
                    "class_type": "SaveImage",
                    "inputs": {"images": ["10", 0], "filename_prefix": "inpainted"}
                }
            }

            result = await self._executor.execute(workflow)

            if result.get("success"):
                return ToolResult(
                    success=True,
                    output={"inpainted_url": result["output_url"]},
                    metadata={"prompt": prompt}
                )
            else:
                return ToolResult(success=False, output=None, error=result.get("error"))

        except Exception as e:
            # Executor/network failures are reported as a failed ToolResult.
            return ToolResult(success=False, output=None, error=str(e))
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
class Img2ImgTool(BaseTool):
    """
    Image-to-image transformation tool using ComfyUI.

    Takes an input image and transforms it based on a prompt.
    """

    def __init__(self, comfyui_url: str = "http://127.0.0.1:8188"):
        self.comfyui_url = comfyui_url
        self._executor = ComfyUIExecutor(comfyui_url)

    @property
    def spec(self) -> ToolSpec:
        # Declarative contract consumed by the planner when selecting tools.
        return ToolSpec(
            name="img2img",
            description="Transform an image based on a text prompt",
            category=ToolCategory.IMAGE,
            input_schema={
                "type": "object",
                "required": ["image_url", "prompt"],
                "properties": {
                    "image_url": {"type": "string", "description": "URL of the input image"},
                    "prompt": {"type": "string", "description": "Transformation prompt"},
                    "negative_prompt": {"type": "string", "description": "What to avoid"},
                    "strength": {"type": "number", "default": 0.75, "description": "Transformation strength (0-1, higher = more change)"}
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "transformed_url": {"type": "string", "description": "URL of the transformed image"}
                }
            },
            examples=[
                {
                    "input": {"image_url": "http://...", "prompt": "anime style", "strength": 0.7},
                    "output": {"transformed_url": "http://..."}
                }
            ]
        )

    async def execute(
        self,
        inputs: Dict[str, Any],
        context: Optional[Dict[str, Any]] = None
    ) -> ToolResult:
        """Run a VAE-roundtrip img2img pass over the input image."""
        try:
            src_url = inputs.get("image_url")
            positive = inputs.get("prompt", "")
            negative = inputs.get("negative_prompt", "blurry, distorted, ugly, bad quality")
            denoise = inputs.get("strength", 0.75)

            # Pull the source image down, then hand it to the ComfyUI server.
            payload = await self._executor.download_image(src_url)
            server_name = await self._executor.upload_image(payload, "img2img_input.png")

            # Graph: checkpoint -> load -> VAE encode -> (pos/neg prompts)
            #        -> KSampler -> VAE decode -> save.
            # Node refs are ["<node id>", <output index>].
            workflow = {}
            workflow["1"] = {
                "class_type": "CheckpointLoaderSimple",
                "inputs": {"ckpt_name": "sd_xl_base_1.0.safetensors"}
            }
            workflow["2"] = {
                "class_type": "LoadImage",
                "inputs": {"image": server_name}
            }
            workflow["3"] = {
                "class_type": "VAEEncode",
                "inputs": {"pixels": ["2", 0], "vae": ["1", 2]}
            }
            workflow["4"] = {
                "class_type": "CLIPTextEncode",
                "inputs": {"text": positive, "clip": ["1", 1]}
            }
            workflow["5"] = {
                "class_type": "CLIPTextEncode",
                "inputs": {"text": negative, "clip": ["1", 1]}
            }
            workflow["6"] = {
                "class_type": "KSampler",
                "inputs": {
                    "seed": 0,
                    "steps": 25,
                    "cfg": 7.0,
                    "sampler_name": "euler_ancestral",
                    "scheduler": "normal",
                    "denoise": denoise,
                    "model": ["1", 0],
                    "positive": ["4", 0],
                    "negative": ["5", 0],
                    "latent_image": ["3", 0]
                }
            }
            workflow["7"] = {
                "class_type": "VAEDecode",
                "inputs": {"samples": ["6", 0], "vae": ["1", 2]}
            }
            workflow["8"] = {
                "class_type": "SaveImage",
                "inputs": {"images": ["7", 0], "filename_prefix": "img2img"}
            }

            outcome = await self._executor.execute(workflow)

            if not outcome.get("success"):
                return ToolResult(success=False, output=None, error=outcome.get("error"))

            return ToolResult(
                success=True,
                output={"transformed_url": outcome["output_url"]},
                metadata={"prompt": positive, "strength": denoise}
            )

        except Exception as e:
            return ToolResult(success=False, output=None, error=str(e))
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
class RemoveBackgroundTool(BaseTool):
    """
    Background removal tool using ComfyUI.

    Removes background from images, outputting with alpha channel.
    Uses RMBG or similar segmentation models.

    Because the required nodes come from third-party node packs, execution
    first tries the BRIA RMBG pack and falls back to InspyrenetRembg.
    """

    def __init__(self, comfyui_url: str = "http://127.0.0.1:8188"):
        # comfyui_url: base URL of the ComfyUI server (no trailing path).
        self.comfyui_url = comfyui_url
        self._executor = ComfyUIExecutor(comfyui_url)

    @property
    def spec(self) -> ToolSpec:
        """Declarative tool contract (name, category, I/O schemas, examples)."""
        return ToolSpec(
            name="remove_background",
            description="Remove background from an image, output with transparency",
            category=ToolCategory.IMAGE,
            input_schema={
                "type": "object",
                "required": ["image_url"],
                "properties": {
                    "image_url": {"type": "string", "description": "URL of the image"}
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "image_url": {"type": "string", "description": "URL of image with alpha channel"}
                }
            },
            examples=[
                {
                    "input": {"image_url": "http://..."},
                    "output": {"image_url": "http://..."}
                }
            ]
        )

    async def execute(
        self,
        inputs: Dict[str, Any],
        context: Optional[Dict[str, Any]] = None
    ) -> ToolResult:
        """Remove background from an image.

        Tries a BRIA RMBG workflow first; if that fails (e.g. the node pack
        is not installed on the server), retries with an InspyrenetRembg
        workflow. Returns a ToolResult with {"image_url": ...} on success;
        never raises.
        """
        try:
            image_url = inputs.get("image_url")

            # Download and upload the image
            image_data = await self._executor.download_image(image_url)
            uploaded_name = await self._executor.upload_image(image_data, "rmbg_input.png")

            # Build background removal workflow using RMBG node
            # This uses the ComfyUI-BRIA_AI-RMBG or similar node
            workflow = {
                "1": {
                    "class_type": "LoadImage",
                    "inputs": {"image": uploaded_name}
                },
                "2": {
                    "class_type": "BRIA_RMBG_ModelLoader",
                    "inputs": {}
                },
                # Segmentation node: produces the foreground matte.
                "3": {
                    "class_type": "BRIA_RMBG_Zho",
                    "inputs": {
                        "rmbg_model": ["2", 0],
                        "image": ["1", 0]
                    }
                },
                # Combine the original pixels with the matte as alpha channel.
                "4": {
                    "class_type": "JoinImageWithAlpha",
                    "inputs": {
                        "image": ["1", 0],
                        "alpha": ["3", 0]
                    }
                },
                "5": {
                    "class_type": "SaveImage",
                    "inputs": {
                        "images": ["4", 0],
                        "filename_prefix": "rmbg"
                    }
                }
            }

            result = await self._executor.execute(workflow)

            if result.get("success"):
                return ToolResult(
                    success=True,
                    output={"image_url": result["output_url"]},
                    metadata={}
                )
            else:
                # Fallback: try alternative node structure (InspyrenetRembg)
                # InspyrenetRembg outputs RGBA directly, so no alpha-join node
                # is needed here.
                workflow_alt = {
                    "1": {
                        "class_type": "LoadImage",
                        "inputs": {"image": uploaded_name}
                    },
                    "2": {
                        "class_type": "InspyrenetRembg",
                        "inputs": {
                            "image": ["1", 0],
                            "torchscript_jit": "default"
                        }
                    },
                    "3": {
                        "class_type": "SaveImage",
                        "inputs": {
                            "images": ["2", 0],
                            "filename_prefix": "rmbg"
                        }
                    }
                }

                result = await self._executor.execute(workflow_alt)

                if result.get("success"):
                    return ToolResult(
                        success=True,
                        output={"image_url": result["output_url"]},
                        metadata={}
                    )
                else:
                    # Report the fallback's error (the primary error is dropped).
                    return ToolResult(success=False, output=None, error=result.get("error"))

        except Exception as e:
            # Executor/network failures are reported as a failed ToolResult.
            return ToolResult(success=False, output=None, error=str(e))
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
class FaceRestoreTool(BaseTool):
    """
    Face restoration/enhancement tool using ComfyUI.

    Uses CodeFormer or GFPGAN to enhance faces in images.
    """

    def __init__(self, comfyui_url: str = "http://127.0.0.1:8188"):
        # comfyui_url: base URL of the ComfyUI server (no trailing path).
        self.comfyui_url = comfyui_url
        self._executor = ComfyUIExecutor(comfyui_url)

    @property
    def spec(self) -> ToolSpec:
        """Declarative tool contract (name, category, I/O schemas, examples)."""
        return ToolSpec(
            name="face_restore",
            description="Enhance and restore faces in an image using CodeFormer or GFPGAN",
            category=ToolCategory.IMAGE,
            input_schema={
                "type": "object",
                "required": ["image_url"],
                "properties": {
                    "image_url": {"type": "string", "description": "URL of the image with faces"},
                    "model": {"type": "string", "enum": ["codeformer", "gfpgan"], "default": "codeformer"},
                    "fidelity": {"type": "number", "default": 0.5, "description": "CodeFormer fidelity (0-1, higher = more faithful to original)"}
                }
            },
            output_schema={
                "type": "object",
                "properties": {
                    "restored_url": {"type": "string", "description": "URL of the face-restored image"}
                }
            },
            examples=[
                {
                    "input": {"image_url": "http://...", "model": "codeformer"},
                    "output": {"restored_url": "http://..."}
                }
            ]
        )

    async def execute(
        self,
        inputs: Dict[str, Any],
        context: Optional[Dict[str, Any]] = None
    ) -> ToolResult:
        """Restore faces in an image.

        Runs a LoadImage -> FaceRestoreModelLoader -> FaceRestoreWithModel
        -> SaveImage graph. Returns a ToolResult with {"restored_url": ...}
        on success; never raises.
        """
        try:
            image_url = inputs.get("image_url")
            model = inputs.get("model", "codeformer")
            fidelity = inputs.get("fidelity", 0.5)

            # Download and upload the image
            image_data = await self._executor.download_image(image_url)
            uploaded_name = await self._executor.upload_image(image_data, "face_input.png")

            # Both backends use the same 4-node graph; only the checkpoint
            # and the fidelity value differ, so build the graph once instead
            # of duplicating it per branch. GFPGAN doesn't use fidelity the
            # same way, so it is pinned to 1.0 for that model.
            if model == "codeformer":
                model_name = "codeformer-v0.1.0.pth"
                effective_fidelity = fidelity
            else:
                model_name = "GFPGANv1.4.pth"
                effective_fidelity = 1.0

            workflow = {
                "1": {
                    "class_type": "LoadImage",
                    "inputs": {"image": uploaded_name}
                },
                "2": {
                    "class_type": "FaceRestoreModelLoader",
                    "inputs": {"model_name": model_name}
                },
                "3": {
                    "class_type": "FaceRestoreWithModel",
                    "inputs": {
                        "facerestore_model": ["2", 0],
                        "image": ["1", 0],
                        "fidelity": effective_fidelity
                    }
                },
                "4": {
                    "class_type": "SaveImage",
                    "inputs": {
                        "images": ["3", 0],
                        "filename_prefix": "face_restored"
                    }
                }
            }

            result = await self._executor.execute(workflow)

            if result.get("success"):
                return ToolResult(
                    success=True,
                    output={"restored_url": result["output_url"]},
                    # Metadata reports the caller-supplied fidelity, matching
                    # the previous behavior even for the GFPGAN branch.
                    metadata={"model": model, "fidelity": fidelity}
                )
            else:
                return ToolResult(success=False, output=None, error=result.get("error"))

        except Exception as e:
            return ToolResult(success=False, output=None, error=str(e))
|
|
962
|
+
|
|
963
|
+
|
|
266
964
|
def create_comfyui_tools(comfyui_url: str = "http://127.0.0.1:8188") -> list:
    """Create ComfyUI-based generation and manipulation tools."""
    generation_tools = [
        ImageGenerationTool(comfyui_url=comfyui_url),
        VideoGenerationTool(comfyui_url=comfyui_url),
    ]
    manipulation_tools = [
        UpscaleTool(comfyui_url=comfyui_url),
        InpaintTool(comfyui_url=comfyui_url),
        Img2ImgTool(comfyui_url=comfyui_url),
        RemoveBackgroundTool(comfyui_url=comfyui_url),
        FaceRestoreTool(comfyui_url=comfyui_url),
    ]
    return generation_tools + manipulation_tools
|