vibe-aigc 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vibe_aigc/__init__.py +46 -4
- vibe_aigc/composer_general.py +408 -1
- vibe_aigc/discovery.py +107 -2
- vibe_aigc/knowledge.py +512 -24
- vibe_aigc/llm.py +305 -62
- vibe_aigc/models.py +98 -1
- vibe_aigc/pipeline.py +565 -0
- vibe_aigc/planner.py +145 -0
- vibe_aigc/tools.py +46 -2
- vibe_aigc/tools_audio.py +746 -0
- vibe_aigc/tools_comfyui.py +976 -0
- vibe_aigc/tools_utility.py +997 -0
- vibe_aigc/tools_video.py +799 -0
- vibe_aigc/tools_vision.py +1187 -0
- vibe_aigc/vibe_backend.py +11 -1
- vibe_aigc/vlm_feedback.py +186 -7
- {vibe_aigc-0.6.2.dist-info → vibe_aigc-0.7.0.dist-info}/METADATA +29 -1
- {vibe_aigc-0.6.2.dist-info → vibe_aigc-0.7.0.dist-info}/RECORD +22 -16
- {vibe_aigc-0.6.2.dist-info → vibe_aigc-0.7.0.dist-info}/WHEEL +0 -0
- {vibe_aigc-0.6.2.dist-info → vibe_aigc-0.7.0.dist-info}/entry_points.txt +0 -0
- {vibe_aigc-0.6.2.dist-info → vibe_aigc-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {vibe_aigc-0.6.2.dist-info → vibe_aigc-0.7.0.dist-info}/top_level.txt +0 -0
vibe_aigc/__init__.py
CHANGED
@@ -11,9 +11,9 @@ Architecture (Paper Section 5):
 - AssetBank: Character and style consistency management
 """
 
-from .models import Vibe, WorkflowPlan, WorkflowNode, WorkflowNodeType
+from .models import Vibe, WorkflowPlan, WorkflowNode, WorkflowNodeType, GenerationRequest, CharacterProfile
 from .planner import MetaPlanner
-from .llm import LLMClient, LLMConfig
+from .llm import LLMClient, LLMConfig, LLMProvider, list_ollama_models, check_ollama_available
 from .executor import WorkflowExecutor, ExecutionStatus, ExecutionResult
 
 # Paper Section 5.3: Domain-Specific Expert Knowledge Base
@@ -78,8 +78,15 @@ __version__ = "0.2.0"
 __all__ = [
     # Core models
     "Vibe", "WorkflowPlan", "WorkflowNode", "WorkflowNodeType",
+    "GenerationRequest", "CharacterProfile",
+    # System Discovery
+    "SystemDiscovery", "SystemCapabilities", "Capability", "HardwareConstraints",
+    "AvailableNode", "AvailableModel", "discover_system",
+    # General Composer
+    "GeneralComposer", "NodeRequirement", "STANDARD_REQUIREMENTS", "create_composer",
     # MetaPlanner (Section 5.2)
-    "MetaPlanner", "LLMClient", "LLMConfig",
+    "MetaPlanner", "LLMClient", "LLMConfig", "LLMProvider",
+    "list_ollama_models", "check_ollama_available",
     # Executor
     "WorkflowExecutor", "ExecutionStatus", "ExecutionResult",
     # Knowledge Base (Section 5.3)
@@ -97,8 +104,31 @@ __all__ = [
     "DesignerAgent", "ScreenwriterAgent", "ComposerAgent",
     "create_default_agents",
     # Asset Bank
-    "AssetBank", "Character", "StyleGuide", "Artifact", "create_asset_bank"
+    "AssetBank", "Character", "StyleGuide", "Artifact", "create_asset_bank",
+    # Pipeline chaining
+    "Pipeline", "PipelineStep", "PipelineResult", "PipelineStatus",
+    "PipelineBuilder", "StepResult",
+    "create_image_pipeline", "create_video_pipeline"
 ]
+# System Discovery - Constraint-aware system discovery
+from .discovery import (
+    SystemDiscovery,
+    SystemCapabilities,
+    Capability,
+    HardwareConstraints,
+    AvailableNode,
+    AvailableModel,
+    discover_system,
+)
+
+# General Workflow Composer - Builds workflows from discovered nodes
+from .composer_general import (
+    GeneralComposer,
+    NodeRequirement,
+    STANDARD_REQUIREMENTS,
+    create_composer,
+)
+
 # Model Registry - Auto-detect available models
 from .model_registry import ModelRegistry, ModelCapability, ModelFamily, ModelSpec
 
@@ -116,3 +146,15 @@ from .audio import MusicGenBackend, RiffusionBackend, ElevenLabsBackend, MusicGe
 
 # MV Pipeline
 from .mv_pipeline import MVPipeline, Shot, Storyboard, create_mv
+
+# Pipeline chaining for workflow orchestration
+from .pipeline import (
+    Pipeline,
+    PipelineStep,
+    PipelineResult,
+    PipelineStatus,
+    PipelineBuilder,
+    StepResult,
+    create_image_pipeline,
+    create_video_pipeline
+)
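For orientation, a minimal usage sketch of the names newly re-exported from the package root. The names match the `__all__` additions above; calling `discover_system()` and `check_ollama_available()` with no arguments, and constructing `GenerationRequest` with only a prompt, are assumptions not confirmed by this diff.

```python
# Sketch only: exercises the names newly exported from the package root in 0.7.0.
# Assumptions (not confirmed by this diff): discover_system() and
# check_ollama_available() take no arguments; GenerationRequest needs only a
# prompt, with all other fields defaulting.
from vibe_aigc import (
    GenerationRequest,
    check_ollama_available,
    create_composer,
    discover_system,
)

caps = discover_system()              # probe the running ComfyUI instance
composer = create_composer(caps)      # compose from discovered nodes/models

if check_ollama_available():
    print("Local Ollama backend is reachable for planning")

request = GenerationRequest(prompt="a lighthouse at dusk, oil painting")
workflow = composer.compose_from_request(request)  # method added in composer_general.py below
```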
vibe_aigc/composer_general.py
CHANGED
@@ -10,9 +10,10 @@ This composer:
 NO HARDCODED NODE TYPES. Everything is discovered.
 """
 
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, Union
 from dataclasses import dataclass, field
 from .discovery import SystemCapabilities, Capability, AvailableNode, AvailableModel
+from .models import GenerationRequest, CharacterProfile
 
 
 @dataclass
@@ -46,6 +47,40 @@ STANDARD_REQUIREMENTS = {
         output_types=["CLIP"],
         preferred_patterns=["cliploader"]
     ),
+    # IP-Adapter / Character consistency
+    "load_ipadapter": NodeRequirement(
+        purpose="load_ipadapter",
+        output_types=["IPADAPTER"],
+        preferred_patterns=["ipadaptermodelloader", "ipadapterloader", "ipadapter"]
+    ),
+    "load_clip_vision": NodeRequirement(
+        purpose="load_clip_vision",
+        output_types=["CLIP_VISION"],
+        preferred_patterns=["clipvisionloader", "clip_vision"]
+    ),
+    "encode_clip_vision": NodeRequirement(
+        purpose="encode_clip_vision",
+        input_types=["CLIP_VISION", "IMAGE"],
+        output_types=["CLIP_VISION_OUTPUT"],
+        preferred_patterns=["clipvisionencode"]
+    ),
+    "apply_ipadapter": NodeRequirement(
+        purpose="apply_ipadapter",
+        input_types=["MODEL", "IPADAPTER", "IMAGE"],
+        output_types=["MODEL"],
+        preferred_patterns=["ipadapterapply", "ipadapter"]
+    ),
+    "load_image": NodeRequirement(
+        purpose="load_image",
+        output_types=["IMAGE"],
+        preferred_patterns=["loadimage", "load_image"]
+    ),
+    "load_lora": NodeRequirement(
+        purpose="load_lora",
+        input_types=["MODEL", "CLIP"],
+        output_types=["MODEL", "CLIP"],
+        preferred_patterns=["loraloader", "lora"]
+    ),
     "encode_text": NodeRequirement(
         purpose="encode_text",
         input_types=["CLIP"],
@@ -425,6 +460,264 @@ class GeneralComposer:
         base["4"]["inputs"]["batch_size"] = frames
         return base
 
+    # =========================================================================
+    # CHARACTER CONSISTENCY / IP-ADAPTER SUPPORT
+    # =========================================================================
+
+    def inject_ipadapter(
+        self,
+        workflow: Dict[str, Any],
+        reference_image: str,
+        strength: float = 0.8,
+        model_node_id: str = "1",
+        start_node_id: int = 100
+    ) -> Tuple[Dict[str, Any], str]:
+        """Inject IP-Adapter nodes into an existing workflow.
+
+        Returns updated workflow and the new model node ID to use downstream.
+        """
+        node_id = start_node_id
+
+        # Find IP-Adapter loader
+        ipadapter_loader = self.find_node_for(STANDARD_REQUIREMENTS["load_ipadapter"])
+        clip_vision_loader = self.find_node_for(STANDARD_REQUIREMENTS["load_clip_vision"])
+        ipadapter_apply = self.find_node_for(STANDARD_REQUIREMENTS["apply_ipadapter"])
+        load_image = self.find_node_for(STANDARD_REQUIREMENTS["load_image"])
+
+        if not ipadapter_apply:
+            print("No IP-Adapter apply node found - skipping character reference")
+            return workflow, model_node_id
+
+        if not load_image:
+            print("No image loader found - skipping character reference")
+            return workflow, model_node_id
+
+        # Load reference image
+        workflow[str(node_id)] = {
+            "class_type": load_image,
+            "inputs": {"image": reference_image}
+        }
+        ref_image_node = str(node_id)
+        node_id += 1
+
+        # Load CLIP Vision (if available and needed)
+        clip_vision_node = None
+        if clip_vision_loader:
+            clip_vision_models = self.caps.get_clip_vision_models()
+            if clip_vision_models:
+                workflow[str(node_id)] = {
+                    "class_type": clip_vision_loader,
+                    "inputs": {"clip_name": clip_vision_models[0].filename}
+                }
+                clip_vision_node = str(node_id)
+                node_id += 1
+
+        # Load IP-Adapter model (if loader exists)
+        ipadapter_model_node = None
+        if ipadapter_loader:
+            ipadapter_models = self.caps.get_ipadapter_models()
+            if ipadapter_models:
+                workflow[str(node_id)] = {
+                    "class_type": ipadapter_loader,
+                    "inputs": {"ipadapter_file": ipadapter_models[0].filename}
+                }
+                ipadapter_model_node = str(node_id)
+                node_id += 1
+
+        # Apply IP-Adapter
+        apply_inputs = {
+            "model": [model_node_id, 0],
+            "image": [ref_image_node, 0],
+            "weight": strength,
+        }
+
+        # Add optional inputs if available
+        if ipadapter_model_node:
+            apply_inputs["ipadapter"] = [ipadapter_model_node, 0]
+        if clip_vision_node:
+            apply_inputs["clip_vision"] = [clip_vision_node, 0]
+
+        workflow[str(node_id)] = {
+            "class_type": ipadapter_apply,
+            "inputs": apply_inputs
+        }
+        new_model_node = str(node_id)
+        node_id += 1
+
+        return workflow, new_model_node
+
+    def inject_lora(
+        self,
+        workflow: Dict[str, Any],
+        lora_path: str,
+        strength: float = 0.8,
+        model_node_id: str = "1",
+        clip_node_id: Optional[str] = None,
+        start_node_id: int = 100
+    ) -> Tuple[Dict[str, Any], str, Optional[str]]:
+        """Inject LoRA into an existing workflow.
+
+        Returns updated workflow, new model node ID, and new clip node ID.
+        """
+        lora_loader = self.find_node_for(STANDARD_REQUIREMENTS["load_lora"])
+        if not lora_loader:
+            print("No LoRA loader found - skipping LoRA injection")
+            return workflow, model_node_id, clip_node_id
+
+        node_id = start_node_id
+
+        lora_inputs = {
+            "lora_name": lora_path,
+            "strength_model": strength,
+            "strength_clip": strength,
+            "model": [model_node_id, 0],
+        }
+
+        if clip_node_id:
+            lora_inputs["clip"] = [clip_node_id, 0]
+
+        workflow[str(node_id)] = {
+            "class_type": lora_loader,
+            "inputs": lora_inputs
+        }
+        new_model_node = str(node_id)
+        new_clip_node = str(node_id) if clip_node_id else None
+
+        return workflow, new_model_node, new_clip_node
+
+    def inject_clip_vision_reference(
+        self,
+        workflow: Dict[str, Any],
+        reference_image: str,
+        start_node_id: int = 100
+    ) -> Tuple[Dict[str, Any], Optional[str]]:
+        """Inject CLIP Vision encoding for reference image (lighter alternative to IP-Adapter).
+
+        This works even without full IP-Adapter by encoding the reference image
+        via CLIP Vision, which can be used for style guidance.
+
+        Returns updated workflow and the clip vision output node ID.
+        """
+        clip_vision_loader = self.find_node_for(STANDARD_REQUIREMENTS["load_clip_vision"])
+        clip_vision_encode = self.find_node_for(STANDARD_REQUIREMENTS["encode_clip_vision"])
+        load_image = self.find_node_for(STANDARD_REQUIREMENTS["load_image"])
+
+        if not (clip_vision_encode and load_image):
+            print("CLIP Vision encode or image loader not available")
+            return workflow, None
+
+        node_id = start_node_id
+
+        # Load reference image
+        workflow[str(node_id)] = {
+            "class_type": load_image,
+            "inputs": {"image": reference_image}
+        }
+        ref_image_node = str(node_id)
+        node_id += 1
+
+        # Load CLIP Vision model (if needed)
+        clip_vision_node = None
+        if clip_vision_loader:
+            clip_vision_models = self.caps.get_clip_vision_models()
+            if clip_vision_models:
+                workflow[str(node_id)] = {
+                    "class_type": clip_vision_loader,
+                    "inputs": {"clip_name": clip_vision_models[0].filename}
+                }
+                clip_vision_node = str(node_id)
+                node_id += 1
+
+        # Encode with CLIP Vision
+        encode_inputs = {"image": [ref_image_node, 0]}
+        if clip_vision_node:
+            encode_inputs["clip_vision"] = [clip_vision_node, 0]
+
+        workflow[str(node_id)] = {
+            "class_type": clip_vision_encode,
+            "inputs": encode_inputs
+        }
+        clip_vision_output = str(node_id)
+
+        return workflow, clip_vision_output
+
+    def compose_with_character_reference(
+        self,
+        model: AvailableModel,
+        prompt: str,
+        reference_image: str,
+        character_strength: float = 0.8,
+        negative_prompt: str = "",
+        width: int = 512,
+        height: int = 512,
+        steps: int = 20,
+        cfg: float = 7.0,
+        seed: int = 0,
+        character_lora: Optional[str] = None,
+        character_lora_strength: float = 0.8
+    ) -> Optional[Dict[str, Any]]:
+        """Compose a text-to-image workflow with character reference.
+
+        Uses IP-Adapter if available, falls back to CLIP Vision encoding.
+        """
+        # Start with base text-to-image workflow
+        workflow = self.compose_text_to_image(
+            model, prompt, negative_prompt, width, height, steps, cfg, seed
+        )
+
+        if not workflow:
+            return None
+
+        # Find the model node (typically node 1 from checkpoint loader)
+        model_node_id = "1"
+        clip_node_id = "1"  # CLIP is output 1 from checkpoint loader
+
+        current_start_id = 100
+
+        # Inject character LoRA if provided
+        if character_lora:
+            workflow, model_node_id, clip_node_id = self.inject_lora(
+                workflow,
+                character_lora,
+                character_lora_strength,
+                model_node_id,
+                clip_node_id,
+                current_start_id
+            )
+            current_start_id += 10
+
+        # Try IP-Adapter first
+        if self.caps.has_ipadapter_support():
+            workflow, new_model_node = self.inject_ipadapter(
+                workflow,
+                reference_image,
+                character_strength,
+                model_node_id,
+                current_start_id
+            )
+
+            # Update sampler to use new model node
+            for node_id, node in workflow.items():
+                if node.get("class_type", "").lower() in ["ksampler", "sampler"]:
+                    if "model" in node.get("inputs", {}):
+                        node["inputs"]["model"] = [new_model_node, 0]
+
+        elif self.caps.has_reference_image_support():
+            # Fall back to CLIP Vision encoding
+            workflow, clip_vision_output = self.inject_clip_vision_reference(
+                workflow,
+                reference_image,
+                current_start_id
+            )
+            # Note: CLIP Vision output would need to be wired to compatible nodes
+            # This is a placeholder for systems without full IP-Adapter
+            print("Using CLIP Vision encoding (limited character consistency)")
+
+        else:
+            print("No character reference support available on this system")
+
+        return workflow
+
     def compose_for_capability(
         self,
         capability: Capability,
@@ -448,6 +741,120 @@ class GeneralComposer:
         return None
 
 
+    def compose_from_request(
+        self,
+        request: GenerationRequest,
+        capability: Capability = Capability.TEXT_TO_IMAGE
+    ) -> Optional[Dict[str, Any]]:
+        """Compose a workflow from a GenerationRequest.
+
+        Automatically handles character consistency if reference_image is provided.
+        """
+        # Find appropriate model
+        model = None
+        if request.model:
+            # Use specified model
+            for category_models in self.caps.models.values():
+                for m in category_models:
+                    if m.filename == request.model:
+                        model = m
+                        break
+                if model:
+                    break
+
+        if not model:
+            model = self.find_model_for(capability)
+
+        if not model:
+            print(f"No model found for {capability.value}")
+            return None
+
+        # Compose based on whether we have character reference
+        if request.reference_image:
+            workflow = self.compose_with_character_reference(
+                model=model,
+                prompt=request.prompt,
+                reference_image=request.reference_image,
+                character_strength=request.character_strength,
+                negative_prompt=request.negative_prompt,
+                width=request.width,
+                height=request.height,
+                steps=request.steps,
+                cfg=request.cfg,
+                seed=request.seed,
+                character_lora=request.character_lora,
+                character_lora_strength=request.character_lora_strength
+            )
+        else:
+            # Standard composition
+            if capability == Capability.TEXT_TO_IMAGE:
+                workflow = self.compose_text_to_image(
+                    model, request.prompt, request.negative_prompt,
+                    request.width, request.height, request.steps,
+                    request.cfg, request.seed
+                )
+            elif capability in [Capability.TEXT_TO_VIDEO, Capability.IMAGE_TO_VIDEO]:
+                workflow = self.compose_text_to_video(
+                    model, request.prompt, request.negative_prompt,
+                    request.width, request.height, request.frames,
+                    request.steps, request.cfg, request.seed
+                )
+            else:
+                workflow = self.compose_for_capability(capability, request.prompt)
+
+        # Inject additional LoRAs if specified
+        if workflow and request.loras:
+            model_node = "1"
+            clip_node = "1"
+            start_id = 200
+
+            for lora_config in request.loras:
+                lora_path = lora_config.get("path", lora_config.get("name", ""))
+                lora_strength = lora_config.get("strength", 0.8)
+
+                if lora_path:
+                    workflow, model_node, clip_node = self.inject_lora(
+                        workflow, lora_path, lora_strength,
+                        model_node, clip_node, start_id
+                    )
+                    start_id += 10
+
+        return workflow
+
+    def compose_for_character(
+        self,
+        profile: CharacterProfile,
+        prompt: str,
+        capability: Capability = Capability.TEXT_TO_IMAGE,
+        **kwargs
+    ) -> Optional[Dict[str, Any]]:
+        """Compose a workflow using a CharacterProfile for consistency.
+
+        Convenience method that extracts reference settings from profile.
+        """
+        # Merge profile settings with any overrides
+        gen_params = profile.to_generation_params()
+        gen_params.update(kwargs)
+
+        # Inject trigger words into prompt if available
+        full_prompt = prompt
+        if profile.trigger_words:
+            trigger_str = " ".join(profile.trigger_words)
+            full_prompt = f"{trigger_str}, {prompt}"
+
+        # Add character description to prompt
+        if profile.description:
+            full_prompt = f"{profile.description}, {full_prompt}"
+
+        # Create request
+        request = GenerationRequest(
+            prompt=full_prompt,
+            **gen_params
+        )
+
+        return self.compose_from_request(request, capability)
+
+
 def create_composer(capabilities: SystemCapabilities) -> GeneralComposer:
     """Create a general composer from system capabilities."""
     return GeneralComposer(capabilities)
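A hedged sketch of the character-consistency path added above. It assumes `discover_system()` takes no arguments, that a text-to-image checkpoint was discovered, and that `"hero_ref.png"` is a placeholder filename the discovered image-loader node can resolve.

```python
# Sketch only: character-consistent generation via the methods added above.
# discover_system() with no arguments is an assumption; "hero_ref.png" is an
# illustrative placeholder for an image visible to the ComfyUI image loader.
from vibe_aigc.discovery import discover_system, Capability
from vibe_aigc.composer_general import create_composer

caps = discover_system()
composer = create_composer(caps)

model = composer.find_model_for(Capability.TEXT_TO_IMAGE)
if model is not None:
    workflow = composer.compose_with_character_reference(
        model=model,
        prompt="portrait of the hero in a rain-soaked alley",
        reference_image="hero_ref.png",
        character_strength=0.8,
    )
```

As the diff shows, the composer degrades gracefully: full IP-Adapter when the nodes and models are present, CLIP Vision encoding as a lighter fallback, and otherwise a plain text-to-image graph with a warning.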
vibe_aigc/discovery.py
CHANGED
@@ -27,6 +27,8 @@ class Capability(Enum):
     UPSCALE = "upscale"
     INPAINT = "inpaint"
     AUDIO = "audio"
+    CHARACTER_CONSISTENCY = "character_consistency"  # IP-Adapter, LoRA character refs
+    STYLE_TRANSFER = "style_transfer"  # Style reference from images
     UNKNOWN = "unknown"
 
 
@@ -81,6 +83,14 @@ class AvailableModel:
         """Infer capability from filename patterns."""
         name = self.filename.lower()
 
+        # IP-Adapter / Character consistency models
+        if any(x in name for x in ['ipadapter', 'ip_adapter', 'ip-adapter', 'instantid', 'faceid', 'pulid']):
+            return Capability.CHARACTER_CONSISTENCY
+
+        # Style transfer / reference models
+        if any(x in name for x in ['style', 'reference', 'clipvision']):
+            return Capability.STYLE_TRANSFER
+
         # Video models
         if any(x in name for x in ['video', 'animate', 'motion', 'wan', 'ltx', 'svd', 'i2v', 't2v']):
             if 'i2v' in name or 'img2vid' in name:
@@ -104,6 +114,27 @@ class AvailableModel:
             return Capability.TEXT_TO_IMAGE
 
         return Capability.UNKNOWN
+
+    @property
+    def is_ipadapter(self) -> bool:
+        """Check if this is an IP-Adapter model."""
+        name = self.filename.lower()
+        return any(x in name for x in ['ipadapter', 'ip_adapter', 'ip-adapter'])
+
+    @property
+    def is_character_lora(self) -> bool:
+        """Check if this is a character/person LoRA."""
+        name = self.filename.lower()
+        # Character LoRAs often have these patterns
+        return self.category == 'loras' and any(x in name for x in [
+            'character', 'person', 'face', 'portrait', 'style', 'celeb'
+        ])
+
+    @property
+    def is_clip_vision(self) -> bool:
+        """Check if this is a CLIP Vision model."""
+        name = self.filename.lower()
+        return self.category == 'clip_vision' or 'clipvision' in name or 'clip_vision' in name
 
 
 @dataclass
@@ -126,6 +157,55 @@ class SystemCapabilities:
             result.append(model)
         return result
 
+    def get_ipadapter_models(self) -> List[AvailableModel]:
+        """Get all IP-Adapter models."""
+        result = []
+        for category in ['ipadapter', 'instantid', 'pulid', 'faceid']:
+            result.extend(self.models.get(category, []))
+        # Also check other categories for IP-Adapter files
+        for category_models in self.models.values():
+            for model in category_models:
+                if model.is_ipadapter and model not in result:
+                    result.append(model)
+        return result
+
+    def get_clip_vision_models(self) -> List[AvailableModel]:
+        """Get all CLIP Vision models."""
+        result = list(self.models.get('clip_vision', []))
+        for category_models in self.models.values():
+            for model in category_models:
+                if model.is_clip_vision and model not in result:
+                    result.append(model)
+        return result
+
+    def get_character_loras(self) -> List[AvailableModel]:
+        """Get all character/person LoRAs."""
+        result = []
+        for model in self.models.get('loras', []):
+            if model.is_character_lora:
+                result.append(model)
+        return result
+
+    def has_ipadapter_support(self) -> bool:
+        """Check if full IP-Adapter workflow is possible."""
+        # Need IP-Adapter node + IP-Adapter model + CLIP Vision
+        node_names = set(n.lower() for n in self.nodes.keys())
+        has_ipadapter_node = any('ipadapter' in n for n in node_names)
+        has_ipadapter_model = bool(self.get_ipadapter_models())
+        has_clip_vision = bool(self.get_clip_vision_models()) or 'CLIPVisionLoader' in self.nodes
+        return has_ipadapter_node and (has_ipadapter_model or has_clip_vision)
+
+    def has_reference_image_support(self) -> bool:
+        """Check if any reference image workflow is possible (IP-Adapter, ByteDance, etc.)."""
+        node_names = set(n.lower() for n in self.nodes.keys())
+        # Check for various reference image approaches
+        return (
+            any('ipadapter' in n for n in node_names) or
+            any('reference' in n and 'image' in n for n in node_names) or
+            any('bytedance' in n.lower() for n in node_names) or
+            'CLIPVisionEncode' in self.nodes  # Can encode reference images
+        )
+
     def summary(self) -> str:
         """Human-readable summary."""
         lines = [
@@ -149,6 +229,15 @@ class SystemCapabilities:
             elif cap != Capability.UNKNOWN:
                 lines.append(f"  [NO] {cap.value}")
 
+        # Character consistency details
+        lines.append("")
+        lines.append("Character Consistency:")
+        lines.append(f"  IP-Adapter support: {'YES' if self.has_ipadapter_support() else 'NO'}")
+        lines.append(f"  Reference image support: {'YES' if self.has_reference_image_support() else 'NO'}")
+        lines.append(f"  IP-Adapter models: {len(self.get_ipadapter_models())}")
+        lines.append(f"  CLIP Vision models: {len(self.get_clip_vision_models())}")
+        lines.append(f"  Character LoRAs: {len(self.get_character_loras())}")
+
         return "\n".join(lines)
 
 
@@ -231,10 +320,12 @@ class SystemDiscovery:
         """Discover available models via /models/* endpoints."""
         models = {}
 
-        # Standard ComfyUI model categories
+        # Standard ComfyUI model categories + IP-Adapter related
         categories = [
             "checkpoints", "unet", "diffusion_models", "vae",
-            "clip", "loras", "upscale_models", "embeddings"
+            "clip", "loras", "upscale_models", "embeddings",
+            # IP-Adapter / Character consistency related
+            "ipadapter", "clip_vision", "insightface", "instantid", "pulid", "faceid"
         ]
 
         for category in categories:
@@ -275,6 +366,20 @@ class SystemDiscovery:
         # Also check node availability for capabilities
         node_names = set(n.lower() for n in nodes.keys())
 
+        # IP-Adapter / Character consistency nodes
+        ip_adapter_patterns = ['ipadapter', 'ip_adapter', 'ip-adapter', 'instantid', 'faceid', 'pulid']
+        if any(any(p in n for p in ip_adapter_patterns) for n in node_names):
+            capabilities.add(Capability.CHARACTER_CONSISTENCY)
+
+        # CLIP Vision (needed for IP-Adapter) - partial support for character refs
+        if any('clipvision' in n or 'clip_vision' in n for n in node_names):
+            # CLIP Vision enables style/image reference even without full IP-Adapter
+            capabilities.add(Capability.STYLE_TRANSFER)
+
+        # ByteDance reference nodes (alternative to IP-Adapter)
+        if any('reference' in n and ('image' in n or 'bytedance' in n) for n in node_names):
+            capabilities.add(Capability.CHARACTER_CONSISTENCY)
+
         # Video nodes
         if any('video' in n or 'animate' in n for n in node_names):
             capabilities.add(Capability.TEXT_TO_VIDEO)