vibe-aigc 0.6.2__py3-none-any.whl → 0.7.0__py3-none-any.whl

vibe_aigc/__init__.py CHANGED
@@ -11,9 +11,9 @@ Architecture (Paper Section 5):
  - AssetBank: Character and style consistency management
  """

- from .models import Vibe, WorkflowPlan, WorkflowNode, WorkflowNodeType
+ from .models import Vibe, WorkflowPlan, WorkflowNode, WorkflowNodeType, GenerationRequest, CharacterProfile
  from .planner import MetaPlanner
- from .llm import LLMClient, LLMConfig
+ from .llm import LLMClient, LLMConfig, LLMProvider, list_ollama_models, check_ollama_available
  from .executor import WorkflowExecutor, ExecutionStatus, ExecutionResult

  # Paper Section 5.3: Domain-Specific Expert Knowledge Base
@@ -78,8 +78,15 @@ __version__ = "0.2.0"
  __all__ = [
  # Core models
  "Vibe", "WorkflowPlan", "WorkflowNode", "WorkflowNodeType",
+ "GenerationRequest", "CharacterProfile",
+ # System Discovery
+ "SystemDiscovery", "SystemCapabilities", "Capability", "HardwareConstraints",
+ "AvailableNode", "AvailableModel", "discover_system",
+ # General Composer
+ "GeneralComposer", "NodeRequirement", "STANDARD_REQUIREMENTS", "create_composer",
  # MetaPlanner (Section 5.2)
- "MetaPlanner", "LLMClient", "LLMConfig",
+ "MetaPlanner", "LLMClient", "LLMConfig", "LLMProvider",
+ "list_ollama_models", "check_ollama_available",
  # Executor
  "WorkflowExecutor", "ExecutionStatus", "ExecutionResult",
  # Knowledge Base (Section 5.3)
@@ -97,8 +104,31 @@ __all__ = [
  "DesignerAgent", "ScreenwriterAgent", "ComposerAgent",
  "create_default_agents",
  # Asset Bank
- "AssetBank", "Character", "StyleGuide", "Artifact", "create_asset_bank"
+ "AssetBank", "Character", "StyleGuide", "Artifact", "create_asset_bank",
+ # Pipeline chaining
+ "Pipeline", "PipelineStep", "PipelineResult", "PipelineStatus",
+ "PipelineBuilder", "StepResult",
+ "create_image_pipeline", "create_video_pipeline"
  ]
+ # System Discovery - Constraint-aware system discovery
+ from .discovery import (
+ SystemDiscovery,
+ SystemCapabilities,
+ Capability,
+ HardwareConstraints,
+ AvailableNode,
+ AvailableModel,
+ discover_system,
+ )
+
+ # General Workflow Composer - Builds workflows from discovered nodes
+ from .composer_general import (
+ GeneralComposer,
+ NodeRequirement,
+ STANDARD_REQUIREMENTS,
+ create_composer,
+ )
+
  # Model Registry - Auto-detect available models
  from .model_registry import ModelRegistry, ModelCapability, ModelFamily, ModelSpec

@@ -116,3 +146,15 @@ from .audio import MusicGenBackend, RiffusionBackend, ElevenLabsBackend, MusicGe

  # MV Pipeline
  from .mv_pipeline import MVPipeline, Shot, Storyboard, create_mv
+
+ # Pipeline chaining for workflow orchestration
+ from .pipeline import (
+ Pipeline,
+ PipelineStep,
+ PipelineResult,
+ PipelineStatus,
+ PipelineBuilder,
+ StepResult,
+ create_image_pipeline,
+ create_video_pipeline
+ )
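
Taken together, the __init__.py changes surface the new discovery, composer, request, and pipeline APIs at the package root. A minimal usage sketch of how the new exports fit together (illustrative only; discover_system() is assumed to work with default arguments, and the prompt string is made up):

    from vibe_aigc import discover_system, create_composer, GenerationRequest

    caps = discover_system()            # probe the locally available backend (assumed defaults)
    composer = create_composer(caps)    # GeneralComposer bound to the discovered capabilities

    request = GenerationRequest(prompt="a recurring heroine, cinematic lighting")
    workflow = composer.compose_from_request(request)   # defaults to Capability.TEXT_TO_IMAGE

The Pipeline, PipelineBuilder, and related names are re-exported here as well, but their construction API is not shown in this diff, so it is left out of the sketch.
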
vibe_aigc/composer_general.py CHANGED
@@ -10,9 +10,10 @@ This composer:
  NO HARDCODED NODE TYPES. Everything is discovered.
  """

- from typing import Any, Dict, List, Optional, Tuple
+ from typing import Any, Dict, List, Optional, Tuple, Union
  from dataclasses import dataclass, field
  from .discovery import SystemCapabilities, Capability, AvailableNode, AvailableModel
+ from .models import GenerationRequest, CharacterProfile


  @dataclass
@@ -46,6 +47,40 @@ STANDARD_REQUIREMENTS = {
  output_types=["CLIP"],
  preferred_patterns=["cliploader"]
  ),
+ # IP-Adapter / Character consistency
+ "load_ipadapter": NodeRequirement(
+ purpose="load_ipadapter",
+ output_types=["IPADAPTER"],
+ preferred_patterns=["ipadaptermodelloader", "ipadapterloader", "ipadapter"]
+ ),
+ "load_clip_vision": NodeRequirement(
+ purpose="load_clip_vision",
+ output_types=["CLIP_VISION"],
+ preferred_patterns=["clipvisionloader", "clip_vision"]
+ ),
+ "encode_clip_vision": NodeRequirement(
+ purpose="encode_clip_vision",
+ input_types=["CLIP_VISION", "IMAGE"],
+ output_types=["CLIP_VISION_OUTPUT"],
+ preferred_patterns=["clipvisionencode"]
+ ),
+ "apply_ipadapter": NodeRequirement(
+ purpose="apply_ipadapter",
+ input_types=["MODEL", "IPADAPTER", "IMAGE"],
+ output_types=["MODEL"],
+ preferred_patterns=["ipadapterapply", "ipadapter"]
+ ),
+ "load_image": NodeRequirement(
+ purpose="load_image",
+ output_types=["IMAGE"],
+ preferred_patterns=["loadimage", "load_image"]
+ ),
+ "load_lora": NodeRequirement(
+ purpose="load_lora",
+ input_types=["MODEL", "CLIP"],
+ output_types=["MODEL", "CLIP"],
+ preferred_patterns=["loraloader", "lora"]
+ ),
  "encode_text": NodeRequirement(
  purpose="encode_text",
  input_types=["CLIP"],
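
The new STANDARD_REQUIREMENTS entries describe IP-Adapter, CLIP Vision, image, and LoRA loader nodes by their input/output types and name patterns rather than by hard-coded class names. The same NodeRequirement shape can describe any other node; a sketch of resolving a custom requirement against a discovered system (the upscale requirement and its patterns are hypothetical, and discover_system() defaults are assumed):

    from vibe_aigc import discover_system, create_composer, NodeRequirement

    composer = create_composer(discover_system())
    upscale_req = NodeRequirement(
        purpose="upscale_image",                     # hypothetical purpose label
        input_types=["IMAGE"],
        output_types=["IMAGE"],
        preferred_patterns=["imageupscale", "upscale"],
    )
    node_class = composer.find_node_for(upscale_req)  # class_type string, falsy if nothing matches
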
@@ -425,6 +460,264 @@ class GeneralComposer:
  base["4"]["inputs"]["batch_size"] = frames
  return base

+ # =========================================================================
+ # CHARACTER CONSISTENCY / IP-ADAPTER SUPPORT
+ # =========================================================================
+
+ def inject_ipadapter(
+ self,
+ workflow: Dict[str, Any],
+ reference_image: str,
+ strength: float = 0.8,
+ model_node_id: str = "1",
+ start_node_id: int = 100
+ ) -> Tuple[Dict[str, Any], str]:
+ """Inject IP-Adapter nodes into an existing workflow.
+
+ Returns updated workflow and the new model node ID to use downstream.
+ """
+ node_id = start_node_id
+
+ # Find IP-Adapter loader
+ ipadapter_loader = self.find_node_for(STANDARD_REQUIREMENTS["load_ipadapter"])
+ clip_vision_loader = self.find_node_for(STANDARD_REQUIREMENTS["load_clip_vision"])
+ ipadapter_apply = self.find_node_for(STANDARD_REQUIREMENTS["apply_ipadapter"])
+ load_image = self.find_node_for(STANDARD_REQUIREMENTS["load_image"])
+
+ if not ipadapter_apply:
+ print("No IP-Adapter apply node found - skipping character reference")
+ return workflow, model_node_id
+
+ if not load_image:
+ print("No image loader found - skipping character reference")
+ return workflow, model_node_id
+
+ # Load reference image
+ workflow[str(node_id)] = {
+ "class_type": load_image,
+ "inputs": {"image": reference_image}
+ }
+ ref_image_node = str(node_id)
+ node_id += 1
+
+ # Load CLIP Vision (if available and needed)
+ clip_vision_node = None
+ if clip_vision_loader:
+ clip_vision_models = self.caps.get_clip_vision_models()
+ if clip_vision_models:
+ workflow[str(node_id)] = {
+ "class_type": clip_vision_loader,
+ "inputs": {"clip_name": clip_vision_models[0].filename}
+ }
+ clip_vision_node = str(node_id)
+ node_id += 1
+
+ # Load IP-Adapter model (if loader exists)
+ ipadapter_model_node = None
+ if ipadapter_loader:
+ ipadapter_models = self.caps.get_ipadapter_models()
+ if ipadapter_models:
+ workflow[str(node_id)] = {
+ "class_type": ipadapter_loader,
+ "inputs": {"ipadapter_file": ipadapter_models[0].filename}
+ }
+ ipadapter_model_node = str(node_id)
+ node_id += 1
+
+ # Apply IP-Adapter
+ apply_inputs = {
+ "model": [model_node_id, 0],
+ "image": [ref_image_node, 0],
+ "weight": strength,
+ }
+
+ # Add optional inputs if available
+ if ipadapter_model_node:
+ apply_inputs["ipadapter"] = [ipadapter_model_node, 0]
+ if clip_vision_node:
+ apply_inputs["clip_vision"] = [clip_vision_node, 0]
+
+ workflow[str(node_id)] = {
+ "class_type": ipadapter_apply,
+ "inputs": apply_inputs
+ }
+ new_model_node = str(node_id)
+ node_id += 1
+
+ return workflow, new_model_node
+
+ def inject_lora(
+ self,
+ workflow: Dict[str, Any],
+ lora_path: str,
+ strength: float = 0.8,
+ model_node_id: str = "1",
+ clip_node_id: Optional[str] = None,
+ start_node_id: int = 100
+ ) -> Tuple[Dict[str, Any], str, Optional[str]]:
+ """Inject LoRA into an existing workflow.
+
+ Returns updated workflow, new model node ID, and new clip node ID.
+ """
+ lora_loader = self.find_node_for(STANDARD_REQUIREMENTS["load_lora"])
+ if not lora_loader:
+ print("No LoRA loader found - skipping LoRA injection")
+ return workflow, model_node_id, clip_node_id
+
+ node_id = start_node_id
+
+ lora_inputs = {
+ "lora_name": lora_path,
+ "strength_model": strength,
+ "strength_clip": strength,
+ "model": [model_node_id, 0],
+ }
+
+ if clip_node_id:
+ lora_inputs["clip"] = [clip_node_id, 0]
+
+ workflow[str(node_id)] = {
+ "class_type": lora_loader,
+ "inputs": lora_inputs
+ }
+ new_model_node = str(node_id)
+ new_clip_node = str(node_id) if clip_node_id else None
+
+ return workflow, new_model_node, new_clip_node
+
+ def inject_clip_vision_reference(
+ self,
+ workflow: Dict[str, Any],
+ reference_image: str,
+ start_node_id: int = 100
+ ) -> Tuple[Dict[str, Any], Optional[str]]:
+ """Inject CLIP Vision encoding for reference image (lighter alternative to IP-Adapter).
+
+ This works even without full IP-Adapter by encoding the reference image
+ via CLIP Vision, which can be used for style guidance.
+
+ Returns updated workflow and the clip vision output node ID.
+ """
+ clip_vision_loader = self.find_node_for(STANDARD_REQUIREMENTS["load_clip_vision"])
+ clip_vision_encode = self.find_node_for(STANDARD_REQUIREMENTS["encode_clip_vision"])
+ load_image = self.find_node_for(STANDARD_REQUIREMENTS["load_image"])
+
+ if not (clip_vision_encode and load_image):
+ print("CLIP Vision encode or image loader not available")
+ return workflow, None
+
+ node_id = start_node_id
+
+ # Load reference image
+ workflow[str(node_id)] = {
+ "class_type": load_image,
+ "inputs": {"image": reference_image}
+ }
+ ref_image_node = str(node_id)
+ node_id += 1
+
+ # Load CLIP Vision model (if needed)
+ clip_vision_node = None
+ if clip_vision_loader:
+ clip_vision_models = self.caps.get_clip_vision_models()
+ if clip_vision_models:
+ workflow[str(node_id)] = {
+ "class_type": clip_vision_loader,
+ "inputs": {"clip_name": clip_vision_models[0].filename}
+ }
+ clip_vision_node = str(node_id)
+ node_id += 1
+
+ # Encode with CLIP Vision
+ encode_inputs = {"image": [ref_image_node, 0]}
+ if clip_vision_node:
+ encode_inputs["clip_vision"] = [clip_vision_node, 0]
+
+ workflow[str(node_id)] = {
+ "class_type": clip_vision_encode,
+ "inputs": encode_inputs
+ }
+ clip_vision_output = str(node_id)
+
+ return workflow, clip_vision_output
+
+ def compose_with_character_reference(
+ self,
+ model: AvailableModel,
+ prompt: str,
+ reference_image: str,
+ character_strength: float = 0.8,
+ negative_prompt: str = "",
+ width: int = 512,
+ height: int = 512,
+ steps: int = 20,
+ cfg: float = 7.0,
+ seed: int = 0,
+ character_lora: Optional[str] = None,
+ character_lora_strength: float = 0.8
+ ) -> Optional[Dict[str, Any]]:
+ """Compose a text-to-image workflow with character reference.
+
+ Uses IP-Adapter if available, falls back to CLIP Vision encoding.
+ """
+ # Start with base text-to-image workflow
+ workflow = self.compose_text_to_image(
+ model, prompt, negative_prompt, width, height, steps, cfg, seed
+ )
+
+ if not workflow:
+ return None
+
+ # Find the model node (typically node 1 from checkpoint loader)
+ model_node_id = "1"
+ clip_node_id = "1" # CLIP is output 1 from checkpoint loader
+
+ current_start_id = 100
+
+ # Inject character LoRA if provided
+ if character_lora:
+ workflow, model_node_id, clip_node_id = self.inject_lora(
+ workflow,
+ character_lora,
+ character_lora_strength,
+ model_node_id,
+ clip_node_id,
+ current_start_id
+ )
+ current_start_id += 10
+
+ # Try IP-Adapter first
+ if self.caps.has_ipadapter_support():
+ workflow, new_model_node = self.inject_ipadapter(
+ workflow,
+ reference_image,
+ character_strength,
+ model_node_id,
+ current_start_id
+ )
+
+ # Update sampler to use new model node
+ for node_id, node in workflow.items():
+ if node.get("class_type", "").lower() in ["ksampler", "sampler"]:
+ if "model" in node.get("inputs", {}):
+ node["inputs"]["model"] = [new_model_node, 0]
+
+ elif self.caps.has_reference_image_support():
+ # Fall back to CLIP Vision encoding
+ workflow, clip_vision_output = self.inject_clip_vision_reference(
+ workflow,
+ reference_image,
+ current_start_id
+ )
+ # Note: CLIP Vision output would need to be wired to compatible nodes
+ # This is a placeholder for systems without full IP-Adapter
+ print("Using CLIP Vision encoding (limited character consistency)")
+
+ else:
+ print("No character reference support available on this system")
+
+ return workflow
+
  def compose_for_capability(
  self,
  capability: Capability,
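
The new compose_with_character_reference path starts from the existing text-to-image composition, optionally chains a character LoRA, and then prefers IP-Adapter injection over the lighter CLIP Vision fallback. A sketch of calling it directly (discover_system() defaults are assumed; the reference image name is hypothetical and, for ComfyUI-style image loaders, is assumed to name a file already present on the server):

    from vibe_aigc import discover_system, create_composer, Capability

    caps = discover_system()
    composer = create_composer(caps)
    model = composer.find_model_for(Capability.TEXT_TO_IMAGE)

    if model and caps.has_reference_image_support():
        workflow = composer.compose_with_character_reference(
            model=model,
            prompt="the same heroine walking through a rainy market",
            reference_image="heroine_ref.png",   # hypothetical filename
            character_strength=0.8,
        )
        # workflow is a ComfyUI-style node-graph dict; hand it to the executor of your choice
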
@@ -448,6 +741,120 @@ class GeneralComposer:
  return None


+ def compose_from_request(
+ self,
+ request: GenerationRequest,
+ capability: Capability = Capability.TEXT_TO_IMAGE
+ ) -> Optional[Dict[str, Any]]:
+ """Compose a workflow from a GenerationRequest.
+
+ Automatically handles character consistency if reference_image is provided.
+ """
+ # Find appropriate model
+ model = None
+ if request.model:
+ # Use specified model
+ for category_models in self.caps.models.values():
+ for m in category_models:
+ if m.filename == request.model:
+ model = m
+ break
+ if model:
+ break
+
+ if not model:
+ model = self.find_model_for(capability)
+
+ if not model:
+ print(f"No model found for {capability.value}")
+ return None
+
+ # Compose based on whether we have character reference
+ if request.reference_image:
+ workflow = self.compose_with_character_reference(
+ model=model,
+ prompt=request.prompt,
+ reference_image=request.reference_image,
+ character_strength=request.character_strength,
+ negative_prompt=request.negative_prompt,
+ width=request.width,
+ height=request.height,
+ steps=request.steps,
+ cfg=request.cfg,
+ seed=request.seed,
+ character_lora=request.character_lora,
+ character_lora_strength=request.character_lora_strength
+ )
+ else:
+ # Standard composition
+ if capability == Capability.TEXT_TO_IMAGE:
+ workflow = self.compose_text_to_image(
+ model, request.prompt, request.negative_prompt,
+ request.width, request.height, request.steps,
+ request.cfg, request.seed
+ )
+ elif capability in [Capability.TEXT_TO_VIDEO, Capability.IMAGE_TO_VIDEO]:
+ workflow = self.compose_text_to_video(
+ model, request.prompt, request.negative_prompt,
+ request.width, request.height, request.frames,
+ request.steps, request.cfg, request.seed
+ )
+ else:
+ workflow = self.compose_for_capability(capability, request.prompt)
+
+ # Inject additional LoRAs if specified
+ if workflow and request.loras:
+ model_node = "1"
+ clip_node = "1"
+ start_id = 200
+
+ for lora_config in request.loras:
+ lora_path = lora_config.get("path", lora_config.get("name", ""))
+ lora_strength = lora_config.get("strength", 0.8)
+
+ if lora_path:
+ workflow, model_node, clip_node = self.inject_lora(
+ workflow, lora_path, lora_strength,
+ model_node, clip_node, start_id
+ )
+ start_id += 10
+
+ return workflow
+
+ def compose_for_character(
+ self,
+ profile: CharacterProfile,
+ prompt: str,
+ capability: Capability = Capability.TEXT_TO_IMAGE,
+ **kwargs
+ ) -> Optional[Dict[str, Any]]:
+ """Compose a workflow using a CharacterProfile for consistency.
+
+ Convenience method that extracts reference settings from profile.
+ """
+ # Merge profile settings with any overrides
+ gen_params = profile.to_generation_params()
+ gen_params.update(kwargs)
+
+ # Inject trigger words into prompt if available
+ full_prompt = prompt
+ if profile.trigger_words:
+ trigger_str = " ".join(profile.trigger_words)
+ full_prompt = f"{trigger_str}, {prompt}"
+
+ # Add character description to prompt
+ if profile.description:
+ full_prompt = f"{profile.description}, {full_prompt}"
+
+ # Create request
+ request = GenerationRequest(
+ prompt=full_prompt,
+ **gen_params
+ )
+
+ return self.compose_from_request(request, capability)
+
+
  def create_composer(capabilities: SystemCapabilities) -> GeneralComposer:
  """Create a general composer from system capabilities."""
  return GeneralComposer(capabilities)
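
compose_from_request is the request-level entry point: it resolves a model, routes to the character-reference path when reference_image is set, and appends any extra LoRAs; compose_for_character wraps it, folding a CharacterProfile's trigger words and description into the prompt. A sketch using the request form (constructor defaults on GenerationRequest are assumed; only the attributes read in this hunk are certain, and the filenames are hypothetical):

    from vibe_aigc import discover_system, create_composer, GenerationRequest, Capability

    composer = create_composer(discover_system())    # assumed defaults

    request = GenerationRequest(
        prompt="the heroine on a rooftop at dusk",
        reference_image="heroine_ref.png",           # triggers the character-consistency branch
        character_strength=0.7,
        loras=[{"name": "film_grain.safetensors", "strength": 0.5}],  # hypothetical extra LoRA
    )
    workflow = composer.compose_from_request(request, Capability.TEXT_TO_IMAGE)
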
vibe_aigc/discovery.py CHANGED
@@ -27,6 +27,8 @@ class Capability(Enum):
  UPSCALE = "upscale"
  INPAINT = "inpaint"
  AUDIO = "audio"
+ CHARACTER_CONSISTENCY = "character_consistency" # IP-Adapter, LoRA character refs
+ STYLE_TRANSFER = "style_transfer" # Style reference from images
  UNKNOWN = "unknown"


@@ -81,6 +83,14 @@ class AvailableModel:
  """Infer capability from filename patterns."""
  name = self.filename.lower()

+ # IP-Adapter / Character consistency models
+ if any(x in name for x in ['ipadapter', 'ip_adapter', 'ip-adapter', 'instantid', 'faceid', 'pulid']):
+ return Capability.CHARACTER_CONSISTENCY
+
+ # Style transfer / reference models
+ if any(x in name for x in ['style', 'reference', 'clipvision']):
+ return Capability.STYLE_TRANSFER
+
  # Video models
  if any(x in name for x in ['video', 'animate', 'motion', 'wan', 'ltx', 'svd', 'i2v', 't2v']):
  if 'i2v' in name or 'img2vid' in name:
@@ -104,6 +114,27 @@ class AvailableModel:
  return Capability.TEXT_TO_IMAGE

  return Capability.UNKNOWN
+
+ @property
+ def is_ipadapter(self) -> bool:
+ """Check if this is an IP-Adapter model."""
+ name = self.filename.lower()
+ return any(x in name for x in ['ipadapter', 'ip_adapter', 'ip-adapter'])
+
+ @property
+ def is_character_lora(self) -> bool:
+ """Check if this is a character/person LoRA."""
+ name = self.filename.lower()
+ # Character LoRAs often have these patterns
+ return self.category == 'loras' and any(x in name for x in [
+ 'character', 'person', 'face', 'portrait', 'style', 'celeb'
+ ])
+
+ @property
+ def is_clip_vision(self) -> bool:
+ """Check if this is a CLIP Vision model."""
+ name = self.filename.lower()
+ return self.category == 'clip_vision' or 'clipvision' in name or 'clip_vision' in name


  @dataclass
@@ -126,6 +157,55 @@ class SystemCapabilities:
  result.append(model)
  return result

+ def get_ipadapter_models(self) -> List[AvailableModel]:
+ """Get all IP-Adapter models."""
+ result = []
+ for category in ['ipadapter', 'instantid', 'pulid', 'faceid']:
+ result.extend(self.models.get(category, []))
+ # Also check other categories for IP-Adapter files
+ for category_models in self.models.values():
+ for model in category_models:
+ if model.is_ipadapter and model not in result:
+ result.append(model)
+ return result
+
+ def get_clip_vision_models(self) -> List[AvailableModel]:
+ """Get all CLIP Vision models."""
+ result = list(self.models.get('clip_vision', []))
+ for category_models in self.models.values():
+ for model in category_models:
+ if model.is_clip_vision and model not in result:
+ result.append(model)
+ return result
+
+ def get_character_loras(self) -> List[AvailableModel]:
+ """Get all character/person LoRAs."""
+ result = []
+ for model in self.models.get('loras', []):
+ if model.is_character_lora:
+ result.append(model)
+ return result
+
+ def has_ipadapter_support(self) -> bool:
+ """Check if full IP-Adapter workflow is possible."""
+ # Need IP-Adapter node + IP-Adapter model + CLIP Vision
+ node_names = set(n.lower() for n in self.nodes.keys())
+ has_ipadapter_node = any('ipadapter' in n for n in node_names)
+ has_ipadapter_model = bool(self.get_ipadapter_models())
+ has_clip_vision = bool(self.get_clip_vision_models()) or 'CLIPVisionLoader' in self.nodes
+ return has_ipadapter_node and (has_ipadapter_model or has_clip_vision)
+
+ def has_reference_image_support(self) -> bool:
+ """Check if any reference image workflow is possible (IP-Adapter, ByteDance, etc.)."""
+ node_names = set(n.lower() for n in self.nodes.keys())
+ # Check for various reference image approaches
+ return (
+ any('ipadapter' in n for n in node_names) or
+ any('reference' in n and 'image' in n for n in node_names) or
+ any('bytedance' in n.lower() for n in node_names) or
+ 'CLIPVisionEncode' in self.nodes # Can encode reference images
+ )
+
  def summary(self) -> str:
  """Human-readable summary."""
  lines = [
@@ -149,6 +229,15 @@ class SystemCapabilities:
  elif cap != Capability.UNKNOWN:
  lines.append(f" [NO] {cap.value}")

+ # Character consistency details
+ lines.append("")
+ lines.append("Character Consistency:")
+ lines.append(f" IP-Adapter support: {'YES' if self.has_ipadapter_support() else 'NO'}")
+ lines.append(f" Reference image support: {'YES' if self.has_reference_image_support() else 'NO'}")
+ lines.append(f" IP-Adapter models: {len(self.get_ipadapter_models())}")
+ lines.append(f" CLIP Vision models: {len(self.get_clip_vision_models())}")
+ lines.append(f" Character LoRAs: {len(self.get_character_loras())}")
+
  return "\n".join(lines)


@@ -231,10 +320,12 @@ class SystemDiscovery:
  """Discover available models via /models/* endpoints."""
  models = {}

- # Standard ComfyUI model categories
+ # Standard ComfyUI model categories + IP-Adapter related
  categories = [
  "checkpoints", "unet", "diffusion_models", "vae",
- "clip", "loras", "upscale_models", "embeddings"
+ "clip", "loras", "upscale_models", "embeddings",
+ # IP-Adapter / Character consistency related
+ "ipadapter", "clip_vision", "insightface", "instantid", "pulid", "faceid"
  ]

  for category in categories:
@@ -275,6 +366,20 @@ class SystemDiscovery:
  # Also check node availability for capabilities
  node_names = set(n.lower() for n in nodes.keys())

+ # IP-Adapter / Character consistency nodes
+ ip_adapter_patterns = ['ipadapter', 'ip_adapter', 'ip-adapter', 'instantid', 'faceid', 'pulid']
+ if any(any(p in n for p in ip_adapter_patterns) for n in node_names):
+ capabilities.add(Capability.CHARACTER_CONSISTENCY)
+
+ # CLIP Vision (needed for IP-Adapter) - partial support for character refs
+ if any('clipvision' in n or 'clip_vision' in n for n in node_names):
+ # CLIP Vision enables style/image reference even without full IP-Adapter
+ capabilities.add(Capability.STYLE_TRANSFER)
+
+ # ByteDance reference nodes (alternative to IP-Adapter)
+ if any('reference' in n and ('image' in n or 'bytedance' in n) for n in node_names):
+ capabilities.add(Capability.CHARACTER_CONSISTENCY)
+
  # Video nodes
  if any('video' in n or 'animate' in n for n in node_names):
  capabilities.add(Capability.TEXT_TO_VIDEO)
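
On the discovery side, the new probes let callers check character-consistency support before composing anything. A sketch of inspecting a discovered system (discover_system() defaults are assumed):

    from vibe_aigc import discover_system

    caps = discover_system()
    print(caps.summary())    # now includes the "Character Consistency:" block added above

    if caps.has_ipadapter_support():
        print("IP-Adapter models:", [m.filename for m in caps.get_ipadapter_models()])
        print("Character LoRAs:", [m.filename for m in caps.get_character_loras()])
    elif caps.has_reference_image_support():
        print("Reference images only via CLIP Vision or alternative nodes")
    else:
        print("No character reference support on this system")
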