vibe-aigc 0.6.3__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vibe_aigc/llm.py CHANGED
@@ -1,10 +1,16 @@
-"""LLM client abstraction for Vibe decomposition."""
+"""LLM client abstraction for Vibe decomposition.
+
+Supports multiple providers:
+- OpenAI (OPENAI_API_KEY)
+- Anthropic (ANTHROPIC_API_KEY)
+- Ollama (local, no key needed - uses OpenAI-compatible API)
+"""
 
 import asyncio
 import json
 import os
-from typing import Any, Dict, Optional
-from openai import AsyncOpenAI
+from enum import Enum
+from typing import Any, Dict, Optional, List
 from pydantic import BaseModel
 
 from .models import Vibe, WorkflowPlan
@@ -23,53 +29,196 @@ def _load_dotenv():
         pass
 
 
+class LLMProvider(str, Enum):
+    """Supported LLM providers."""
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    OLLAMA = "ollama"
+    AUTO = "auto" # Auto-detect based on available keys
+
+
+# Default models per provider
+DEFAULT_MODELS = {
+    LLMProvider.OPENAI: "gpt-4",
+    LLMProvider.ANTHROPIC: "claude-sonnet-4-20250514",
+    LLMProvider.OLLAMA: "qwen2.5:14b", # Good balance of speed/quality
+}
+
+# Ollama models known to work well for planning
+OLLAMA_RECOMMENDED_MODELS = [
+    "qwen2.5-coder:32b-instruct-q4_K_M", # Best for structured output
+    "glm-4.7-flash:latest",
+    "qwen2.5:14b",
+    "qwen2.5:7b", # Faster, smaller
+]
+
+
 class LLMConfig(BaseModel):
     """Configuration for LLM client."""
 
-    model: str = "gpt-4"
+    provider: LLMProvider = LLMProvider.AUTO
+    model: Optional[str] = None # None = use provider default
     temperature: float = 0.7
-    max_tokens: int = 2000
+    max_tokens: int = 4000
     api_key: Optional[str] = None
-    base_url: Optional[str] = None # Custom endpoint (e.g., z.ai, local models)
+    base_url: Optional[str] = None # Custom endpoint
+
+    # Ollama-specific
+    ollama_host: str = "http://localhost:11434"
+
+    class Config:
+        use_enum_values = True
 
     @classmethod
     def from_env(cls) -> "LLMConfig":
-        """Create config from environment variables."""
+        """Create config from environment variables with auto-detection."""
         _load_dotenv()
+
+        # Check for explicit provider
+        provider_str = os.getenv("LLM_PROVIDER", "auto").lower()
+        try:
+            provider = LLMProvider(provider_str)
+        except ValueError:
+            provider = LLMProvider.AUTO
+
+        return cls(
+            provider=provider,
+            model=os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL"),
+            api_key=os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY"),
+            base_url=os.getenv("OPENAI_BASE_URL") or os.getenv("LLM_BASE_URL"),
+            ollama_host=os.getenv("OLLAMA_HOST", "http://localhost:11434"),
+        )
+
+    @classmethod
+    def for_ollama(cls, host: str = "http://localhost:11434", model: str = "qwen2.5:14b") -> "LLMConfig":
+        """Convenience constructor for Ollama."""
+        return cls(
+            provider=LLMProvider.OLLAMA,
+            model=model,
+            base_url=f"{host.rstrip('/')}/v1",
+            ollama_host=host,
+        )
+
+    @classmethod
+    def for_openai(cls, api_key: Optional[str] = None, model: str = "gpt-4") -> "LLMConfig":
+        """Convenience constructor for OpenAI."""
+        return cls(
+            provider=LLMProvider.OPENAI,
+            model=model,
+            api_key=api_key or os.getenv("OPENAI_API_KEY"),
+        )
+
+    @classmethod
+    def for_anthropic(cls, api_key: Optional[str] = None, model: str = "claude-sonnet-4-20250514") -> "LLMConfig":
+        """Convenience constructor for Anthropic."""
         return cls(
-            model=os.getenv("OPENAI_MODEL", "gpt-4"),
-            api_key=os.getenv("OPENAI_API_KEY"),
-            base_url=os.getenv("OPENAI_BASE_URL"),
+            provider=LLMProvider.ANTHROPIC,
+            model=model,
+            api_key=api_key or os.getenv("ANTHROPIC_API_KEY"),
         )
+
+    def resolve_provider(self) -> LLMProvider:
+        """Resolve AUTO provider to actual provider based on available credentials."""
+        if self.provider != LLMProvider.AUTO:
+            return LLMProvider(self.provider)
+
+        _load_dotenv()
+
+        # Priority: explicit base_url > API keys > Ollama
+        if self.base_url:
+            # Custom endpoint - assume OpenAI-compatible
+            return LLMProvider.OPENAI
+
+        if self.api_key or os.getenv("OPENAI_API_KEY"):
+            return LLMProvider.OPENAI
+
+        if os.getenv("ANTHROPIC_API_KEY"):
+            return LLMProvider.ANTHROPIC
+
+        # Default to Ollama (no key needed)
+        return LLMProvider.OLLAMA
+
+    def get_model(self) -> str:
+        """Get model name, using default if not specified."""
+        if self.model:
+            return self.model
+        provider = self.resolve_provider()
+        return DEFAULT_MODELS.get(provider, "gpt-4")
 
 
 class LLMClient:
-    """Async client for LLM-based Vibe decomposition."""
+    """Async client for LLM-based Vibe decomposition.
+
+    Supports OpenAI, Anthropic, and Ollama backends.
+    """
 
     def __init__(self, config: Optional[LLMConfig] = None):
         # Load from env if no config provided
         if config is None:
             config = LLMConfig.from_env()
         self.config = config
-
+        self.provider = config.resolve_provider()
+        self._client = None
+        self._init_client()
+
+    def _init_client(self):
+        """Initialize the appropriate client based on provider."""
+        if self.provider == LLMProvider.ANTHROPIC:
+            self._init_anthropic_client()
+        else:
+            # OpenAI and Ollama both use OpenAI-compatible API
+            self._init_openai_client()
+
+    def _init_openai_client(self):
+        """Initialize OpenAI or Ollama client (OpenAI-compatible)."""
         try:
-            client_kwargs = {}
-            # Only pass api_key if explicitly set (let OpenAI client check env otherwise)
+            from openai import AsyncOpenAI
+        except ImportError:
+            raise RuntimeError(
+                "openai package required. Install with: pip install openai"
+            )
+
+        client_kwargs = {}
+
+        if self.provider == LLMProvider.OLLAMA:
+            # Ollama uses OpenAI-compatible API
+            base_url = self.config.base_url or f"{self.config.ollama_host.rstrip('/')}/v1"
+            client_kwargs["base_url"] = base_url
+            client_kwargs["api_key"] = "ollama" # Ollama doesn't need a real key
+        else:
+            # OpenAI or custom endpoint
             if self.config.api_key:
                 client_kwargs["api_key"] = self.config.api_key
             if self.config.base_url:
                 client_kwargs["base_url"] = self.config.base_url
-            self.client = AsyncOpenAI(**client_kwargs)
+
+        try:
+            self._client = AsyncOpenAI(**client_kwargs)
         except Exception as e:
-            if "api_key" in str(e).lower():
+            if "api_key" in str(e).lower() and self.provider != LLMProvider.OLLAMA:
                 raise RuntimeError(
-                    "OpenAI API key is required. Set OPENAI_API_KEY environment variable "
-                    "or create a .env file. For z.ai, also set OPENAI_BASE_URL. "
-                    "Example .env:\n"
-                    " OPENAI_API_KEY=your-key\n"
-                    " OPENAI_BASE_URL=https://api.z.ai/v1"
+                    f"OpenAI API key required. Set OPENAI_API_KEY or use Ollama:\n"
+                    f" LLMConfig.for_ollama('http://localhost:11434')\n"
+                    f"Original error: {e}"
                 ) from e
             raise
+
+    def _init_anthropic_client(self):
+        """Initialize Anthropic client."""
+        try:
+            from anthropic import AsyncAnthropic
+        except ImportError:
+            raise RuntimeError(
+                "anthropic package required. Install with: pip install anthropic"
+            )
+
+        api_key = self.config.api_key or os.getenv("ANTHROPIC_API_KEY")
+        if not api_key:
+            raise RuntimeError(
+                "Anthropic API key required. Set ANTHROPIC_API_KEY or pass api_key to config."
+            )
+
+        self._client = AsyncAnthropic(api_key=api_key)
 
     async def decompose_vibe(
         self,
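
As a rough usage sketch (not part of the package diff): the provider auto-detection and convenience constructors added in this hunk can be exercised as below; the printed results simply follow resolve_provider() and get_model() as shown above.

from vibe_aigc.llm import LLMConfig, LLMProvider

# Explicit Ollama config: the client talks to a local, OpenAI-compatible endpoint.
cfg = LLMConfig.for_ollama(host="http://localhost:11434", model="qwen2.5:7b")
print(cfg.resolve_provider())  # LLMProvider.OLLAMA
print(cfg.get_model())         # "qwen2.5:7b"

# AUTO config: provider is picked from base_url, then OPENAI_API_KEY,
# then ANTHROPIC_API_KEY, falling back to Ollama when no key is set.
auto_cfg = LLMConfig.from_env()
print(auto_cfg.resolve_provider(), auto_cfg.get_model())
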
@@ -108,7 +257,9 @@ Respond with a JSON object containing:
 - estimated_duration: estimated seconds to complete
 
 IMPORTANT: Each node should specify which tool to use for execution. Use the available tools provided.
-Focus on logical decomposition and clear dependencies. Keep tasks atomic and executable."""
+Focus on logical decomposition and clear dependencies. Keep tasks atomic and executable.
+
+Return ONLY valid JSON, no markdown code blocks or explanatory text."""
 
         # Build user prompt with context
         user_prompt_parts = [
@@ -142,9 +293,19 @@ Focus on logical decomposition and clear dependencies. Keep tasks atomic and exe
 
         user_prompt = "\n".join(user_prompt_parts)
 
+        # Dispatch to appropriate provider
+        if self.provider == LLMProvider.ANTHROPIC:
+            return await self._call_anthropic(system_prompt, user_prompt)
+        else:
+            return await self._call_openai_compatible(system_prompt, user_prompt)
+
+    async def _call_openai_compatible(self, system_prompt: str, user_prompt: str) -> Dict[str, Any]:
+        """Call OpenAI or Ollama (OpenAI-compatible API)."""
+        model = self.config.get_model()
+
         try:
-            response = await self.client.chat.completions.create(
-                model=self.config.model,
+            response = await self._client.chat.completions.create(
+                model=model,
                 messages=[
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": user_prompt}
@@ -154,51 +315,133 @@ Focus on logical decomposition and clear dependencies. Keep tasks atomic and exe
             )
 
             content = response.choices[0].message.content
-            if not content:
-                raise ValueError(
-                    "Empty response from LLM. This could indicate an API issue or "
-                    "the request was filtered. Please try again or adjust your vibe."
-                )
-
-            # Strip markdown code blocks if present (common with some LLMs like z.ai/GLM)
-            content = content.strip()
-            if content.startswith("```"):
-                # Remove opening ```json or ```
-                first_newline = content.find("\n")
-                if first_newline != -1:
-                    content = content[first_newline + 1:]
-                # Remove closing ```
-                if content.endswith("```"):
-                    content = content[:-3].strip()
+            return self._parse_json_response(content)
 
-            return json.loads(content)
+        except Exception as e:
+            return self._handle_error(e, model)
+
+    async def _call_anthropic(self, system_prompt: str, user_prompt: str) -> Dict[str, Any]:
+        """Call Anthropic Claude API."""
+        model = self.config.get_model()
+
+        try:
+            response = await self._client.messages.create(
+                model=model,
+                max_tokens=self.config.max_tokens,
+                system=system_prompt,
+                messages=[
+                    {"role": "user", "content": user_prompt}
+                ]
+            )
+
+            content = response.content[0].text
+            return self._parse_json_response(content)
 
+        except Exception as e:
+            return self._handle_error(e, model)
+
+    def _parse_json_response(self, content: str) -> Dict[str, Any]:
+        """Parse JSON from LLM response, handling common formatting issues."""
+        if not content:
+            raise ValueError(
+                "Empty response from LLM. This could indicate an API issue or "
+                "the request was filtered. Please try again or adjust your vibe."
+            )
+
+        # Strip markdown code blocks if present
+        content = content.strip()
+        if content.startswith("```"):
+            # Remove opening ```json or ```
+            first_newline = content.find("\n")
+            if first_newline != -1:
+                content = content[first_newline + 1:]
+            # Remove closing ```
+            if content.endswith("```"):
+                content = content[:-3].strip()
+
+        # Try to find JSON object if there's extra text
+        if not content.startswith("{"):
+            start = content.find("{")
+            if start != -1:
+                end = content.rfind("}") + 1
+                if end > start:
+                    content = content[start:end]
+
+        try:
+            return json.loads(content)
         except json.JSONDecodeError as e:
             raise ValueError(
                 f"Invalid JSON response from LLM: {e}. "
-                f"The LLM returned malformed data. Please try again. "
-                f"Response content: {content[:200] if 'content' in locals() else 'N/A'}..."
+                f"Response content: {content[:200]}..."
             ) from e
-        except Exception as e:
-            error_lower = str(e).lower()
-            if any(keyword in error_lower for keyword in ["api_key", "unauthorized", "authentication", "invalid.*key"]):
-                raise RuntimeError(
-                    f"LLM authentication failed: {e}. "
-                    "Please check your OpenAI API key and ensure it's valid. "
-                    "Get your API key from: https://platform.openai.com/api-keys"
-                ) from e
-            elif "rate limit" in str(e).lower():
+
+    def _handle_error(self, e: Exception, model: str) -> Dict[str, Any]:
+        """Handle and re-raise errors with helpful messages."""
+        error_lower = str(e).lower()
+
+        if any(kw in error_lower for kw in ["api_key", "unauthorized", "authentication", "invalid.*key"]):
+            raise RuntimeError(
+                f"LLM authentication failed: {e}. "
+                f"Provider: {self.provider.value}, Model: {model}\n"
+                f"For local development, use Ollama: LLMConfig.for_ollama()"
+            ) from e
+        elif "rate limit" in error_lower:
+            raise RuntimeError(
+                f"API rate limit exceeded: {e}. "
+                "Please wait a moment and try again."
+            ) from e
+        elif any(kw in error_lower for kw in ["timeout", "connection", "refused"]):
+            if self.provider == LLMProvider.OLLAMA:
                 raise RuntimeError(
-                    f"OpenAI API rate limit exceeded: {e}. "
-                    "Please wait a moment and try again, or check your API plan limits."
+                    f"Cannot connect to Ollama at {self.config.ollama_host}: {e}\n"
+                    f"Make sure Ollama is running: ollama serve"
                 ) from e
-            elif "timeout" in str(e).lower():
+            raise RuntimeError(
+                f"Network error while calling LLM: {e}"
+            ) from e
+        elif "model" in error_lower and "not found" in error_lower:
+            if self.provider == LLMProvider.OLLAMA:
                 raise RuntimeError(
-                    f"Network timeout while calling LLM: {e}. "
-                    "Please check your internet connection and try again."
+                    f"Model '{model}' not found in Ollama.\n"
+                    f"Pull it with: ollama pull {model}\n"
+                    f"Or use a different model: LLMConfig.for_ollama(model='qwen2.5:7b')"
                 ) from e
-            else:
-                raise RuntimeError(
-                    f"LLM request failed: {e}. "
-                    f"This could be a network issue, API outage, or configuration problem."
-                ) from e
+            raise RuntimeError(f"Model '{model}' not available: {e}") from e
+        else:
+            raise RuntimeError(
+                f"LLM request failed ({self.provider.value}/{model}): {e}"
+            ) from e
+
+
+async def list_ollama_models(host: str = "http://localhost:11434") -> List[str]:
+    """List available models on an Ollama instance.
+
+    Args:
+        host: Ollama server URL
+
+    Returns:
+        List of model names
+    """
+    import aiohttp
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{host.rstrip('/')}/api/tags") as resp:
+                if resp.status == 200:
+                    data = await resp.json()
+                    return [m["name"] for m in data.get("models", [])]
+                return []
+    except Exception:
+        return []
+
+
+async def check_ollama_available(host: str = "http://localhost:11434") -> bool:
+    """Check if Ollama is available at the given host."""
+    import aiohttp
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{host.rstrip('/')}/api/tags", timeout=aiohttp.ClientTimeout(total=2)) as resp:
+                return resp.status == 200
+    except Exception:
+        return False
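
A minimal end-to-end sketch (not from the diff) combining the helpers added above; it assumes the openai and aiohttp packages are installed and uses only functions defined in this file.

import asyncio

from vibe_aigc.llm import LLMClient, LLMConfig, check_ollama_available, list_ollama_models

async def main() -> None:
    # Prefer a local Ollama backend when reachable; otherwise let AUTO
    # resolve a provider from the environment (OPENAI_API_KEY / ANTHROPIC_API_KEY).
    if await check_ollama_available():
        models = await list_ollama_models()
        config = LLMConfig.for_ollama(model=models[0] if models else "qwen2.5:14b")
    else:
        config = LLMConfig.from_env()

    client = LLMClient(config)
    print("provider:", client.provider, "model:", config.get_model())

asyncio.run(main())
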
vibe_aigc/models.py CHANGED
@@ -1,10 +1,107 @@
 """Core data models for Vibe AIGC system."""
 
-from typing import List, Optional, Dict, Any
+from typing import List, Optional, Dict, Any, Union
 from enum import Enum
 from pydantic import BaseModel, Field
 
 
+class GenerationRequest(BaseModel):
+    """Request for content generation with optional character consistency."""
+
+    # Core generation parameters
+    prompt: str = Field(..., description="Primary prompt for generation")
+    negative_prompt: str = Field("", description="Negative prompt to avoid")
+    width: int = Field(512, description="Output width")
+    height: int = Field(512, description="Output height")
+    steps: int = Field(20, description="Number of sampling steps")
+    cfg: float = Field(7.0, description="Classifier-free guidance scale")
+    seed: int = Field(0, description="Random seed (0 for random)")
+
+    # Video-specific
+    frames: int = Field(24, description="Number of frames for video")
+    fps: int = Field(24, description="Frames per second for video")
+
+    # Character consistency / reference image support
+    reference_image: Optional[str] = Field(
+        None,
+        description="Path to character/style reference image for consistency"
+    )
+    character_strength: float = Field(
+        0.8,
+        ge=0.0,
+        le=1.0,
+        description="How strongly to apply character reference (0.0-1.0)"
+    )
+    reference_type: str = Field(
+        "character",
+        description="Type of reference: 'character' (face/person), 'style', or 'composition'"
+    )
+
+    # LoRA support for character consistency
+    character_lora: Optional[str] = Field(
+        None,
+        description="Path to character-specific LoRA model"
+    )
+    character_lora_strength: float = Field(
+        0.8,
+        ge=0.0,
+        le=2.0,
+        description="Strength of character LoRA (0.0-2.0)"
+    )
+
+    # Additional LoRAs
+    loras: List[Dict[str, Any]] = Field(
+        default_factory=list,
+        description="List of LoRAs: [{'path': str, 'strength': float}]"
+    )
+
+    # Model selection (optional - uses discovery if not specified)
+    model: Optional[str] = Field(None, description="Specific model to use")
+    vae: Optional[str] = Field(None, description="Specific VAE to use")
+
+    # Output
+    output_prefix: str = Field("vibe", description="Filename prefix for output")
+
+    class Config:
+        extra = "allow" # Allow additional fields for flexibility
+
+
+class CharacterProfile(BaseModel):
+    """Profile for maintaining character consistency across generations."""
+
+    name: str = Field(..., description="Character identifier/name")
+    reference_images: List[str] = Field(
+        default_factory=list,
+        description="Paths to reference images of this character"
+    )
+    lora_path: Optional[str] = Field(
+        None,
+        description="Path to trained character LoRA if available"
+    )
+    lora_strength: float = Field(0.8, description="Default LoRA strength for this character")
+
+    # Character description for prompt injection
+    description: str = Field("", description="Text description of character appearance")
+    trigger_words: List[str] = Field(
+        default_factory=list,
+        description="Trigger words for character LoRA"
+    )
+
+    # Generation preferences
+    preferred_ip_strength: float = Field(0.8, description="Preferred IP-Adapter strength")
+
+    def to_generation_params(self) -> Dict[str, Any]:
+        """Convert profile to generation parameters."""
+        params = {}
+        if self.reference_images:
+            params["reference_image"] = self.reference_images[0]
+            params["character_strength"] = self.preferred_ip_strength
+        if self.lora_path:
+            params["character_lora"] = self.lora_path
+            params["character_lora_strength"] = self.lora_strength
+        return params
+
+
 class Vibe(BaseModel):
     """High-level representation of user's creative intent and aesthetic preferences."""
 
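
A short sketch (not part of the diff) of how the two new models added above compose; the file paths and character name are placeholders.

from vibe_aigc.models import CharacterProfile, GenerationRequest

# Hypothetical profile; reference image and LoRA paths are placeholders.
hero = CharacterProfile(
    name="hero",
    reference_images=["refs/hero_face.png"],
    lora_path="loras/hero_v1.safetensors",
    lora_strength=0.7,
    trigger_words=["hero_character"],
)

# to_generation_params() fills the character-consistency fields of GenerationRequest
# (reference_image, character_strength, character_lora, character_lora_strength).
request = GenerationRequest(
    prompt="hero_character walking through a neon city at night",
    width=768,
    height=768,
    **hero.to_generation_params(),
)
print(request.reference_image, request.character_lora_strength)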