vibe-aigc 0.6.3__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vibe_aigc/llm.py CHANGED
@@ -1,10 +1,16 @@
-"""LLM client abstraction for Vibe decomposition."""
+"""LLM client abstraction for Vibe decomposition.
+
+Supports multiple providers:
+- OpenAI (OPENAI_API_KEY)
+- Anthropic (ANTHROPIC_API_KEY)
+- Ollama (local, no key needed - uses OpenAI-compatible API)
+"""
 
 import asyncio
 import json
 import os
-from typing import Any, Dict, Optional
-from openai import AsyncOpenAI
+from enum import Enum
+from typing import Any, Dict, Optional, List
 from pydantic import BaseModel
 
 from .models import Vibe, WorkflowPlan
@@ -23,53 +29,196 @@ def _load_dotenv():
         pass
 
 
+class LLMProvider(str, Enum):
+    """Supported LLM providers."""
+    OPENAI = "openai"
+    ANTHROPIC = "anthropic"
+    OLLAMA = "ollama"
+    AUTO = "auto" # Auto-detect based on available keys
+
+
+# Default models per provider
+DEFAULT_MODELS = {
+    LLMProvider.OPENAI: "gpt-4",
+    LLMProvider.ANTHROPIC: "claude-sonnet-4-20250514",
+    LLMProvider.OLLAMA: "qwen2.5:14b", # Good balance of speed/quality
+}
+
+# Ollama models known to work well for planning
+OLLAMA_RECOMMENDED_MODELS = [
+    "qwen2.5-coder:32b-instruct-q4_K_M", # Best for structured output
+    "glm-4.7-flash:latest",
+    "qwen2.5:14b",
+    "qwen2.5:7b", # Faster, smaller
+]
+
+
 class LLMConfig(BaseModel):
     """Configuration for LLM client."""
 
-    model: str = "gpt-4"
+    provider: LLMProvider = LLMProvider.AUTO
+    model: Optional[str] = None # None = use provider default
     temperature: float = 0.7
-    max_tokens: int = 2000
+    max_tokens: int = 4000
     api_key: Optional[str] = None
-    base_url: Optional[str] = None # Custom endpoint (e.g., z.ai, local models)
+    base_url: Optional[str] = None # Custom endpoint
+
+    # Ollama-specific
+    ollama_host: str = "http://localhost:11434"
+
+    class Config:
+        use_enum_values = True
 
     @classmethod
     def from_env(cls) -> "LLMConfig":
-        """Create config from environment variables."""
+        """Create config from environment variables with auto-detection."""
         _load_dotenv()
+
+        # Check for explicit provider
+        provider_str = os.getenv("LLM_PROVIDER", "auto").lower()
+        try:
+            provider = LLMProvider(provider_str)
+        except ValueError:
+            provider = LLMProvider.AUTO
+
+        return cls(
+            provider=provider,
+            model=os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL"),
+            api_key=os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY"),
+            base_url=os.getenv("OPENAI_BASE_URL") or os.getenv("LLM_BASE_URL"),
+            ollama_host=os.getenv("OLLAMA_HOST", "http://localhost:11434"),
+        )
+
+    @classmethod
+    def for_ollama(cls, host: str = "http://localhost:11434", model: str = "qwen2.5:14b") -> "LLMConfig":
+        """Convenience constructor for Ollama."""
+        return cls(
+            provider=LLMProvider.OLLAMA,
+            model=model,
+            base_url=f"{host.rstrip('/')}/v1",
+            ollama_host=host,
+        )
+
+    @classmethod
+    def for_openai(cls, api_key: Optional[str] = None, model: str = "gpt-4") -> "LLMConfig":
+        """Convenience constructor for OpenAI."""
+        return cls(
+            provider=LLMProvider.OPENAI,
+            model=model,
+            api_key=api_key or os.getenv("OPENAI_API_KEY"),
+        )
+
+    @classmethod
+    def for_anthropic(cls, api_key: Optional[str] = None, model: str = "claude-sonnet-4-20250514") -> "LLMConfig":
+        """Convenience constructor for Anthropic."""
         return cls(
-            model=os.getenv("OPENAI_MODEL", "gpt-4"),
-            api_key=os.getenv("OPENAI_API_KEY"),
-            base_url=os.getenv("OPENAI_BASE_URL"),
+            provider=LLMProvider.ANTHROPIC,
+            model=model,
+            api_key=api_key or os.getenv("ANTHROPIC_API_KEY"),
         )
+
+    def resolve_provider(self) -> LLMProvider:
+        """Resolve AUTO provider to actual provider based on available credentials."""
+        if self.provider != LLMProvider.AUTO:
+            return LLMProvider(self.provider)
+
+        _load_dotenv()
+
+        # Priority: explicit base_url > API keys > Ollama
+        if self.base_url:
+            # Custom endpoint - assume OpenAI-compatible
+            return LLMProvider.OPENAI
+
+        if self.api_key or os.getenv("OPENAI_API_KEY"):
+            return LLMProvider.OPENAI
+
+        if os.getenv("ANTHROPIC_API_KEY"):
+            return LLMProvider.ANTHROPIC
+
+        # Default to Ollama (no key needed)
+        return LLMProvider.OLLAMA
+
+    def get_model(self) -> str:
+        """Get model name, using default if not specified."""
+        if self.model:
+            return self.model
+        provider = self.resolve_provider()
+        return DEFAULT_MODELS.get(provider, "gpt-4")
 
 
 class LLMClient:
-    """Async client for LLM-based Vibe decomposition."""
+    """Async client for LLM-based Vibe decomposition.
+
+    Supports OpenAI, Anthropic, and Ollama backends.
+    """
 
     def __init__(self, config: Optional[LLMConfig] = None):
         # Load from env if no config provided
         if config is None:
             config = LLMConfig.from_env()
         self.config = config
-
+        self.provider = config.resolve_provider()
+        self._client = None
+        self._init_client()
+
+    def _init_client(self):
+        """Initialize the appropriate client based on provider."""
+        if self.provider == LLMProvider.ANTHROPIC:
+            self._init_anthropic_client()
+        else:
+            # OpenAI and Ollama both use OpenAI-compatible API
+            self._init_openai_client()
+
+    def _init_openai_client(self):
+        """Initialize OpenAI or Ollama client (OpenAI-compatible)."""
         try:
-            client_kwargs = {}
-            # Only pass api_key if explicitly set (let OpenAI client check env otherwise)
+            from openai import AsyncOpenAI
+        except ImportError:
+            raise RuntimeError(
+                "openai package required. Install with: pip install openai"
+            )
+
+        client_kwargs = {}
+
+        if self.provider == LLMProvider.OLLAMA:
+            # Ollama uses OpenAI-compatible API
+            base_url = self.config.base_url or f"{self.config.ollama_host.rstrip('/')}/v1"
+            client_kwargs["base_url"] = base_url
+            client_kwargs["api_key"] = "ollama" # Ollama doesn't need a real key
+        else:
+            # OpenAI or custom endpoint
             if self.config.api_key:
                 client_kwargs["api_key"] = self.config.api_key
             if self.config.base_url:
                 client_kwargs["base_url"] = self.config.base_url
-            self.client = AsyncOpenAI(**client_kwargs)
+
+        try:
+            self._client = AsyncOpenAI(**client_kwargs)
         except Exception as e:
-            if "api_key" in str(e).lower():
+            if "api_key" in str(e).lower() and self.provider != LLMProvider.OLLAMA:
                 raise RuntimeError(
-                    "OpenAI API key is required. Set OPENAI_API_KEY environment variable "
-                    "or create a .env file. For z.ai, also set OPENAI_BASE_URL. "
-                    "Example .env:\n"
-                    " OPENAI_API_KEY=your-key\n"
-                    " OPENAI_BASE_URL=https://api.z.ai/v1"
+                    f"OpenAI API key required. Set OPENAI_API_KEY or use Ollama:\n"
+                    f" LLMConfig.for_ollama('http://localhost:11434')\n"
+                    f"Original error: {e}"
                 ) from e
             raise
+
+    def _init_anthropic_client(self):
+        """Initialize Anthropic client."""
+        try:
+            from anthropic import AsyncAnthropic
+        except ImportError:
+            raise RuntimeError(
+                "anthropic package required. Install with: pip install anthropic"
+            )
+
+        api_key = self.config.api_key or os.getenv("ANTHROPIC_API_KEY")
+        if not api_key:
+            raise RuntimeError(
+                "Anthropic API key required. Set ANTHROPIC_API_KEY or pass api_key to config."
+            )
+
+        self._client = AsyncAnthropic(api_key=api_key)
 
     async def decompose_vibe(
         self,
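
As a rough usage sketch (not part of the package diff): the provider auto-detection and convenience constructors added in this hunk can be exercised as below; the printed results simply follow resolve_provider() and get_model() as shown above.

from vibe_aigc.llm import LLMConfig, LLMProvider

# Explicit Ollama config: the client talks to a local, OpenAI-compatible endpoint.
cfg = LLMConfig.for_ollama(host="http://localhost:11434", model="qwen2.5:7b")
print(cfg.resolve_provider())  # LLMProvider.OLLAMA
print(cfg.get_model())         # "qwen2.5:7b"

# AUTO config: provider is picked from base_url, then OPENAI_API_KEY,
# then ANTHROPIC_API_KEY, falling back to Ollama when no key is set.
auto_cfg = LLMConfig.from_env()
print(auto_cfg.resolve_provider(), auto_cfg.get_model())
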
@@ -108,7 +257,9 @@ Respond with a JSON object containing:
 - estimated_duration: estimated seconds to complete
 
 IMPORTANT: Each node should specify which tool to use for execution. Use the available tools provided.
-Focus on logical decomposition and clear dependencies. Keep tasks atomic and executable."""
+Focus on logical decomposition and clear dependencies. Keep tasks atomic and executable.
+
+Return ONLY valid JSON, no markdown code blocks or explanatory text."""
 
         # Build user prompt with context
         user_prompt_parts = [
@@ -142,9 +293,19 @@ Focus on logical decomposition and clear dependencies. Keep tasks atomic and exe
 
         user_prompt = "\n".join(user_prompt_parts)
 
+        # Dispatch to appropriate provider
+        if self.provider == LLMProvider.ANTHROPIC:
+            return await self._call_anthropic(system_prompt, user_prompt)
+        else:
+            return await self._call_openai_compatible(system_prompt, user_prompt)
+
+    async def _call_openai_compatible(self, system_prompt: str, user_prompt: str) -> Dict[str, Any]:
+        """Call OpenAI or Ollama (OpenAI-compatible API)."""
+        model = self.config.get_model()
+
         try:
-            response = await self.client.chat.completions.create(
-                model=self.config.model,
+            response = await self._client.chat.completions.create(
+                model=model,
                 messages=[
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": user_prompt}
@@ -154,51 +315,133 @@ Focus on logical decomposition and clear dependencies. Keep tasks atomic and exe
             )
 
             content = response.choices[0].message.content
-            if not content:
-                raise ValueError(
-                    "Empty response from LLM. This could indicate an API issue or "
-                    "the request was filtered. Please try again or adjust your vibe."
-                )
-
-            # Strip markdown code blocks if present (common with some LLMs like z.ai/GLM)
-            content = content.strip()
-            if content.startswith("```"):
-                # Remove opening ```json or ```
-                first_newline = content.find("\n")
-                if first_newline != -1:
-                    content = content[first_newline + 1:]
-                # Remove closing ```
-                if content.endswith("```"):
-                    content = content[:-3].strip()
+            return self._parse_json_response(content)
 
-            return json.loads(content)
+        except Exception as e:
+            return self._handle_error(e, model)
+
+    async def _call_anthropic(self, system_prompt: str, user_prompt: str) -> Dict[str, Any]:
+        """Call Anthropic Claude API."""
+        model = self.config.get_model()
+
+        try:
+            response = await self._client.messages.create(
+                model=model,
+                max_tokens=self.config.max_tokens,
+                system=system_prompt,
+                messages=[
+                    {"role": "user", "content": user_prompt}
+                ]
+            )
+
+            content = response.content[0].text
+            return self._parse_json_response(content)
 
+        except Exception as e:
+            return self._handle_error(e, model)
+
+    def _parse_json_response(self, content: str) -> Dict[str, Any]:
+        """Parse JSON from LLM response, handling common formatting issues."""
+        if not content:
+            raise ValueError(
+                "Empty response from LLM. This could indicate an API issue or "
+                "the request was filtered. Please try again or adjust your vibe."
+            )
+
+        # Strip markdown code blocks if present
+        content = content.strip()
+        if content.startswith("```"):
+            # Remove opening ```json or ```
+            first_newline = content.find("\n")
+            if first_newline != -1:
+                content = content[first_newline + 1:]
+            # Remove closing ```
+            if content.endswith("```"):
+                content = content[:-3].strip()
+
+        # Try to find JSON object if there's extra text
+        if not content.startswith("{"):
+            start = content.find("{")
+            if start != -1:
+                end = content.rfind("}") + 1
+                if end > start:
+                    content = content[start:end]
+
+        try:
+            return json.loads(content)
         except json.JSONDecodeError as e:
             raise ValueError(
                 f"Invalid JSON response from LLM: {e}. "
-                f"The LLM returned malformed data. Please try again. "
-                f"Response content: {content[:200] if 'content' in locals() else 'N/A'}..."
+                f"Response content: {content[:200]}..."
             ) from e
-        except Exception as e:
-            error_lower = str(e).lower()
-            if any(keyword in error_lower for keyword in ["api_key", "unauthorized", "authentication", "invalid.*key"]):
-                raise RuntimeError(
-                    f"LLM authentication failed: {e}. "
-                    "Please check your OpenAI API key and ensure it's valid. "
-                    "Get your API key from: https://platform.openai.com/api-keys"
-                ) from e
-            elif "rate limit" in str(e).lower():
+
+    def _handle_error(self, e: Exception, model: str) -> Dict[str, Any]:
+        """Handle and re-raise errors with helpful messages."""
+        error_lower = str(e).lower()
+
+        if any(kw in error_lower for kw in ["api_key", "unauthorized", "authentication", "invalid.*key"]):
+            raise RuntimeError(
+                f"LLM authentication failed: {e}. "
+                f"Provider: {self.provider.value}, Model: {model}\n"
+                f"For local development, use Ollama: LLMConfig.for_ollama()"
+            ) from e
+        elif "rate limit" in error_lower:
+            raise RuntimeError(
+                f"API rate limit exceeded: {e}. "
+                "Please wait a moment and try again."
+            ) from e
+        elif any(kw in error_lower for kw in ["timeout", "connection", "refused"]):
+            if self.provider == LLMProvider.OLLAMA:
                 raise RuntimeError(
-                    f"OpenAI API rate limit exceeded: {e}. "
-                    "Please wait a moment and try again, or check your API plan limits."
+                    f"Cannot connect to Ollama at {self.config.ollama_host}: {e}\n"
+                    f"Make sure Ollama is running: ollama serve"
                 ) from e
-            elif "timeout" in str(e).lower():
+            raise RuntimeError(
+                f"Network error while calling LLM: {e}"
+            ) from e
+        elif "model" in error_lower and "not found" in error_lower:
+            if self.provider == LLMProvider.OLLAMA:
                 raise RuntimeError(
-                    f"Network timeout while calling LLM: {e}. "
-                    "Please check your internet connection and try again."
+                    f"Model '{model}' not found in Ollama.\n"
+                    f"Pull it with: ollama pull {model}\n"
+                    f"Or use a different model: LLMConfig.for_ollama(model='qwen2.5:7b')"
                 ) from e
-            else:
-                raise RuntimeError(
-                    f"LLM request failed: {e}. "
-                    f"This could be a network issue, API outage, or configuration problem."
-                ) from e
+            raise RuntimeError(f"Model '{model}' not available: {e}") from e
+        else:
+            raise RuntimeError(
+                f"LLM request failed ({self.provider.value}/{model}): {e}"
+            ) from e
+
+
+async def list_ollama_models(host: str = "http://localhost:11434") -> List[str]:
+    """List available models on an Ollama instance.
+
+    Args:
+        host: Ollama server URL
+
+    Returns:
+        List of model names
+    """
+    import aiohttp
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{host.rstrip('/')}/api/tags") as resp:
+                if resp.status == 200:
+                    data = await resp.json()
+                    return [m["name"] for m in data.get("models", [])]
+                return []
+    except Exception:
+        return []
+
+
+async def check_ollama_available(host: str = "http://localhost:11434") -> bool:
+    """Check if Ollama is available at the given host."""
+    import aiohttp
+
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(f"{host.rstrip('/')}/api/tags", timeout=aiohttp.ClientTimeout(total=2)) as resp:
+                return resp.status == 200
+    except Exception:
+        return False
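
A minimal end-to-end sketch (not from the diff) combining the helpers added above; it assumes the openai and aiohttp packages are installed and uses only functions defined in this file.

import asyncio

from vibe_aigc.llm import LLMClient, LLMConfig, check_ollama_available, list_ollama_models

async def main() -> None:
    # Prefer a local Ollama backend when reachable; otherwise let AUTO
    # resolve a provider from the environment (OPENAI_API_KEY / ANTHROPIC_API_KEY).
    if await check_ollama_available():
        models = await list_ollama_models()
        config = LLMConfig.for_ollama(model=models[0] if models else "qwen2.5:14b")
    else:
        config = LLMConfig.from_env()

    client = LLMClient(config)
    print("provider:", client.provider, "model:", config.get_model())

asyncio.run(main())
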
vibe_aigc/models.py CHANGED
@@ -1,10 +1,107 @@
 """Core data models for Vibe AIGC system."""
 
-from typing import List, Optional, Dict, Any
+from typing import List, Optional, Dict, Any, Union
 from enum import Enum
 from pydantic import BaseModel, Field
 
 
+class GenerationRequest(BaseModel):
+    """Request for content generation with optional character consistency."""
+
+    # Core generation parameters
+    prompt: str = Field(..., description="Primary prompt for generation")
+    negative_prompt: str = Field("", description="Negative prompt to avoid")
+    width: int = Field(512, description="Output width")
+    height: int = Field(512, description="Output height")
+    steps: int = Field(20, description="Number of sampling steps")
+    cfg: float = Field(7.0, description="Classifier-free guidance scale")
+    seed: int = Field(0, description="Random seed (0 for random)")
+
+    # Video-specific
+    frames: int = Field(24, description="Number of frames for video")
+    fps: int = Field(24, description="Frames per second for video")
+
+    # Character consistency / reference image support
+    reference_image: Optional[str] = Field(
+        None,
+        description="Path to character/style reference image for consistency"
+    )
+    character_strength: float = Field(
+        0.8,
+        ge=0.0,
+        le=1.0,
+        description="How strongly to apply character reference (0.0-1.0)"
+    )
+    reference_type: str = Field(
+        "character",
+        description="Type of reference: 'character' (face/person), 'style', or 'composition'"
+    )
+
+    # LoRA support for character consistency
+    character_lora: Optional[str] = Field(
+        None,
+        description="Path to character-specific LoRA model"
+    )
+    character_lora_strength: float = Field(
+        0.8,
+        ge=0.0,
+        le=2.0,
+        description="Strength of character LoRA (0.0-2.0)"
+    )
+
+    # Additional LoRAs
+    loras: List[Dict[str, Any]] = Field(
+        default_factory=list,
+        description="List of LoRAs: [{'path': str, 'strength': float}]"
+    )
+
+    # Model selection (optional - uses discovery if not specified)
+    model: Optional[str] = Field(None, description="Specific model to use")
+    vae: Optional[str] = Field(None, description="Specific VAE to use")
+
+    # Output
+    output_prefix: str = Field("vibe", description="Filename prefix for output")
+
+    class Config:
+        extra = "allow" # Allow additional fields for flexibility
+
+
+class CharacterProfile(BaseModel):
+    """Profile for maintaining character consistency across generations."""
+
+    name: str = Field(..., description="Character identifier/name")
+    reference_images: List[str] = Field(
+        default_factory=list,
+        description="Paths to reference images of this character"
+    )
+    lora_path: Optional[str] = Field(
+        None,
+        description="Path to trained character LoRA if available"
+    )
+    lora_strength: float = Field(0.8, description="Default LoRA strength for this character")
+
+    # Character description for prompt injection
+    description: str = Field("", description="Text description of character appearance")
+    trigger_words: List[str] = Field(
+        default_factory=list,
+        description="Trigger words for character LoRA"
+    )
+
+    # Generation preferences
+    preferred_ip_strength: float = Field(0.8, description="Preferred IP-Adapter strength")
+
+    def to_generation_params(self) -> Dict[str, Any]:
+        """Convert profile to generation parameters."""
+        params = {}
+        if self.reference_images:
+            params["reference_image"] = self.reference_images[0]
+            params["character_strength"] = self.preferred_ip_strength
+        if self.lora_path:
+            params["character_lora"] = self.lora_path
+            params["character_lora_strength"] = self.lora_strength
+        return params
+
+
 class Vibe(BaseModel):
     """High-level representation of user's creative intent and aesthetic preferences."""
 
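
A short sketch (not part of the diff) of how the two new models added above compose; the file paths and character name are placeholders.

from vibe_aigc.models import CharacterProfile, GenerationRequest

# Hypothetical profile; reference image and LoRA paths are placeholders.
hero = CharacterProfile(
    name="hero",
    reference_images=["refs/hero_face.png"],
    lora_path="loras/hero_v1.safetensors",
    lora_strength=0.7,
    trigger_words=["hero_character"],
)

# to_generation_params() fills the character-consistency fields of GenerationRequest
# (reference_image, character_strength, character_lora, character_lora_strength).
request = GenerationRequest(
    prompt="hero_character walking through a neon city at night",
    width=768,
    height=768,
    **hero.to_generation_params(),
)
print(request.reference_image, request.character_lora_strength)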