utim-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1018 @@
1
+ """
2
+ UTIM Blender Agent — Advanced Image-to-3D Pipeline (v2)
3
+ ========================================================
4
+
5
+ Architecture (4 Phases)
6
+ ------------------------
7
+ Phase 0 – Deep Image Analysis
8
+ OpenCV + Pillow analyse the image locally: dominant colours, contours,
9
+ depth-map estimation, feature regions (face, body, hair, clothing).
10
+ A rich "scene_brief" dict is assembled for Phase 1.
11
+
12
+ Phase 1 – Vision-LLM Scene Understanding
13
+ The image AND scene_brief are sent to a vision-capable LLM with a
14
+ comprehensive system prompt that asks for:
15
+ • Structured part decomposition (head, body, hair, accessory…)
16
+ • Per-part geometry strategy (primitive hint, mesh complexity)
17
+ • Material and colour information per part
18
+ • Tattoo / decal texture descriptions
19
+ • Overall proportions, pose, and scene context
20
+
21
+ Phase 2 – Procedural Blender Script Generation
22
+ A code-generation LLM receives the scene analysis and writes a
23
+ complete, sophisticated bpy Python script that:
24
+ 1. Builds each body part using Blender primitives + modifiers
25
+ (Subdivision Surface, Solidify, Skin, Curve-based hair, etc.)
26
+ 2. Applies per-part Principled BSDF materials with the analysed colours
27
+ 3. Projects the original image as a texture on the main surface using
28
+ Smart UV Project + image texture nodes
29
+ 4. Optionally generates procedural tattoo decals via overlay material
30
+ 5. Sets up a 3-point studio light rig
31
+ 6. Exports to the requested format
32
+
33
+ Phase 3 – Execution, Validation & Retry
34
+ The script is executed via Blender in headless mode. If it fails a
35
+ parse-and-fix loop runs up to MAX_RETRIES times before raising.
36
+ """
37
+ from __future__ import annotations
38
+
39
+ import base64
40
+ import json
41
+ import mimetypes
42
+ import os
43
+ import pathlib
44
+ import re
45
+ import subprocess
46
+ import time
47
+ import uuid
48
+ from typing import Any, Dict, List, Optional
49
+
50
+ # ---------------------------------------------------------------------------
51
+ # Configuration
52
+ # ---------------------------------------------------------------------------
53
+
54
# OpenRouter chat-completions endpoint URL.
_OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"

# Vision models (must support image_url content parts)
# Passed to _llm_call by _phase1_vision; _llm_call tries them in list order.
# NOTE(review): the "embed" and "rerank" entries look like embedding/reranking
# models rather than chat models — confirm they accept chat-completion requests.
_VISION_MODELS: List[str] = [
    "google/gemma-4-31b-it:free",
    "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free",
    "nvidia/nemotron-nano-12b-v2-vl:free",
    "google/gemma-4-26b-a4b-it:free",
    "nvidia/llama-nemotron-embed-vl-1b-v2:free",
    "nvidia/llama-nemotron-rerank-vl-1b-v2:free",
    "openrouter/free"
]

# Code-generation models passed to _llm_call by _phase2_generate_script,
# tried in list order.
_CODE_MODELS: List[str] = [
    "poolside/laguna-m.1:free",
    "cohere/north-mini-code:free",
    "qwen/qwen3-coder:free",
    "nvidia/nemotron-3-ultra-550b-a55b:free",
    "openrouter/free"
]

MAX_RETRIES = 3  # Script fix-and-retry attempts
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # System prompts
79
+ # ---------------------------------------------------------------------------
80
+
81
# System prompt sent by _phase1_vision together with the image. It defines the
# JSON schema of the scene description; _phase1_vision parses the reply and
# fills in defaults for the top-level keys named here (parts, hair,
# face_details, clothing, objects, lighting_suggestion, depth_estimation,
# image_texture_applicable, background_color, overall_proportions).
_VISION_SYSTEM_PROMPT = """\
You are an expert Blender 3D artist and scene analyst. Examine the image and
produce a detailed JSON scene description that will be used to procedurally
rebuild this image as a 3D scene in Blender.

CRITICAL ANALYSIS GUIDELINES:
- DEPTH CUES: Analyze perspective, shading gradients, and contour lines to estimate depth/z-distance
- PROPORTIONS: Provide precise relative measurements (values 0.0-1.0 for size/position ratios)

MANDATORY - CHECK THESE BEFORE OUTPUT:
1. TATTOOS: If ANY symbols/markings on skin, set has_tattoos=true and list EACH in tattoos array with location and shape_description
2. EYE COLOR: Extract EXACT iris color - check carefully, may be red/yellow/etc
3. HAIR GRADIENTS: If hair has color transitions, extract dark_root_color, mid_color, tip_color
4. EXPRESSION: Describe accurately (smirk, neutral, angry, etc)

- EYES: Must detail exact eye construction (color, size, shape, pupil, cornea)
- Multi-object scenes: If multiple objects exist, provide "objects" array with each object's own structure

OUTPUT FORMAT — a single raw JSON object (no markdown, no explanation):

{
"scene_type": "character",
"description": "<one-paragraph description>",
"subject_label": "<e.g. anime_character, human_face, vehicle, animal>",

"parts": [
{
"id": "head",
"label": "Head / Face",
"geometry_hint": "uv_sphere_deformed",
"dominant_colors": [[r,g,b]],
"material": {
"name": "Skin",
"base_color": [r, g, b, 1.0],
"roughness": 0.6,
"metallic": 0.0,
"subsurface": 0.0,
"subsurface_color": [r, g, b],
"emission": [0,0,0]
},
"relative_size": [w, h, d],
"relative_position": [x, y, z],
"depth_hint": "convex/front-facing",
"notes": "describe shape details"
}
],

"hair": {
"style": "spiky_anime",
"color": [r, g, b],
"secondary_color": [r, g, b],
"dark_root_color": [r, g, b],
"tip_color": [r, g, b],
"length": "medium",
"spike_count": 14,
"spike_directions": [
{"angle": 45, "direction": "forward_left", "width": 0.15},
{"angle": 60, "direction": "up_right", "width": 0.12}
],
"notes": "describe spike shapes and flow direction"
},

"face_details": {
"has_tattoos": true,
"tattoos": [
{
"location": "forehead_center",
"relative_position": [0.0, 0.85, 0.02],
"size": [0.35, 0.12],
"color": [r, g, b],
"shape_description": "double-trident with dot between (example: like Jujutsu Kaisen symbol)",
"depth_hint": "flat_on_surface"
}
],
"eyebrows": true,
"eyebrow_color": [r, g, b],
"eye_color": [r, g, b],
"eye_style": "anime_large",
"eye_size_ratio": 0.25,
"pupil_color": [r, g, b],
"expression": "smirk",
"eye_details": {
"iris_size": 0.7,
"shine_position": "upper_left",
"cornea_ior": 1.4
}
},

"clothing": [
{
"item": "scarf",
"color": [r, g, b],
"secondary_color": [r, g, b],
"material_hint": "fabric_thick",
"coverage": "neck_to_chin",
"thickness": 0.02,
"fold_directions": ["down_center", "out_sides"],
"relative_position": [0.0, -0.1, 0.0]
}
],

"objects": [
{
"id": "background",
"label": "Background Object",
"geometry_hint": "plane",
"dominant_colors": [[r,g,b]],
"relative_size": [w, h, d],
"relative_position": [x, y, z]
}
],

"lighting_suggestion": {
"type": "three_point",
"key_color": [r, g, b],
"fill_color": [r, g, b],
"rim_color": [r, g, b]
},

"overall_proportions": {
"head_scale": 1.0,
"body_visible": true,
"visible_parts": ["head", "neck", "shoulders"],
"head_to_body_ratio": 0.25
},

"depth_estimation": {
"foreground": "head/neck",
"background": "image_background",
"depth_layers": 3
},

"image_texture_applicable": true,
"background_color": [r, g, b]
}

Rules:
- All color values are 0.0-1.0 floats.
- For anime characters: subsurface=0.0, roughness=0.7, eye_style="anime_large".
- **TATTOOS/DECALS ARE MANDATORY**: If ANY markings, symbols, or special patterns are VISIBLE on skin, hair, or clothing, you MUST set has_tattoos=true and populate the tattoos array with precise positioning.
- **Eye details are mandatory**: ALWAYS populate eye_details with iris_size, shine_position, and cornea_ior.
- Hair spikes MUST have spike_count matching actual visible hair strands - count carefully and list ALL directions.
- Eye_size_ratio should be a decimal like 0.25 for 25% of head width.
- Tattoo relative_position uses 0-1 scale from head center (x,y,z) where z=0 is head center, z>0 is up.
- Output ONLY the raw JSON object — no backticks, no text before or after.
"""
227
+
228
# System prompt for the Phase 2 code-generation model (passed as `system` by
# _phase2_generate_script). The reply is post-processed by
# _clean_and_patch_script, so the rules here mirror the patches applied there.
#
# Corrections relative to the earlier wording:
#   * ShaderNodeTexImage.projection has no 'CAMERA' mode; telling the model to
#     set it produced scripts that failed on the first run.
#   * The scene JSON carries `relative_size`, not `relative_scale`.
#   * The 'Subsurface' -> 'Subsurface Weight' rename is stated up front so the
#     first generated script is already compatible (the fix prompt had it).
_CODE_SYSTEM_PROMPT = """\
You are a world-class Blender Python scripting expert specialising in
procedural character and object creation. You will receive a JSON scene
description and must output ONLY a complete, runnable bpy Python script.

CRITICAL COMPATIBILITY — Blender 5.x STRICT RULES:
1. Use `bpy.context.scene.collection.objects.link(obj)` — NEVER `bpy.context.collection.objects.link(obj)`
2. NEVER use `mesh.use_auto_smooth` — removed in Blender 5.x
3. NEVER use `mesh.normals_split_custom_set()` or `mesh.normals_split_custom_set_from_vertices()`
4. NEVER use `bmesh` for mesh creation — use `mesh.from_pydata(vertices, [], faces)` instead
5. `bpy.context.view_layer.objects.active` requires an object already linked to the scene
6. Use `obj.select_set(True)` to select objects
7. For Subdivision Surface: `mod = obj.modifiers.new("Subsurf", "SUBSURF"); mod.levels = 2`
8. NEVER set vertex normals directly — they are read-only in Blender 5.x
9. Principled BSDF: use `principled.inputs['Subsurface Weight'].default_value` — the 'Subsurface' input was renamed

MANDATORY CONSTRUCTION ELEMENTS - ALWAYS INCLUDE THESE:
**These elements MUST be in EVERY script generated. No exceptions.**

1. **Eye Construction (Layered)** - ALWAYS create separate eye objects if character:
- Create sclera sphere (white of eye), slightly flattened on X axis
- Add iris plane positioned at front of sclera with eye_color material
- Add thin transparent cornea shell with IOR=1.4 for shine/reflection
- Add white highlight (small sphere) on upper left of iris for eye sparkle
- Eye size should match eye_size_ratio relative to head
- Position symmetrically: left eye at negative X, right eye at positive X

2. **Tattoo Decal Construction** - ALWAYS create if has_tattoos=true:
- For EACH tattoo in tattoos array, create a flat plane object
- Position using relative_position scaled to head size
- Use Shrinkwrap modifier to conform to head surface
- Use emission material for glowing tattoos, or regular material for skin markings
- For forehead tattoos: position at top/front of head with z=0.1 to 0.15
- Create BEFORE linking to collection, then shrinkwrap to target

3. **Hair Curves** - ALWAYS create if hair style is spiky_anime or spike_count > 0:
- Create CURVE objects with `bpy.data.curves.new('HairCurve', 'CURVE')`
- Use `curve.extrude = 0.02` and `curve.bevel_depth = 0.01` for thickness
- For each spike: define spline points, set handle types to 'VECTOR'
- Rotate spikes outward using spike_directions angle and direction hints
- Apply gradient colors (dark_root at base, color middle, tip_color at top)
- If hair.style is "spiky" WITHOUT explicit spike_directions, create 12-16 cones around head with varied angles
- Hair spikes should emerge from head surface (z offset from head radius)
- Vary spike lengths: some short (0.3), some long (0.6), for natural chaos

4. **Fallback Hair Construction** (when curve data insufficient):
- Use `bpy.ops.mesh.primitive_cone_add(radius1=0.12, radius2=0.02, depth=0.5)`
- Position cones around head circumference with slight randomness
- Apply hair.material to each spike
- Add Subdivision Surface modifier for smoother hair strands

5. Clothing with Thickness:
- Add Solidify modifier for fabric thickness: `mod = obj.modifiers.new("Solidify", "SOLIDIFY"); mod.thickness = 0.02`
- Use fold_directions for edge crease weights if applicable

6. Image Texture Projection:
- Add image texture node: `nodes.new('ShaderNodeTexImage')`
- Keep `tex.projection = 'FLAT'` (valid values are 'FLAT', 'BOX', 'SPHERE', 'TUBE' — there is no camera mode on this node)
- UV-unwrap the mesh (e.g. Smart UV Project) so the image lines up with the surface
- Connect to Principled BSDF Base Color

7. Multi-Object Scene Support - If the scene contains multiple objects:
- Iterate through scene_data.get('objects', []) array
- For each object, build complete geometry including its own parts, materials, and positioning
- Position objects using their relative_position field
- Scale using relative_size field
- Add appropriate spacing between objects to avoid intersection

SCRIPT STRUCTURE:
- Clear the scene using bpy.ops.object.select_all + bpy.ops.object.delete
- Build HEAD first, then EYES, then HAIR, then CLOTHING, then TATTOOS
- Apply materials with the exact colors from the scene JSON
- If hair uses curve-based spikes, create separate Curve objects for each spike
- Set up 3-point lighting rig (key light, fill light, rim light)
- Set world background color from background_color field
- ALWAYS include UTIM_BLENDER_SUCCESS print at end

EXPORT CODE (hardcode the exact values given in the user prompt, NOT placeholders):
```python
export_path = "<ACTUAL_PATH_STRING>"
export_format = "<ACTUAL_FORMAT_STRING>"
if export_format == 'blend':
    bpy.ops.wm.save_as_mainfile(filepath=export_path, copy=True)
elif export_format == 'obj':
    bpy.ops.wm.obj_export(filepath=export_path)
elif export_format == 'glb':
    bpy.ops.export_scene.gltf(filepath=export_path, export_format='GLB')
elif export_format == 'fbx':
    bpy.ops.export_scene.fbx(filepath=export_path)
print(f"UTIM_BLENDER_SUCCESS: {export_path}")
```

Output ONLY the Python script — no markdown, no explanation, no fences.
"""
320
+
321
# System prompt for the script-repair model: it receives a failed bpy script
# plus its error and must return a corrected script. The rules repeat the
# Blender 5.x constraints that _clean_and_patch_script enforces.
# (Typo "BLNDER" in the Subsurface rule corrected to "BLENDER".)
_FIX_SYSTEM_PROMPT = """\
You are a Blender Python debugging expert. A bpy script failed with an error.
Fix the script so it runs correctly in Blender 5.x.

Rules:
- Output ONLY the corrected Python script, no explanation, no markdown fences.
- Keep all the original logic intact; only fix the errors.
- NEVER use bmesh, mesh.use_auto_smooth, normals_split_custom_set.
- ALWAYS use bpy.context.scene.collection.objects.link(obj).
- If a modifier or operator is unavailable, comment it out gracefully.
- Make sure export_path and export_format are hardcoded strings.
- BLENDER 5.x FIX: Change principled.inputs['Subsurface'] to principled.inputs['Subsurface Weight']
- End the script with: print(f"UTIM_BLENDER_SUCCESS: {export_path}")
"""
335
+
336
+ # ---------------------------------------------------------------------------
337
+ # Low-level LLM call
338
+ # ---------------------------------------------------------------------------
339
+
340
def _llm_call(
    system: str,
    user_text: str,
    models: List[str],
    image_b64: Optional[str] = None,
    image_mime: Optional[str] = None,
    max_tokens: int = 8192,
    timeout: int = 120,
) -> str:
    """Call OpenRouter with a system+user message, returning the assistant text.

    Tries each model in *models* in order until one returns a non-empty
    completion. A model is retried (up to three attempts, sleeping 5 s then
    10 s) only when the failure carries HTTP status 429; any other failure
    moves straight on to the next model.

    Args:
        system: System prompt text.
        user_text: User message text.
        models: Model identifiers to try, in priority order.
        image_b64: Optional base64-encoded image; sent as an ``image_url``
            data URL only when *image_mime* is also given.
        image_mime: MIME type of *image_b64*.
        max_tokens: ``max_tokens`` value forwarded in the request body.
        timeout: Timeout forwarded to the request helper.

    Returns:
        The stripped assistant message content.

    Raises:
        RuntimeError: If no API key is configured, or if every model failed
            (the last underlying error is chained as the cause).
    """
    from utim_cli.config import config

    api_key = os.getenv("OPENROUTER_API_KEY", "") or config.get("api_key")
    if not api_key:
        raise RuntimeError("Neither OPENROUTER_API_KEY nor UTIM API key is set.")

    if image_b64 and image_mime:
        user_content: Any = [
            {"type": "text", "text": user_text},
            {"type": "image_url", "image_url": {"url": f"data:{image_mime};base64,{image_b64}"}},
        ]
    else:
        user_content = user_text

    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": user_content},
    ]
    max_attempts = 3

    last_err: Exception = RuntimeError("No models tried.")
    for model in models:
        for attempt in range(max_attempts):
            try:
                # Imported inside the try so that an import failure is reported
                # through the same RuntimeError as any other per-model failure.
                from utim_cli.client_utils import proxy_openrouter_request

                resp = proxy_openrouter_request(
                    json_data={
                        "model": model,
                        "messages": messages,
                        "max_tokens": max_tokens,
                        "stream": False,
                    },
                    stream=False,
                    timeout=timeout,
                )
                resp.raise_for_status()
                data = resp.json()
                text = data["choices"][0]["message"]["content"]
                # A null/blank completion is useless to every caller; treat it
                # as a failure so the next model gets a chance instead of
                # returning "" or failing with an opaque AttributeError.
                if not isinstance(text, str) or not text.strip():
                    raise ValueError(f"Model {model!r} returned an empty completion.")
                return text.strip()
            except Exception as exc:  # noqa: BLE001
                last_err = exc
                code = getattr(getattr(exc, "response", None), "status_code", 0)
                if code == 429 and attempt < max_attempts - 1:
                    # Rate limited: back off 5 s, then 10 s, on the same model.
                    time.sleep(5 * (attempt + 1))
                    continue
                break  # try next model
    raise RuntimeError(f"All LLM models failed. Last error: {last_err}") from last_err
396
+
397
+
398
+ # ---------------------------------------------------------------------------
399
+ # Phase 0 — Local image pre-analysis (no LLM needed)
400
+ # ---------------------------------------------------------------------------
401
+
402
def _phase0_local_analysis(image_path: str) -> Dict[str, Any]:
    """Extract dominant colours, basic image stats, and depth hints using Pillow.

    The analysis is best-effort: if Pillow is missing or anything fails while
    reading/analysing the image, the problem is logged and whatever has been
    collected so far (at minimum the default values) is returned.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A dict with the keys ``width``, ``height``, ``dominant_colors``
        (up to 8 RGB triples scaled to 0-1), ``aspect_ratio``,
        ``has_transparency``, ``brightness`` (0-1), ``depth_hints`` and
        ``face_landmarks``; on a successful run also ``potential_tattoos``.
    """
    import logging
    import statistics

    log = logging.getLogger(__name__)

    brief: Dict[str, Any] = {
        "width": 0,
        "height": 0,
        "dominant_colors": [],
        "aspect_ratio": 1.0,
        "has_transparency": False,
        "brightness": 0.5,
        "depth_hints": {},
        "face_landmarks": {},
    }

    try:
        from PIL import Image, ImageFilter  # type: ignore
    except ImportError:
        log.debug("Pillow is not installed; skipping local image analysis.")
        return brief

    band = 15          # strip thickness (pixels) used for every region scan
    edge_threshold = 50  # edge-filter response above this counts as an edge

    try:
        # The context manager closes the file; convert() returns a loaded copy.
        with Image.open(image_path) as source:
            img = source.convert("RGBA")

        brief["width"] = img.width
        brief["height"] = img.height
        brief["aspect_ratio"] = round(img.width / max(img.height, 1), 3)
        # After convert("RGBA") the mode is always RGBA, so test the alpha
        # channel itself: any pixel below full opacity means transparency.
        brief["has_transparency"] = img.getchannel("A").getextrema()[0] < 255

        # All further analysis runs on a fixed-size thumbnail.
        small = img.convert("RGB").resize((150, 150), Image.LANCZOS)
        width_small, height_small = small.size

        def rows_of(flat):
            """Split a flat pixel list into one list per thumbnail row."""
            return [
                flat[r * width_small:(r + 1) * width_small]
                for r in range(height_small)
            ]

        def edge_ratio(cells):
            """Fraction of *cells* whose edge response exceeds the threshold."""
            return sum(1 for p in cells if p > edge_threshold) / max(len(cells), 1)

        # Quantise to 8 dominant colours. Older Pillow exposes FASTOCTREE on
        # the Image module rather than on Image.Quantize.
        quantize_methods = getattr(Image, "Quantize", Image)
        palette = small.quantize(
            colors=8, method=quantize_methods.FASTOCTREE
        ).getpalette()
        if palette:
            usable = min(24, len(palette) - len(palette) % 3)
            brief["dominant_colors"] = [
                [round(channel / 255, 3) for channel in palette[i:i + 3]]
                for i in range(0, usable, 3)
            ]

        # Average brightness
        grey = small.convert("L")
        pixels = list(grey.getdata())
        brief["brightness"] = round(statistics.mean(pixels) / 255, 3)

        # --- Depth estimation: blur, then edge-detect, then measure edge
        # density per vertical and per horizontal strip. ---
        edges = grey.filter(ImageFilter.GaussianBlur(radius=2)).filter(
            ImageFilter.FIND_EDGES
        )
        edge_rows = rows_of(list(edges.getdata()))

        strip_counts = [
            edge_ratio([p for row in edge_rows for p in row[x:x + band]])
            for x in range(0, width_small, band)
        ]
        horizontal_edge_density = [
            edge_ratio([p for row in edge_rows[y:y + band] for p in row])
            for y in range(0, height_small, band)
        ]

        center = strip_counts[len(strip_counts) // 2] if strip_counts else None
        brief["depth_hints"] = {
            "vertical_edge_density": strip_counts,
            "horizontal_edge_density": horizontal_edge_density,
            "center_focus": center if center is not None else 0.5,
            "has_center_subject": center > 0.1 if center is not None else True,
            "estimated_layer_depth": (
                sum(1 for d in horizontal_edge_density if d > 0.15)
                if horizontal_edge_density else 1
            ),
            "depth_variance": round(
                statistics.stdev(strip_counts) if len(strip_counts) > 1 else 0, 3
            ),
        }

        # --- Face hint: true when any horizontal band has mid-range mean
        # luminance (0.3-0.7). This is a coarse heuristic, not a detector. ---
        grey_rows = rows_of(pixels)
        face_region_found = any(
            0.3
            < statistics.mean([p for row in grey_rows[y:y + band] for p in row]) / 255
            < 0.7
            for y in range(0, height_small, band)
        )
        brief["face_landmarks"] = {
            "estimated_face_present": face_region_found,
            # Despite the key name this is the luminance range of the whole
            # thumbnail (0-255), not of detected skin.
            "skin_tone_range": [min(pixels), max(pixels)] if pixels else [128, 128],
        }

        # --- Dark-mark hint: count dark pixels in the top fifth of the
        # thumbnail (at least 20 rows), taken as the forehead area. ---
        forehead_rows = max(20, height_small // 5)
        forehead_dark_pixels = sum(
            1 for row in grey_rows[:forehead_rows] for p in row if p < 100
        )
        dark_marks = forehead_dark_pixels > 50
        brief["potential_tattoos"] = {
            "dark_marks_on_forehead": dark_marks,
            "check_tattoos": dark_marks,
        }
    except Exception as exc:  # noqa: BLE001 - analysis is deliberately best-effort
        log.warning("Local image analysis failed for %s: %s", image_path, exc)

    return brief
524
+
525
+
526
+ # ---------------------------------------------------------------------------
527
+ # Phase 1 — Vision-LLM scene understanding
528
+ # ---------------------------------------------------------------------------
529
+
530
def _vision_json_candidate(text: str, start: int) -> str:
    """Return the balanced ``{...}`` object in *text* that opens at *start*.

    Braces inside double-quoted JSON strings are ignored. If the braces never
    balance, everything up to the last ``}`` is returned (possibly ``""``).
    """
    depth = 0
    in_string = False
    escaped = False
    for idx in range(start, len(text)):
        ch = text[idx]
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[start:idx + 1]
    return text[start:text.rfind("}") + 1]


def _vision_json_repair(text: str) -> str:
    """Drop ``//`` comments and trailing commas that sit outside JSON strings."""
    string_literal = r'("(?:\\.|[^"\\])*")'

    def keep_strings(match):
        # Group 1 is a complete string literal: keep it; anything else goes.
        return match.group(1) or ""

    # Comments first: a trailing comma followed by a comment only becomes
    # recognisable once the comment has been removed.
    text = re.sub(string_literal + r"|//[^\n]*", keep_strings, text)
    return re.sub(string_literal + r"|,(?=\s*[}\]])", keep_strings, text)


def _phase1_vision(image_path: str, scene_brief: Dict[str, Any]) -> Dict[str, Any]:
    """Send the image to a vision model and parse the returned scene JSON.

    Args:
        image_path: Path of the image to analyse; it is read and sent
            base64-encoded.
        scene_brief: Output of the local pre-analysis; its values are
            summarised in the user message.

    Returns:
        The parsed scene description, with defaults filled in for ``parts``,
        ``hair``, ``face_details``, ``clothing``, ``objects``,
        ``lighting_suggestion``, ``depth_estimation``,
        ``image_texture_applicable``, ``background_color`` and
        ``overall_proportions`` when they are missing or null.

    Raises:
        FileNotFoundError: If *image_path* is not an existing file.
        ValueError: If the image type cannot be determined, or the model reply
            contains no JSON object or JSON that cannot be repaired.
    """
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        ext = os.path.splitext(image_path)[1].lower()
        mime_map = {
            ".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg",
            ".webp": "image/webp", ".gif": "image/gif", ".bmp": "image/bmp",
        }
        mime_type = mime_map.get(ext)
        if not mime_type:
            raise ValueError(f"Unsupported image format: {image_path}")

    with open(image_path, "rb") as fh:
        image_b64 = base64.b64encode(fh.read()).decode()

    brief_text = (
        f"Image pre-analysis (local):\n"
        f" Resolution : {scene_brief.get('width')}x{scene_brief.get('height')} px\n"
        f" Brightness : {scene_brief.get('brightness')}\n"
        f" Dominant colours (0-1 RGB): {json.dumps(scene_brief.get('dominant_colors', []))}\n"
        f" Depth hints: {json.dumps(scene_brief.get('depth_hints', {}))}\n"
        f" Face landmarks: {json.dumps(scene_brief.get('face_landmarks', {}))}\n"
        f" Potential tattoos: {json.dumps(scene_brief.get('potential_tattoos', {}))}\n\n"
        "MANDATORY: If the pre-analysis shows potential_tattoos.check_tattoos=true, you MUST detect and list those tattoos!\n"
        "MANDATORY: Re-examine the eyes - extract the EXACT iris color (may be red, yellow, etc.)\n"
        "Examine this image carefully and output the JSON scene description exactly as specified."
    )

    raw = _llm_call(
        system=_VISION_SYSTEM_PROMPT,
        user_text=brief_text,
        models=_VISION_MODELS,
        image_b64=image_b64,
        image_mime=mime_type,
        max_tokens=8192,
    )

    # Strip possible markdown fences the model emits despite instructions
    clean = re.sub(r"```(?:json)?\s*|\s*```", "", raw).strip()

    start = clean.find("{")
    if start < 0:
        raise ValueError(
            f"Vision model did not return a JSON object.\nRaw output:\n{raw}"
        )

    json_str = _vision_json_candidate(clean, start)

    try:
        scene_data: Dict[str, Any] = json.loads(json_str)
    except json.JSONDecodeError:
        # Models sometimes add comments or trailing commas; repair and retry.
        json_str = _vision_json_repair(json_str)
        try:
            scene_data = json.loads(json_str)
        except json.JSONDecodeError as e2:
            raise ValueError(
                f"Vision model returned malformed JSON at position {e2.pos}:\n"
                f"Error: {e2.msg}\n"
                f"JSON snippet: {json_str[max(0,e2.pos-50):e2.pos+50]}\n"
                f"Raw output:\n{raw[:2000]}"
            ) from e2

    # Ensure defaults. An explicit null is treated like a missing key so the
    # nested lookups below cannot hit None.
    defaults: Dict[str, Any] = {
        "parts": [],
        "hair": {},
        "face_details": {},
        "clothing": [],
        "objects": [],
        "lighting_suggestion": {"type": "three_point"},
        "depth_estimation": {"depth_layers": 1},
        "image_texture_applicable": True,
        "background_color": [0.95, 0.95, 0.95],
        "overall_proportions": {"head_to_body_ratio": 0.25},
    }
    for key, default in defaults.items():
        if scene_data.get(key) is None:
            scene_data[key] = default

    # The two sections that get nested defaults must be mappings.
    for key in ("lighting_suggestion", "face_details"):
        if not isinstance(scene_data[key], dict):
            scene_data[key] = dict(defaults[key])

    # Ensure nested defaults
    scene_data["lighting_suggestion"].setdefault("rim_color", [0.8, 0.8, 1.0])
    face = scene_data["face_details"]
    face.setdefault("tattoos", [])
    face.setdefault("has_tattoos", bool(face.get("tattoos")))

    return scene_data
635
+
636
+
637
+ # ---------------------------------------------------------------------------
638
+ # Phase 2 — Blender script generation
639
+ # ---------------------------------------------------------------------------
640
+
641
def _phase2_generate_script(
    scene_data: Dict[str, Any],
    image_path: str,
    name: str,
    export_path: str,
    export_format: str,
) -> str:
    """Request a full bpy script for *scene_data* from a code model.

    The user message carries the object name, export target, source image
    path and the scene JSON, followed by a fixed list of build instructions.
    The model reply is passed through ``_clean_and_patch_script`` before it
    is returned.
    """
    # Blender accepts forward slashes on every platform.
    img_for_blender = image_path.replace("\\", "/")
    out_for_blender = export_path.replace("\\", "/")

    header_parts = [
        f"Object name: {name}\n",
        f"Export path: {out_for_blender}\n",
        f"Export format: {export_format}\n",
        f"Source image path (for texture projection): {img_for_blender}\n\n",
        f"Scene description JSON:\n{json.dumps(scene_data, indent=2)}\n\n",
        "Generate the complete Blender Python script now.\n\n",
        "IMPORTANT INSTRUCTIONS FOR ENHANCED SCENE DATA:\n",
    ]
    guidance = [
        "- Build every part listed in scene_data['parts'] as a separate named object",
        "- Use the EXACT export_path and export_format strings above — do NOT use placeholders",
        "- For hair with spike_directions: create CURVE objects with bevel depth for flowing hair, OR create multiple cone/cylinder strands positioned according to each spike's angle and direction",
        "- For tattoos with relative_position [x,y]: use these as UV coordinates or shrinkwrap positions on the head mesh",
        "- For clothing items with fold_directions: add simple geometry hints for fabric folds (use simple planes or slight vertex offsets)",
        "- For parts with depth_hint='convex': extend geometry outward slightly; 'front-facing': keep flat",
        "- Create separate objects for each item in scene_data['clothing'] array",
        "- If scene_data['parts'] contains multiple objects, position them according to their relative_position values using depth_estimation hints",
        "- Apply multi-material support if material_regions is specified",
        "- Set up rim lighting using lighting_suggestion.rim_color",
    ]
    # Every instruction is newline-terminated, including the last one.
    request_text = "".join(header_parts) + "".join(f"{item}\n" for item in guidance)

    generated = _llm_call(
        system=_CODE_SYSTEM_PROMPT,
        user_text=request_text,
        models=_CODE_MODELS,
        max_tokens=16384,
    )

    return _clean_and_patch_script(
        generated, out_for_blender, export_format, img_for_blender
    )
682
+
683
+
684
+ # ---------------------------------------------------------------------------
685
+ # Script cleaning & Blender 5.x compatibility patching
686
+ # ---------------------------------------------------------------------------
687
+
688
def _clean_and_patch_script(
    raw: str,
    export_path: str,
    export_format: str,
    image_path: str = "",
) -> str:
    """Strip markdown fences, replace placeholders, and fix Blender 5.x issues.

    Args:
        raw: Script text as returned by the model.
        export_path: Export target; backslashes are converted to ``/``.
        export_format: Export format string substituted for placeholders.
        image_path: Optional source image path substituted for an
            ``image_path = "<...>"`` placeholder.

    Returns:
        The patched script. Statements this module treats as unsupported in
        Blender 5.x (``use_auto_smooth`` assignment, ``normals_split_custom_set*``
        calls, vertex-normal assignment) are replaced by ``pass  # <reason>``
        so that a block containing only such a statement stays valid Python.
        A ``UTIM_BLENDER_SUCCESS`` print is appended when the script has none.
    """

    def py_literal(value: str) -> str:
        """Double-quoted Python string literal for *value* (quotes escaped)."""
        return json.dumps(value, ensure_ascii=False)

    def assignment(variable: str, value: str):
        """Replacement callback emitting ``variable = "value"`` verbatim."""
        line = f"{variable} = {py_literal(value)}"
        # A callable replacement keeps re.sub from interpreting backslashes.
        return lambda _match: line

    # Strip fences
    script = re.sub(r"```(?:python3?|py)?\s*|\s*```", "", raw).strip()

    # ── Placeholder replacement ──────────────────────────────────────────────
    esc_path = export_path.replace("\\", "/")
    script = re.sub(
        r"""export_path\s*=\s*(?:__EXPORT_PATH__|(["'])(?:__EXPORT_PATH__|<ACTUAL[^"'\n]*>)\1)""",
        assignment("export_path", esc_path),
        script,
    )
    script = re.sub(
        r"""export_format\s*=\s*(?:__EXPORT_FORMAT__|(["'])(?:__EXPORT_FORMAT__|<ACTUAL[^"'\n]*>)\1)""",
        assignment("export_format", export_format),
        script,
    )
    if image_path:
        script = re.sub(
            r"""image_path\s*=\s*(["'])<[^"'\n]*>\1""",
            assignment("image_path", image_path.replace("\\", "/")),
            script,
        )

    # ── Blender 5.x compatibility fixes ─────────────────────────────────────
    # Any argument name, not only `obj`.
    script = re.sub(
        r"\bbpy\.context\.collection\.objects\.link\(",
        "bpy.context.scene.collection.objects.link(",
        script,
    )

    # A dotted receiver (e.g. `mesh`, `obj.data`) that starts a fresh
    # expression; the look-behind stops matches from starting mid-identifier
    # or after an index/call, which would leave a dangling prefix behind.
    receiver = r"(?<![\w.\])])[A-Za-z_][\w.]*"
    # `=(?!=)` matches assignment only, never an `==` comparison.
    neutralised = [
        (receiver + r"\.use_auto_smooth\s*=(?!=)[^#\n]*",
         "mesh.use_auto_smooth removed in Blender 5.x"),
        (receiver + r"\.normals_split_custom_set_from_vertices\([^#\n]*",
         "normals_split_custom_set_from_vertices removed in Blender 5.x"),
        (receiver + r"\.normals_split_custom_set\([^#\n]*",
         "normals_split_custom_set removed in Blender 5.x"),
        (r"(?<![\w.\])])v\.normal\s*=(?!=)[^#\n]*",
         "vertex.normal is read-only in Blender 5.x"),
        (receiver + r"\.vertices\[[^\]\n]+\]\.normal\s*=(?!=)[^#\n]*",
         "vertex.normal is read-only in Blender 5.x"),
    ]
    for pattern, reason in neutralised:
        # `pass` keeps loops/ifs whose only statement was removed syntactically valid.
        script = re.sub(pattern, f"pass  # {reason}", script)

    # Blender 5.x renamed 'Subsurface' to 'Subsurface Weight' (any receiver,
    # either quote style). Sockets are set through .default_value, so a direct
    # item assignment is rewritten as well.
    script = re.sub(
        r"""\.inputs\[(["'])Subsurface\1\]""",
        r".inputs[\1Subsurface Weight\1]",
        script,
    )
    script = re.sub(
        r"""(\.inputs\[(["'])Subsurface Weight\2\])\s*=(?!=)\s*""",
        r"\1.default_value = ",
        script,
    )

    # Ensure the success marker is present. A plain string literal is used so
    # that braces or quotes in the path cannot break the generated statement.
    if "UTIM_BLENDER_SUCCESS" not in script:
        marker = py_literal(f"UTIM_BLENDER_SUCCESS: {esc_path}")
        script += f"\nprint({marker})\n"

    return script
770
+
771
+
772
+ # ---------------------------------------------------------------------------
773
+ # Phase 3 — Blender execution with retry
774
+ # ---------------------------------------------------------------------------
775
+
776
def _phase3_execute(
    script: str,
    scene_data: Dict[str, Any],
    image_path: str,
    name: str,
    export_path: str,
    export_format: str,
    tmp_dir: pathlib.Path,
) -> str:
    """Execute the Blender script, retrying with LLM-assisted fixes on failure.

    Each attempt writes the current script to a uniquely named file inside
    ``tmp_dir`` and runs it with Blender (``-b -noaudio -P <script>``).  An
    attempt succeeds when Blender exits with code 0 and either
    ``export_path`` exists or the path printed after the
    ``UTIM_BLENDER_SUCCESS:`` marker exists.  After a failed attempt (except
    the last one) the error output and the script are sent to the code LLM,
    and its answer, cleaned by ``_clean_and_patch_script``, becomes the
    script for the next attempt.

    Parameters
    ----------
    script:
        Initial Blender Python script to run.
    scene_data:
        Scene analysis dict; accepted for interface compatibility and not
        read by this function.
    image_path:
        Source image path, forwarded to ``_clean_and_patch_script`` when a
        fixed script is patched.
    name:
        Base name used in the generated script file names.
    export_path:
        Path where the script is expected to write the exported model.
    export_format:
        Export format string, included in the fix prompt and forwarded to
        ``_clean_and_patch_script``.
    tmp_dir:
        Existing directory in which the per-attempt script files are written.

    Returns
    -------
    str
        Path of the exported file that was found on disk.

    Raises
    ------
    RuntimeError
        If ``BLENDER_PATH`` is not configured, if the LLM fix step fails, or
        if every attempt (``MAX_RETRIES + 1`` in total) fails.
    subprocess.TimeoutExpired
        If a single Blender run exceeds the 300 second timeout (not caught
        here).
    """
    from utim_cli.config import BLENDER_PATH  # noqa: PLC0415
    if not BLENDER_PATH:
        raise RuntimeError(
            "Blender executable not found. Set UTIM_BLENDER_PATH environment variable "
            "or install Blender on the system PATH."
        )

    current_script = script
    last_error = ""

    for attempt in range(MAX_RETRIES + 1):
        # Save script under a unique name so every attempt stays inspectable.
        script_path = tmp_dir / f"gen_{name}_{uuid.uuid4().hex[:8]}.py"
        script_path.write_text(current_script, encoding="utf-8")

        # Build command.  ``cmd`` is the shell-style string registered with
        # the sandbox approval list; ``run_args`` is what is actually executed.
        if os.name == "nt":
            cmd = f'& "{BLENDER_PATH}" -b -noaudio -P "{script_path}"'
            run_args = ["powershell", "-Command", cmd]
        else:
            cmd = f'"{BLENDER_PATH}" -b -noaudio -P "{script_path}"'
            # On POSIX a plain string without shell=True is treated as the
            # program name itself (quotes and flags included), so the process
            # could never start.  Pass an argv list instead.
            run_args = [str(BLENDER_PATH), "-b", "-noaudio", "-P", str(script_path)]

        # Auto-approve in sandbox mode.  Deliberately best-effort: a missing
        # or failing approval helper must not prevent the Blender run.
        try:
            from utim_cli.tools import _SANDBOX_MODE, is_command_approved, approve_command  # noqa: PLC0415
            if _SANDBOX_MODE and not is_command_approved(cmd):
                approve_command(cmd)
        except Exception:
            pass

        result = subprocess.run(
            run_args,
            capture_output=True,
            text=True,
            # Undecodable bytes in Blender's output must not abort the run.
            errors="replace",
            timeout=300,
        )

        combined_output = (result.stdout or "") + (result.stderr or "")

        if result.returncode == 0:
            # Verify export file exists
            if pathlib.Path(export_path).exists():
                return export_path
            # Fall back to the path the script itself reported.
            match = re.search(r"UTIM_BLENDER_SUCCESS:\s*(.+)", combined_output)
            if match:
                found_path = match.group(1).strip()
                if pathlib.Path(found_path).exists():
                    return found_path
            last_error = (
                f"Blender exit 0 but export not found at: {export_path}\n"
                f"Output:\n{combined_output[-2000:]}"
            )
        else:
            last_error = (
                f"Blender exit code {result.returncode}\n"
                f"Output:\n{combined_output[-2000:]}"
            )

        if attempt < MAX_RETRIES:
            # Ask LLM to fix the script
            fix_prompt = (
                f"The following Blender Python script failed with this error:\n\n"
                f"--- ERROR ---\n{last_error}\n\n"
                f"--- SCRIPT ---\n{current_script}\n\n"
                f"Fix the script. "
                f"Export path must be: {export_path.replace(chr(92), '/')}\n"
                f"Export format must be: {export_format}"
            )
            try:
                raw_fixed = _llm_call(
                    system=_FIX_SYSTEM_PROMPT,
                    user_text=fix_prompt,
                    models=_CODE_MODELS,
                    max_tokens=16384,
                )
                current_script = _clean_and_patch_script(
                    raw_fixed,
                    export_path.replace("\\", "/"),
                    export_format,
                    image_path.replace("\\", "/"),
                )
            except Exception as fix_exc:
                raise RuntimeError(
                    f"Script execution failed and LLM fix also failed.\n"
                    f"Blender error:\n{last_error}\n"
                    f"Fix error: {fix_exc}"
                ) from fix_exc

    raise RuntimeError(
        f"Blender script failed after {MAX_RETRIES + 1} attempts.\n"
        f"Last error:\n{last_error}"
    )
877
+
878
+
879
+ # ---------------------------------------------------------------------------
880
+ # Public entry point
881
+ # ---------------------------------------------------------------------------
882
+
883
def blender_agent_create_from_image(
    image_path: str,
    name: str,
    output_path: Optional[str] = None,
    output_format: str = "blend",
) -> str:
    """Create a detailed 3-D model from an image using the 4-phase Blender pipeline.

    Runs local image analysis (Phase 0), vision-LLM scene understanding
    (Phase 1), Blender script generation (Phase 2) and headless execution
    (Phase 3) in order.  A failure in any phase stops the pipeline and is
    reported in the returned log instead of being raised.

    Parameters
    ----------
    image_path:
        Absolute or relative path to the source image (PNG, JPG, WEBP, BMP).
    name:
        Base name for the Blender object and the exported file (no extension).
        Characters other than word characters and ``-`` are replaced by ``_``.
    output_path:
        Directory where the exported file will be saved.
        Defaults to ``.utim_tmp/blender_assets/`` in the current working
        directory.  The directory is created if it does not exist.
    output_format:
        ``"blend"``, ``"obj"``, ``"glb"``, or ``"fbx"`` (case-insensitive).
        Any other value falls back to ``"blend"``.

    Returns
    -------
    str
        A human-readable progress log including the path to the exported
        file, or an error description.
    """
    output_format = output_format.lower()
    if output_format not in ("blend", "obj", "glb", "fbx"):
        output_format = "blend"

    output_dir = output_path or os.path.join(os.getcwd(), ".utim_tmp", "blender_assets")
    assets_dir = pathlib.Path(output_dir).absolute()
    assets_dir.mkdir(parents=True, exist_ok=True)

    safe_name = re.sub(r"[^\w\-]", "_", name)
    # A random suffix keeps repeated runs from overwriting earlier exports.
    export_filename = f"{safe_name}_{uuid.uuid4().hex[:8]}.{output_format}"
    export_path = str(assets_dir / export_filename)

    tmp_dir = pathlib.Path(".utim_tmp/blender")
    tmp_dir.mkdir(parents=True, exist_ok=True)

    log_lines: List[str] = []

    def _log(msg: str) -> None:
        log_lines.append(msg)

    def _failure(phase: str, exc: Exception) -> str:
        """Return the log collected so far followed by the failure notice."""
        return "\n".join(log_lines) + f"\n[Blender Agent] {phase} FAILED.\nReason: {exc}"

    def _count(value: Any) -> int:
        """Return ``len(value)``, treating ``None`` / unsized values as empty."""
        try:
            return len(value)
        except TypeError:
            return 0

    overall_start = time.time()

    # ── Phase 0: Local image analysis ────────────────────────────────────────
    _log("[Blender Agent] Phase 0: Local image analysis...")
    try:
        scene_brief = _phase0_local_analysis(image_path)
    except Exception as exc:
        # Report like the other phases instead of raising to the caller.
        return _failure("Phase 0", exc)
    _log(
        f" Resolution : {scene_brief.get('width')}x{scene_brief.get('height')} px\n"
        f" Brightness : {scene_brief.get('brightness')}\n"
        f" Dom. colours: {_count(scene_brief.get('dominant_colors', []))} found"
    )

    # ── Phase 1: Vision-LLM scene understanding ───────────────────────────────
    _log("[Blender Agent] Phase 1: Vision-LLM scene analysis...")
    try:
        scene_data = _phase1_vision(image_path, scene_brief)
    except Exception as exc:
        return _failure("Phase 1", exc)

    # The scene description comes from an LLM, so any field may be missing,
    # null or of an unexpected type; the summary below must never crash on it.
    face_details = scene_data.get("face_details")
    tattoos = face_details.get("tattoos", []) if isinstance(face_details, dict) else []
    hair = scene_data.get("hair")
    hair_style = hair.get("style", "n/a") if isinstance(hair, dict) else "n/a"
    description = str(scene_data.get("description") or "")
    part_count = _count(scene_data.get("parts", []))
    _log(
        f" Scene type : {scene_data.get('scene_type', 'unknown')}\n"
        f" Subject : {scene_data.get('subject_label', 'unknown')}\n"
        f" Description : {description[:120]}...\n"
        f" Parts found : {part_count}\n"
        f" Hair style : {hair_style}\n"
        f" Has tattoos : {len(tattoos) if isinstance(tattoos, list) else bool(tattoos)}\n"
        f" Clothing : {_count(scene_data.get('clothing', []))} item(s)\n"
        f" Objects : {_count(scene_data.get('objects', []))} background objects"
    )

    # Save the scene description JSON alongside the script for inspection
    json_path = tmp_dir / f"scene_{safe_name}.json"
    json_path.write_text(json.dumps(scene_data, indent=2), encoding="utf-8")
    _log(f" Scene JSON : {json_path}")

    # ── Phase 2: Blender script generation ───────────────────────────────────
    _log("[Blender Agent] Phase 2: Generating Blender Python script...")
    try:
        script = _phase2_generate_script(
            scene_data=scene_data,
            image_path=image_path,
            name=safe_name,
            export_path=export_path,
            export_format=output_format,
        )
    except Exception as exc:
        return _failure("Phase 2", exc)

    script_preview_path = tmp_dir / f"gen_{safe_name}_preview.py"
    script_preview_path.write_text(script, encoding="utf-8")
    _log(f" Script saved: {script_preview_path}")

    # ── Phase 3: Execute & validate ───────────────────────────────────────────
    _log("[Blender Agent] Phase 3: Running Blender headless...")
    try:
        final_path = _phase3_execute(
            script=script,
            scene_data=scene_data,
            image_path=image_path,
            name=safe_name,
            export_path=export_path,
            export_format=output_format,
            tmp_dir=tmp_dir,
        )
    except Exception as exc:
        return _failure("Phase 3", exc)

    total_elapsed = time.time() - overall_start
    _log(f"[Blender Agent] SUCCESS - exported to: {final_path}")
    _log(f"[Blender Agent] Total elapsed time: {total_elapsed:.1f}s")
    return "\n".join(log_lines)