tracellm-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tracellm/tracer.py ADDED
@@ -0,0 +1,598 @@
1
+ import asyncio
2
+ import contextvars
3
+ import functools
4
+ import inspect
5
+ import time
6
+ import uuid
7
+ from datetime import datetime, timezone
8
+ from typing import Any, Callable
9
+
10
+ from tracellm.db import resolve_api_key, save_trace_payload
11
+ from tracellm.mascot import MascotState, header, message
12
+ from tracellm.summary import print_summary
13
+ from tracellm.trace_stream import TraceStream
14
+ from tracellm.utils import (
15
+ SLOW_TRACE_THRESHOLD_MS,
16
+ coerce_failure_reason,
17
+ coerce_response,
18
+ coerce_retry_count,
19
+ coerce_status,
20
+ coerce_steps,
21
+ console,
22
+ estimate_tokens,
23
+ render_trace_report,
24
+ simulate_step,
25
+ )
26
+
27
# State of the trace currently executing in this context, or None when no
# traced call is active. The `trace` decorator sets it to a dict holding
# project_id, project_name, environment, api_key, collected_steps and
# retry_count, and `build_trace_payload` reads collected_steps / retry_count
# from it as a fallback when the traced result carries none.
_current_trace_context: contextvars.ContextVar[dict[str, Any] | None] = contextvars.ContextVar(
    "_current_trace_context", default=None
)
30
+
31
+
32
def build_trace_payload(
    prompt: str,
    model_name: str,
    project_id: str,
    project_name: str | None,
    api_key: str | None,
    environment: str,
    result: Any,
    trace_error: Exception | None,
    started_at: datetime,
    latency: float,
) -> dict[str, Any]:
    """Assemble the dict that describes one traced call.

    Args:
        prompt: Prompt text (or function name) the trace is recorded under.
        model_name: Model identifier stored with the trace.
        project_id: Project the trace belongs to.
        project_name: Optional display name of the project.
        api_key: Optional API key stored with the trace.
        environment: Environment label stored with the trace.
        result: Whatever the traced call returned (``None`` if it raised).
            The ``coerce_*`` helpers extract response, steps, retry count,
            status and failure reason from it.
        trace_error: Exception raised by the traced call, if any.
        started_at: Timestamp taken when the call started; stored as
            ``created_at``.
        latency: Call duration; compared against ``SLOW_TRACE_THRESHOLD_MS``
            to set ``slow_request``.

    Returns:
        A dict with a fresh ``trace_id`` plus the fields listed in the
        return statement below.
    """
    response_text = coerce_response(result)
    steps = coerce_steps(result)
    retry_count = coerce_retry_count(result)
    # NOTE(review): status is derived before the context fallback below may
    # replace retry_count — confirm that is intended.
    status = coerce_status(result, retry_count)
    failure_reason = coerce_failure_reason(result)

    # Fall back to whatever the active trace context collected when the
    # result itself carries no steps / retry count.
    ctx = _current_trace_context.get()
    if ctx:
        if not steps:
            steps = ctx.get("collected_steps", [])
        if not retry_count:
            retry_count = ctx.get("retry_count", 0)

    if trace_error is not None:
        status = "failed"
        # str() of an exception raised without arguments is "", which would
        # leave a failed trace with no reason at all; use the class name then.
        failure_reason = str(trace_error) or type(trace_error).__name__
        response_text = response_text or ""

    return {
        "trace_id": str(uuid.uuid4()),
        "prompt": prompt,
        "response": response_text,
        "latency": latency,
        "token_count": estimate_tokens(prompt, response_text, steps),
        "model_name": model_name,
        "project_id": project_id,
        "project_name": project_name,
        "api_key": api_key,
        "environment": environment,
        "status": status,
        "steps": steps,
        "retry_count": retry_count,
        "failure_reason": failure_reason,
        "slow_request": latency >= SLOW_TRACE_THRESHOLD_MS,
        "created_at": started_at.isoformat(),
        "updated_at": datetime.now(timezone.utc).isoformat(),
    }
80
+
81
+
82
def persist_trace(trace_data: dict[str, Any]) -> None:
    """Save *trace_data* via ``save_trace_payload`` on a best-effort basis.

    Any exception raised while saving is reported on the console and then
    dropped, so a storage problem never propagates to the caller.
    """
    try:
        save_trace_payload(trace_data)
    except Exception as save_error:
        notice = f"[yellow]Trace persistence skipped:[/yellow] {save_error}"
        console.print(notice)
87
+
88
+
89
def finalize_trace(
    prompt: str,
    model_name: str,
    project_id: str,
    project_name: str | None,
    api_key: str | None,
    environment: str,
    result: Any,
    trace_error: Exception | None,
    started_at: datetime,
    latency: float,
    render: bool = True,
) -> dict[str, Any]:
    """Build the trace payload, persist it and optionally render a report.

    All arguments except *render* are forwarded unchanged to
    ``build_trace_payload``. When *render* is true the payload is also passed
    to ``render_trace_report``.

    Returns:
        The payload dict produced by ``build_trace_payload``.
    """
    payload = build_trace_payload(
        prompt=prompt,
        model_name=model_name,
        project_id=project_id,
        project_name=project_name,
        api_key=api_key,
        environment=environment,
        result=result,
        trace_error=trace_error,
        started_at=started_at,
        latency=latency,
    )
    persist_trace(payload)
    if not render:
        return payload
    render_trace_report(payload)
    return payload
110
+
111
+
112
+ def _resolve_project_context(
113
+ api_key: str | None,
114
+ project: str | None,
115
+ environment: str | None,
116
+ ) -> tuple[str, str | None, str, str | None]:
117
+ if api_key:
118
+ try:
119
+ key_record = resolve_api_key(api_key)
120
+ return (
121
+ key_record.project_id,
122
+ project or key_record.project_id,
123
+ environment or key_record.environment,
124
+ key_record.key,
125
+ )
126
+ except Exception:
127
+ return (project or "default", project, environment or "development", api_key)
128
+ return (project or "default", project, environment or "development", None)
129
+
130
+
131
def trace(
    prompt: str = "",
    model_name: str = "unknown",
    api_key: str | None = None,
    project: str | None = None,
    environment: str = "development",
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Decorator factory that records a trace for every call of a function.

    Works for both plain and ``async def`` functions. Each call is timed,
    run inside a fresh trace context, and then handed to ``finalize_trace``
    (with rendering enabled) whether it returned or raised. Exceptions from
    the wrapped function are always re-raised.

    Args:
        prompt: Text recorded as the trace prompt; the wrapped function's
            ``__name__`` is used when empty.
        model_name: Model identifier recorded on the trace.
        api_key: Optional key passed to ``_resolve_project_context``.
        project: Optional project name / fallback project id.
        environment: Environment label.
            NOTE(review): the default ``"development"`` is truthy, so
            ``_resolve_project_context`` only falls back to the key record's
            environment when a caller passes an empty string — confirm that
            is intended.

    Returns:
        A decorator that wraps the target function.
    """

    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
        def _begin_call() -> dict[str, Any]:
            """Take start timestamps, resolve the project and install the context."""
            started_at = datetime.now(timezone.utc)
            start = time.perf_counter()
            project_id, project_name, effective_environment, resolved_key = _resolve_project_context(
                api_key=api_key, project=project, environment=environment,
            )
            ctx_token = _current_trace_context.set({
                "project_id": project_id,
                "project_name": project_name,
                "environment": effective_environment,
                "api_key": resolved_key,
                "collected_steps": [],
                "retry_count": 0,
            })
            return {
                "started_at": started_at,
                "start": start,
                "project_id": project_id,
                "project_name": project_name,
                "environment": effective_environment,
                "api_key": resolved_key,
                "ctx_token": ctx_token,
            }

        def _end_call(
            call: dict[str, Any],
            result: Any,
            trace_error: BaseException | None,
        ) -> None:
            """Finalize the trace and always restore the previous context."""
            try:
                latency = round((time.perf_counter() - call["start"]) * 1000, 2)
                finalize_trace(
                    prompt=prompt or func.__name__,
                    model_name=model_name,
                    project_id=call["project_id"],
                    project_name=call["project_name"],
                    api_key=call["api_key"],
                    environment=call["environment"],
                    result=result,
                    trace_error=trace_error,
                    started_at=call["started_at"],
                    latency=latency,
                    render=True,
                )
            finally:
                # Must run even if finalize_trace raises, otherwise this
                # call's context stays installed for whatever runs next.
                _current_trace_context.reset(call["ctx_token"])

        if inspect.iscoroutinefunction(func):

            @functools.wraps(func)
            async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
                call = _begin_call()
                result: Any = None
                trace_error: BaseException | None = None
                try:
                    result = await func(*args, **kwargs)
                    return result
                # BaseException so that asyncio.CancelledError and
                # KeyboardInterrupt are recorded as failures as well.
                except BaseException as error:
                    trace_error = error
                    raise
                finally:
                    _end_call(call, result, trace_error)

            return async_wrapper

        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            call = _begin_call()
            result: Any = None
            trace_error: BaseException | None = None
            try:
                result = func(*args, **kwargs)
                return result
            # BaseException so that KeyboardInterrupt / SystemExit are
            # recorded as failures as well.
            except BaseException as error:
                trace_error = error
                raise
            finally:
                _end_call(call, result, trace_error)

        return wrapper

    return decorator
236
+
237
+
238
def simulate_llm_response(prompt: str = "Explain transformers for a production RAG + agent engineering team.") -> dict[str, Any]:
    """Produce a fake RAG/agent run with randomized steps and a canned answer.

    Nothing here calls a real model. ``prompt`` is only echoed as the
    ``query`` input of the embedding and retrieval steps; the answer text is
    fixed apart from the session id, attempt count and step count
    interpolated into its last paragraph. The function blocks for roughly
    one to two seconds in ``time.sleep`` to imitate generation latency, in
    addition to whatever ``simulate_step`` does.

    Returns:
        A dict with ``response``, ``status`` (``"warning"`` when at least one
        retry was simulated, else ``"success"``), ``retry_count``, ``steps``
        and an ``observability`` summary.
    """
    import random

    question = prompt
    session_id = str(uuid.uuid4())[:8]
    steps: list[dict[str, Any]] = []
    # 0-2 simulated failed tool lookups; the final lookup always succeeds.
    retry_count = random.randint(0, 2)
    attempt_count = retry_count + 1
    corpus_options = [
        "attention_is_all_you_need",
        "rag_failure_playbook",
        "agent_latency_benchmarks",
        "toolformer_notes",
        "long_context_eval_report",
        "retrieval_system_design",
    ]

    # --- Step: query embedding -------------------------------------------
    # NOTE(review): simulate_step presumably appends a step record to
    # `steps` and waits between min_delay and max_delay seconds — confirm in
    # tracellm.utils.
    embedding_dims = random.choice([1536, 3072])
    query_vector_checksum = hex(random.getrandbits(24))
    simulate_step(
        steps=steps,
        tool_name="query_embedding",
        input_data={
            "session_id": session_id,
            "query": question,
            "embedding_model": "text-embedding-3-large",
        },
        output_data={
            "vector_dimensions": embedding_dims,
            "embedding_norm": round(random.uniform(0.98, 1.04), 4),
            "checksum": query_vector_checksum,
            "replay": {"stage": "embedding", "seed_hint": session_id},
        },
        min_delay=0.08,
        max_delay=0.22,
        random_module=random,
    )

    # --- Step: vector retrieval ------------------------------------------
    retrieved_docs = random.randint(14, 24)
    top_k = random.randint(6, 9)
    simulate_step(
        steps=steps,
        tool_name="vector_retrieval",
        input_data={
            "session_id": session_id,
            "query": question,
            "index": "research_embeddings_v2",
            "top_k": top_k,
            "filters": {"domain": "llm-systems", "freshness_days": 180},
        },
        output_data={
            "documents_found": retrieved_docs,
            "candidate_chunks": top_k,
            "latency_bucket": random.choice(["p50", "p75", "p95"]),
            # top_k is at least 6 and the corpus has 6 entries, so this is
            # always the full corpus in random order.
            "selected_ids": random.sample(corpus_options, k=min(top_k, len(corpus_options))),
            "replay": {
                "stage": "retrieval",
                "query_hash": query_vector_checksum,
                "cursor": f"retrieval:{session_id}",
            },
        },
        min_delay=0.18,
        max_delay=0.42,
        random_module=random,
    )

    # --- Step: rerank ----------------------------------------------------
    reranked_chunks = random.randint(4, 6)
    simulate_step(
        steps=steps,
        tool_name="rerank_context",
        input_data={
            "session_id": session_id,
            "strategy": "cross-encoder",
            "candidate_count": top_k,
        },
        output_data={
            "reranked_chunks": reranked_chunks,
            "coverage_score": round(random.uniform(0.82, 0.96), 3),
            "dropped_chunks": max(0, top_k - reranked_chunks),
            "replay": {"stage": "rerank", "selected_chunk_count": reranked_chunks},
        },
        min_delay=0.09,
        max_delay=0.24,
        random_module=random,
    )

    # --- Step: planning --------------------------------------------------
    simulate_step(
        steps=steps,
        tool_name="agent_planner",
        input_data={
            "session_id": session_id,
            "mode": "multi-hop-reasoning",
            "objective": "teach architecture and operational tradeoffs",
        },
        output_data={
            "plan": [
                "summarize transformer core concepts",
                "connect self-attention to scaling behavior",
                "map concepts to RAG and tool-using agents",
                "call out failure modes and observability metrics",
            ],
            "planner_confidence": round(random.uniform(0.81, 0.94), 3),
            "requires_tool_validation": retry_count > 0,
            "replay": {"stage": "planning", "plan_id": f"plan-{session_id}"},
        },
        min_delay=0.12,
        max_delay=0.31,
        random_module=random,
    )

    # --- Step: context budgeting -----------------------------------------
    simulate_step(
        steps=steps,
        tool_name="context_window_allocator",
        input_data={
            "session_id": session_id,
            "budget_tokens": random.randint(4800, 7200),
            "response_budget": random.randint(900, 1400),
        },
        output_data={
            "allocated_context_tokens": random.randint(3000, 5200),
            "reserved_for_tools": random.randint(500, 900),
            "compression_applied": random.choice([True, False]),
            "replay": {
                "stage": "budgeting",
                "slot_map": ["system", "retrieval", "tools", "generation"],
            },
        },
        min_delay=0.05,
        max_delay=0.18,
        random_module=random,
    )

    # --- Failed tool lookups, each followed by a retry_guard step --------
    # One failed lookup (success=False) plus one retry_guard per retry.
    if retry_count > 0:
        for attempt in range(1, attempt_count):
            simulate_step(
                steps=steps,
                tool_name="tool_schema_lookup",
                input_data={
                    "session_id": session_id,
                    "attempt": attempt,
                    "requested_tool": "citation_builder",
                },
                output_data={
                    "error": random.choice(
                        [
                            "schema registry timeout",
                            "stale tool contract version",
                            "partial metadata returned",
                        ]
                    ),
                    "retryable": True,
                    "replay": {
                        "stage": "tool_lookup",
                        "attempt": attempt,
                        "decision": "retry",
                    },
                },
                min_delay=0.11,
                max_delay=0.28,
                random_module=random,
                success=False,
            )
            simulate_step(
                steps=steps,
                tool_name="retry_guard",
                input_data={
                    "session_id": session_id,
                    "attempt": attempt,
                    "policy": "exponential_backoff_with_jitter",
                },
                output_data={
                    "status": "retry_scheduled",
                    "backoff_ms": random.randint(180, 650),
                    "guardrail_state": "within_threshold",
                    "replay": {
                        "stage": "retry",
                        "attempt": attempt,
                        "next_attempt": attempt + 1,
                    },
                },
                min_delay=0.07,
                max_delay=0.19,
                random_module=random,
            )

    # --- Final, successful tool lookup -----------------------------------
    simulate_step(
        steps=steps,
        tool_name="tool_schema_lookup",
        input_data={
            "session_id": session_id,
            "attempt": attempt_count,
            "requested_tool": "citation_builder",
        },
        output_data={
            "tool_contract_version": f"2026.05.{random.randint(10, 28)}",
            "arguments_validated": True,
            "replay": {
                "stage": "tool_lookup",
                "attempt": attempt_count,
                "decision": "continue",
            },
        },
        min_delay=0.09,
        max_delay=0.21,
        random_module=random,
    )

    # --- Step: citation building -----------------------------------------
    simulate_step(
        steps=steps,
        tool_name="citation_builder",
        input_data={
            "session_id": session_id,
            "source_count": reranked_chunks,
            "format": "inline-bullets",
        },
        output_data={
            "citations_generated": reranked_chunks,
            "deduplicated_sources": random.randint(3, reranked_chunks),
            "replay": {"stage": "tool_execution", "artifact_id": f"cite-{session_id}"},
        },
        min_delay=0.1,
        max_delay=0.23,
        random_module=random,
    )

    # --- Generation: blocking sleep stands in for model latency ----------
    generation_started = time.perf_counter()
    time.sleep(random.uniform(0.95, 1.9))
    # len(steps) below is evaluated before the response_generation step is
    # appended, so the count in the text excludes that final step.
    response = f"""
Transformers are neural architectures built around self-attention, which means the model can score how strongly every token should attend to every other token while building the next internal representation. That shift matters because it removes the strictly sequential bottleneck of older recurrent systems and makes training dramatically more parallel, which is why transformers became the default foundation for modern language models, multimodal systems, retrieval-heavy copilots, and agent frameworks.

At a systems level, the important intuition is that each layer repeatedly mixes three things: token identity, token position, and context relevance. Multi-head attention lets the model inspect several interaction patterns at once, so one head can track local syntax, another can follow long-range references, and another can focus on task-specific structure such as citations, code blocks, or tool outputs. Feed-forward blocks then reshape those mixed representations into features the next layer can use. Stack enough of these layers and the model learns abstractions that look like reasoning traces, latent memory lookups, planning heuristics, and style control even though the runtime primitive is still next-token prediction.

For production RAG and agent systems, transformers are only one part of the story. The operational pipeline usually includes query embedding, vector retrieval, reranking, prompt assembly, tool selection, retry handling, and final generation. A good answer is not just a function of the base model weights; it also depends on whether retrieval returned the right evidence, whether the planner selected the right tools, whether context budgeting dropped a critical chunk, and whether retries recovered from transient failures without hiding instability from operators.

That is why observability matters. When a transformer-based application appears to hallucinate, the root cause may actually be upstream: a low-recall vector search, schema drift in a tool contract, latency-induced truncation, or a retry path that silently swapped evidence sets between attempts. High-fidelity traces let teams inspect the exact execution graph, including step durations, retries, tool outputs, retrieval confidence, and token budgets. This makes it possible to distinguish model limitations from systems integration issues.

In practical terms, transformers excel because they scale with data, compute, and context more effectively than earlier sequence models. Self-attention produces rich contextual representations; retrieval extends the model with fresh external knowledge; tools let the system act beyond pure text generation; and planners coordinate these components into multi-step workflows. The resulting stack is powerful, but it is also failure-prone. The healthiest engineering pattern is to treat the LLM as one subsystem inside a larger distributed decision engine and trace every important boundary the same way you would trace a payment pipeline or a search request path.

If you are testing a dashboard, this run is intentionally token-heavy and observability-rich: it includes retrieval, planning, context allocation, tool validation, optional retries, and a long-form answer so latency, token volume, retries, and step timelines are all visible in the resulting trace payload. Session `{session_id}` completed after `{attempt_count}` tool lookup attempt(s), with `{len(steps)}` replayable steps recorded before generation finished.
""".strip()
    generation_duration = round((time.perf_counter() - generation_started) * 1000, 2)
    # The generation step is built by hand (not via simulate_step) so that
    # its duration is the measured sleep above, in milliseconds.
    steps.append(
        {
            "step_id": str(uuid.uuid4()),
            "tool_name": "response_generation",
            "input": {
                "session_id": session_id,
                "model": "gpt-4.1-mini",
                "temperature": 0.4,
                "max_output_tokens": 1400,
            },
            "output": {
                "preview": response[:220],
                "output_sections": 6,
                "estimated_completion_tokens": estimate_tokens(response),
                "replay": {
                    "stage": "generation",
                    "response_id": f"resp-{session_id}",
                    "attempt_count": attempt_count,
                },
            },
            "duration": generation_duration,
            "success": True,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }
    )

    return {
        "response": response,
        "status": "warning" if retry_count > 0 else "success",
        "retry_count": retry_count,
        "steps": steps,
        "observability": {
            "session_id": session_id,
            "retrieval_candidates": retrieved_docs,
            "final_context_chunks": reranked_chunks,
            "attempt_count": attempt_count,
        },
    }
517
+
518
+
519
def run_live_trace(
    prompt: str,
    model_name: str = "gpt-4.1-mini",
    project: str | None = None,
    api_key: str | None = None,
    environment: str = "development",
    render: bool = True,
) -> dict[str, Any]:
    """Run one simulated request while streaming progress, then record its trace.

    A fixed sequence of step events is emitted through ``TraceStream``; the
    simulated response is produced when the ``llm.generate`` event is
    reached. The trace is finalized (and therefore persisted) whether the
    run succeeded or raised; on success a summary and a closing status
    message are printed.

    Args:
        prompt: Prompt forwarded to ``simulate_llm_response`` and recorded
            on the trace.
        model_name: Model identifier recorded on the trace.
        project: Optional project name / fallback project id.
        api_key: Optional key passed to ``_resolve_project_context``.
        environment: Environment label.
            NOTE(review): the truthy default means the key record's
            environment is never used unless an empty string is passed.
        render: Accepted for interface compatibility; this function always
            finalizes with ``render=False`` and prints its own summary.

    Returns:
        The trace payload dict returned by ``finalize_trace``.

    Raises:
        Exception: Whatever the stream or the simulation raised, re-raised
            after the failed trace has been finalized.
    """
    started_at = datetime.now(timezone.utc)
    start = time.perf_counter()
    result = None
    trace_error: Exception | None = None
    project_id, project_name, effective_environment, resolved_key = _resolve_project_context(
        api_key=api_key,
        project=project,
        environment=environment,
    )

    console.print()
    console.print(header("Tracing request...", MascotState.LOADING))
    console.print()

    step_events: list[tuple[str, str]] = [
        ("query.embed", "Embedding prompt"),
        ("vector.search", "Searching vector index"),
        ("context.rerank", "Reranking context"),
        ("agent.plan", "Planning tool execution"),
        ("context.allocate", "Allocating context window"),
        ("tool.chain", "Running tool chain"),
        ("llm.generate", "Generating answer"),
    ]

    try:
        with TraceStream(prompt, model_name) as stream:
            for event_name, label in step_events:
                stream.emit(event_name, label)
                # Key on the stable event name, not the display label.
                if event_name == "llm.generate":
                    try:
                        result = simulate_llm_response(prompt)
                    except Exception as error:
                        # Recorded here as well in case TraceStream.__exit__
                        # suppresses the exception (cannot tell from this
                        # file) — the trace must still be marked failed.
                        trace_error = error
                        raise
    except Exception as error:
        trace_error = error
        raise
    finally:
        # Runs on the failure path too, so a failed run is persisted before
        # the exception propagates instead of being lost.
        latency = round((time.perf_counter() - start) * 1000, 2)
        trace_data = finalize_trace(
            prompt=prompt,
            model_name=model_name,
            project_id=project_id,
            project_name=project_name,
            api_key=resolved_key,
            environment=effective_environment,
            result=result,
            trace_error=trace_error,
            started_at=started_at,
            latency=latency,
            render=False,
        )

    print_summary(trace_data)
    status = str(trace_data.get("status", "success")).lower()
    if status == "success":
        console.print(message("Trace complete", MascotState.SUCCESS))
    elif status in ("warning", "failed"):
        console.print(message("Warning: tool execution failed", MascotState.WARNING))
    console.print()
    return trace_data
589
+
590
+
591
@trace(
    model_name="gpt-4.1-mini",
    prompt="Explain transformers",
    environment="development",
    project="demo-workspace",
)
def llm_response() -> dict[str, Any]:
    """Demo entry point: a traced call to ``simulate_llm_response`` with its default prompt."""
    simulated = simulate_llm_response()
    return simulated
@@ -0,0 +1,78 @@
1
+ """Execution tree view for replay — Rich Tree with nested steps."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from rich.panel import Panel
8
+ from rich.text import Text
9
+ from rich.tree import Tree
10
+
11
+ from tracellm.utils import console, latency_style
12
+
13
+
14
+ def _step_icon(step: dict[str, Any], active: bool, done: bool) -> str:
15
+ if active:
16
+ return "\u25b6"
17
+ if not step.get("success", True):
18
+ return "\u2717"
19
+ if done:
20
+ return "\u2713"
21
+ return " "
22
+
23
+
24
+ def _step_label(step: dict[str, Any]) -> str:
25
+ parts = []
26
+ tool_name = step.get("tool_name", "unknown")
27
+ duration = float(step.get("duration", 0.0))
28
+ success = bool(step.get("success", True))
29
+
30
+ parts.append(tool_name)
31
+ parts.append(f"[bright_black]{duration:.0f}ms[/bright_black]")
32
+ if not success:
33
+ parts.append("[red]RETRY[/red]")
34
+
35
+ return " ".join(parts)
36
+
37
+
38
def build_execution_tree(
    steps: list[dict[str, Any]],
    active_index: int | None = None,
) -> Tree:
    """Build a Rich ``Tree`` with one node per trace step.

    Steps are numbered from 1. When *active_index* is given, that step is
    drawn in cyan, later steps are dimmed and earlier ones are marked done;
    without it every step is drawn in white. A step's ``children`` (if any)
    become nested nodes, and a closing ``done`` node is green when every
    top-level step succeeded and yellow otherwise.
    """
    root = Tree(
        Text("agent:start", style="bold white"),
        guide_style="bright_black",
    )
    has_cursor = active_index is not None

    for position, step in enumerate(steps, 1):
        current = active_index == position
        finished = has_cursor and position < active_index
        glyph = _step_icon(step, current, finished)

        if current:
            colour = "cyan"
        elif has_cursor and position > active_index:
            colour = "dim"
        else:
            colour = "white"

        node = root.add(f"[{colour}]{glyph}[/] [{colour}]{_step_label(step)}[/]")

        # Nested steps are always rendered as finished and left unstyled.
        for child in step.get("children") or []:
            child_glyph = _step_icon(child, False, True)
            node.add(f"{child_glyph} {_step_label(child)}")

    all_succeeded = all(entry.get("success", True) for entry in steps)
    closing_colour = "green" if all_succeeded else "yellow"
    root.add(f"[{closing_colour}]\u2713[/] [{closing_colour}]done[/]")

    return root
70
+
71
+
72
def render_execution_panel(
    steps: list[dict[str, Any]],
    active_index: int | None = None,
) -> Panel:
    """Wrap the execution tree for *steps* in a titled Rich ``Panel``."""
    return Panel(
        build_execution_tree(steps, active_index=active_index),
        title="Execution Tree",
        border_style="bright_black",
        padding=(1, 2),
    )