zapcode-ai 1.3.0__tar.gz → 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: zapcode-ai
|
|
3
|
-
Version: 1.3.0
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: AI SDK integration for Zapcode — let LLMs write and execute TypeScript safely
|
|
5
5
|
Project-URL: Homepage, https://github.com/TheUncharted/zapcode
|
|
6
6
|
Project-URL: Repository, https://github.com/TheUncharted/zapcode
|
|
@@ -47,10 +47,10 @@ Description-Content-Type: text/markdown
|
|
|
47
47
|
|
|
48
48
|
AI agents are more capable when they **write code** instead of chaining tool calls. Code gives agents loops, conditionals, variables, and composition — things that tool chains simulate poorly.
|
|
49
49
|
|
|
50
|
-
- [
|
|
51
|
-
- [Programmatic Tool Calling](https://
|
|
52
|
-
- [Code Execution with MCP](https://www.anthropic.com/engineering/code-execution-mcp)
|
|
53
|
-
- [Smol Agents](https://
|
|
50
|
+
- [Codemode](https://blog.cloudflare.com/code-mode/) from Cloudflare
|
|
51
|
+
- [Programmatic Tool Calling](https://platform.claude.com/docs/en/agents-and-tools/tool-use/programmatic-tool-calling) from Anthropic
|
|
52
|
+
- [Code Execution with MCP](https://www.anthropic.com/engineering/code-execution-with-mcp) from Anthropic
|
|
53
|
+
- [Smol Agents](https://github.com/huggingface/smolagents) from Hugging Face
|
|
54
54
|
|
|
55
55
|
**But running AI-generated code is dangerous and slow.**
|
|
56
56
|
|
|
@@ -176,7 +176,7 @@ if (!state.completed) {
|
|
|
176
176
|
}
|
|
177
177
|
```
|
|
178
178
|
|
|
179
|
-
See [`examples/typescript/basic.ts`](examples/typescript/basic.ts) for more.
|
|
179
|
+
See [`examples/typescript/basic/main.ts`](examples/typescript/basic/main.ts) for more.
|
|
180
180
|
|
|
181
181
|
### Python
|
|
182
182
|
|
|
@@ -213,7 +213,7 @@ if state.get("suspended"):
|
|
|
213
213
|
result = restored.resume({"condition": "Clear", "temp": 26})
|
|
214
214
|
```
|
|
215
215
|
|
|
216
|
-
See [`examples/python/basic.py`](examples/python/basic.py) for more.
|
|
216
|
+
See [`examples/python/basic/main.py`](examples/python/basic/main.py) for more.
|
|
217
217
|
|
|
218
218
|
<details>
|
|
219
219
|
<summary><strong>Rust</strong></summary>
|
|
@@ -251,7 +251,7 @@ if let VmState::Suspended { snapshot, .. } = state {
|
|
|
251
251
|
}
|
|
252
252
|
```
|
|
253
253
|
|
|
254
|
-
See [`examples/rust/basic.rs`](examples/rust/basic.rs) for more.
|
|
254
|
+
See [`examples/rust/basic/basic.rs`](examples/rust/basic/basic.rs) for more.
|
|
255
255
|
</details>
|
|
256
256
|
|
|
257
257
|
<details>
|
|
@@ -272,7 +272,7 @@ console.log(result.output); // 120
|
|
|
272
272
|
</script>
|
|
273
273
|
```
|
|
274
274
|
|
|
275
|
-
See [`examples/wasm/index.html`](examples/wasm/index.html) for a full playground.
|
|
275
|
+
See [`examples/wasm/basic/index.html`](examples/wasm/basic/index.html) for a full playground.
|
|
276
276
|
</details>
|
|
277
277
|
|
|
278
278
|
## AI Agent Usage
|
|
@@ -326,7 +326,7 @@ const { text } = await generateText({
|
|
|
326
326
|
|
|
327
327
|
Under the hood: the LLM writes TypeScript code that calls your tools → Zapcode executes it in a sandbox → tool calls suspend the VM → your `execute` functions run on the host → results flow back in. All in ~2µs startup + tool execution time.
|
|
328
328
|
|
|
329
|
-
See [`examples/typescript/ai-agent-zapcode-ai.ts`](examples/typescript/ai-agent-zapcode-ai.ts) for the full working example.
|
|
329
|
+
See [`examples/typescript/ai-agent/ai-agent-zapcode-ai.ts`](examples/typescript/ai-agent/ai-agent-zapcode-ai.ts) for the full working example.
|
|
330
330
|
|
|
331
331
|
<details>
|
|
332
332
|
<summary><strong>Anthropic SDK</strong></summary>
|
|
@@ -391,7 +391,7 @@ while state.get("suspended"):
|
|
|
391
391
|
print(state["output"])
|
|
392
392
|
```
|
|
393
393
|
|
|
394
|
-
See [`examples/typescript/ai-agent-anthropic.ts`](examples/typescript/ai-agent-anthropic.ts) and [`examples/python/ai_agent_anthropic.py`](examples/python/ai_agent_anthropic.py).
|
|
394
|
+
See [`examples/typescript/ai-agent/ai-agent-anthropic.ts`](examples/typescript/ai-agent/ai-agent-anthropic.ts) and [`examples/python/ai-agent/ai_agent_anthropic.py`](examples/python/ai-agent/ai_agent_anthropic.py).
|
|
395
395
|
</details>
|
|
396
396
|
|
|
397
397
|
<details>
|
|
@@ -478,6 +478,63 @@ langchain_tool = b.custom["langchain"]
|
|
|
478
478
|
The adapter receives an `AdapterContext` with everything needed: system prompt, tool name, tool JSON schema, and a `handleToolCall` function. Return whatever shape your SDK expects.
|
|
479
479
|
</details>
|
|
480
480
|
|
|
481
|
+
## Auto-Fix, Debug & Execution Tracing
|
|
482
|
+
|
|
483
|
+
### Auto-fix (`autoFix`)
|
|
484
|
+
|
|
485
|
+
When enabled, execution errors are returned as tool results instead of throwing — letting the LLM see the error and self-correct on the next step.
|
|
486
|
+
|
|
487
|
+
**TypeScript:**
|
|
488
|
+
```typescript
|
|
489
|
+
const { system, tools } = zapcode({
|
|
490
|
+
autoFix: true,
|
|
491
|
+
tools: { /* ... */ },
|
|
492
|
+
});
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
**Python:**
|
|
496
|
+
```python
|
|
497
|
+
zap = zapcode(auto_fix=True, tools={...})
|
|
498
|
+
```
|
|
499
|
+
|
|
500
|
+
### Execution Trace
|
|
501
|
+
|
|
502
|
+
Every execution produces a trace tree with timing for each phase (parse → compile → execute). Use `printTrace()` / `print_trace()` to display the full session trace, or `getTrace()` / `get_trace()` to access the trace programmatically.
|
|
503
|
+
|
|
504
|
+
**TypeScript:**
|
|
505
|
+
```typescript
|
|
506
|
+
const { system, tools, printTrace, getTrace } = zapcode({
|
|
507
|
+
autoFix: true,
|
|
508
|
+
tools: { /* ... */ },
|
|
509
|
+
});
|
|
510
|
+
|
|
511
|
+
// After running...
|
|
512
|
+
printTrace();
|
|
513
|
+
// ✓ zapcode.session 12.3ms
|
|
514
|
+
// ✓ execute_code 8.1ms
|
|
515
|
+
// ✓ parse 0.2ms
|
|
516
|
+
// ✓ compile 0.1ms
|
|
517
|
+
// ✓ execute 7.8ms
|
|
518
|
+
|
|
519
|
+
const trace = getTrace(); // TraceSpan tree
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
**Python:**
|
|
523
|
+
```python
|
|
524
|
+
zap = zapcode(auto_fix=True, tools={...})
|
|
525
|
+
|
|
526
|
+
# After running...
|
|
527
|
+
zap.print_trace()
|
|
528
|
+
trace = zap.get_trace() # TraceSpan tree
|
|
529
|
+
```
|
|
530
|
+
|
|
531
|
+
### Debug Logging
|
|
532
|
+
|
|
533
|
+
For detailed logging of generated code, tool calls, and output, see the debug-tracing examples which show how to inspect each execution step:
|
|
534
|
+
|
|
535
|
+
- [TypeScript debug-tracing example](examples/typescript/debug-tracing/main.ts)
|
|
536
|
+
- [Python debug-tracing example](examples/python/debug-tracing/main.py)
|
|
537
|
+
|
|
481
538
|
## What Zapcode Can and Cannot Do
|
|
482
539
|
|
|
483
540
|
**Can do:**
|
|
@@ -21,10 +21,10 @@
|
|
|
21
21
|
|
|
22
22
|
AI agents are more capable when they **write code** instead of chaining tool calls. Code gives agents loops, conditionals, variables, and composition — things that tool chains simulate poorly.
|
|
23
23
|
|
|
24
|
-
- [
|
|
25
|
-
- [Programmatic Tool Calling](https://
|
|
26
|
-
- [Code Execution with MCP](https://www.anthropic.com/engineering/code-execution-mcp)
|
|
27
|
-
- [Smol Agents](https://
|
|
24
|
+
- [Codemode](https://blog.cloudflare.com/code-mode/) from Cloudflare
|
|
25
|
+
- [Programmatic Tool Calling](https://platform.claude.com/docs/en/agents-and-tools/tool-use/programmatic-tool-calling) from Anthropic
|
|
26
|
+
- [Code Execution with MCP](https://www.anthropic.com/engineering/code-execution-with-mcp) from Anthropic
|
|
27
|
+
- [Smol Agents](https://github.com/huggingface/smolagents) from Hugging Face
|
|
28
28
|
|
|
29
29
|
**But running AI-generated code is dangerous and slow.**
|
|
30
30
|
|
|
@@ -150,7 +150,7 @@ if (!state.completed) {
|
|
|
150
150
|
}
|
|
151
151
|
```
|
|
152
152
|
|
|
153
|
-
See [`examples/typescript/basic.ts`](examples/typescript/basic.ts) for more.
|
|
153
|
+
See [`examples/typescript/basic/main.ts`](examples/typescript/basic/main.ts) for more.
|
|
154
154
|
|
|
155
155
|
### Python
|
|
156
156
|
|
|
@@ -187,7 +187,7 @@ if state.get("suspended"):
|
|
|
187
187
|
result = restored.resume({"condition": "Clear", "temp": 26})
|
|
188
188
|
```
|
|
189
189
|
|
|
190
|
-
See [`examples/python/basic.py`](examples/python/basic.py) for more.
|
|
190
|
+
See [`examples/python/basic/main.py`](examples/python/basic/main.py) for more.
|
|
191
191
|
|
|
192
192
|
<details>
|
|
193
193
|
<summary><strong>Rust</strong></summary>
|
|
@@ -225,7 +225,7 @@ if let VmState::Suspended { snapshot, .. } = state {
|
|
|
225
225
|
}
|
|
226
226
|
```
|
|
227
227
|
|
|
228
|
-
See [`examples/rust/basic.rs`](examples/rust/basic.rs) for more.
|
|
228
|
+
See [`examples/rust/basic/basic.rs`](examples/rust/basic/basic.rs) for more.
|
|
229
229
|
</details>
|
|
230
230
|
|
|
231
231
|
<details>
|
|
@@ -246,7 +246,7 @@ console.log(result.output); // 120
|
|
|
246
246
|
</script>
|
|
247
247
|
```
|
|
248
248
|
|
|
249
|
-
See [`examples/wasm/index.html`](examples/wasm/index.html) for a full playground.
|
|
249
|
+
See [`examples/wasm/basic/index.html`](examples/wasm/basic/index.html) for a full playground.
|
|
250
250
|
</details>
|
|
251
251
|
|
|
252
252
|
## AI Agent Usage
|
|
@@ -300,7 +300,7 @@ const { text } = await generateText({
|
|
|
300
300
|
|
|
301
301
|
Under the hood: the LLM writes TypeScript code that calls your tools → Zapcode executes it in a sandbox → tool calls suspend the VM → your `execute` functions run on the host → results flow back in. All in ~2µs startup + tool execution time.
|
|
302
302
|
|
|
303
|
-
See [`examples/typescript/ai-agent-zapcode-ai.ts`](examples/typescript/ai-agent-zapcode-ai.ts) for the full working example.
|
|
303
|
+
See [`examples/typescript/ai-agent/ai-agent-zapcode-ai.ts`](examples/typescript/ai-agent/ai-agent-zapcode-ai.ts) for the full working example.
|
|
304
304
|
|
|
305
305
|
<details>
|
|
306
306
|
<summary><strong>Anthropic SDK</strong></summary>
|
|
@@ -365,7 +365,7 @@ while state.get("suspended"):
|
|
|
365
365
|
print(state["output"])
|
|
366
366
|
```
|
|
367
367
|
|
|
368
|
-
See [`examples/typescript/ai-agent-anthropic.ts`](examples/typescript/ai-agent-anthropic.ts) and [`examples/python/ai_agent_anthropic.py`](examples/python/ai_agent_anthropic.py).
|
|
368
|
+
See [`examples/typescript/ai-agent/ai-agent-anthropic.ts`](examples/typescript/ai-agent/ai-agent-anthropic.ts) and [`examples/python/ai-agent/ai_agent_anthropic.py`](examples/python/ai-agent/ai_agent_anthropic.py).
|
|
369
369
|
</details>
|
|
370
370
|
|
|
371
371
|
<details>
|
|
@@ -452,6 +452,63 @@ langchain_tool = b.custom["langchain"]
|
|
|
452
452
|
The adapter receives an `AdapterContext` with everything needed: system prompt, tool name, tool JSON schema, and a `handleToolCall` function. Return whatever shape your SDK expects.
|
|
453
453
|
</details>
|
|
454
454
|
|
|
455
|
+
## Auto-Fix, Debug & Execution Tracing
|
|
456
|
+
|
|
457
|
+
### Auto-fix (`autoFix`)
|
|
458
|
+
|
|
459
|
+
When enabled, execution errors are returned as tool results instead of throwing — letting the LLM see the error and self-correct on the next step.
|
|
460
|
+
|
|
461
|
+
**TypeScript:**
|
|
462
|
+
```typescript
|
|
463
|
+
const { system, tools } = zapcode({
|
|
464
|
+
autoFix: true,
|
|
465
|
+
tools: { /* ... */ },
|
|
466
|
+
});
|
|
467
|
+
```
|
|
468
|
+
|
|
469
|
+
**Python:**
|
|
470
|
+
```python
|
|
471
|
+
zap = zapcode(auto_fix=True, tools={...})
|
|
472
|
+
```
|
|
473
|
+
|
|
474
|
+
### Execution Trace
|
|
475
|
+
|
|
476
|
+
Every execution produces a trace tree with timing for each phase (parse → compile → execute). Use `printTrace()` / `print_trace()` to display the full session trace, or `getTrace()` / `get_trace()` to access the trace programmatically.
|
|
477
|
+
|
|
478
|
+
**TypeScript:**
|
|
479
|
+
```typescript
|
|
480
|
+
const { system, tools, printTrace, getTrace } = zapcode({
|
|
481
|
+
autoFix: true,
|
|
482
|
+
tools: { /* ... */ },
|
|
483
|
+
});
|
|
484
|
+
|
|
485
|
+
// After running...
|
|
486
|
+
printTrace();
|
|
487
|
+
// ✓ zapcode.session 12.3ms
|
|
488
|
+
// ✓ execute_code 8.1ms
|
|
489
|
+
// ✓ parse 0.2ms
|
|
490
|
+
// ✓ compile 0.1ms
|
|
491
|
+
// ✓ execute 7.8ms
|
|
492
|
+
|
|
493
|
+
const trace = getTrace(); // TraceSpan tree
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
**Python:**
|
|
497
|
+
```python
|
|
498
|
+
zap = zapcode(auto_fix=True, tools={...})
|
|
499
|
+
|
|
500
|
+
# After running...
|
|
501
|
+
zap.print_trace()
|
|
502
|
+
trace = zap.get_trace() # TraceSpan tree
|
|
503
|
+
```
|
|
504
|
+
|
|
505
|
+
### Debug Logging
|
|
506
|
+
|
|
507
|
+
For detailed logging of generated code, tool calls, and output, see the debug-tracing examples which show how to inspect each execution step:
|
|
508
|
+
|
|
509
|
+
- [TypeScript debug-tracing example](examples/typescript/debug-tracing/main.ts)
|
|
510
|
+
- [Python debug-tracing example](examples/python/debug-tracing/main.py)
|
|
511
|
+
|
|
455
512
|
## What Zapcode Can and Cannot Do
|
|
456
513
|
|
|
457
514
|
**Can do:**
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "zapcode-ai"
|
|
3
|
-
version = "1.3.0"
|
|
3
|
+
version = "1.4.0" # x-release-please-version
|
|
4
4
|
description = "AI SDK integration for Zapcode — let LLMs write and execute TypeScript safely"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
@@ -29,6 +29,8 @@ Works with any AI SDK:
|
|
|
29
29
|
|
|
30
30
|
from __future__ import annotations
|
|
31
31
|
|
|
32
|
+
import json
|
|
33
|
+
import time
|
|
32
34
|
from dataclasses import dataclass, field
|
|
33
35
|
from typing import Any, Callable, Awaitable
|
|
34
36
|
|
|
@@ -55,12 +57,27 @@ class ToolDefinition:
|
|
|
55
57
|
execute: Callable[..., Any] # (args: dict) -> Any or awaitable
|
|
56
58
|
|
|
57
59
|
|
|
60
|
+
@dataclass
|
|
61
|
+
class TraceSpan:
|
|
62
|
+
"""A single span in the execution trace. OTel-compatible shape."""
|
|
63
|
+
name: str
|
|
64
|
+
start_time: float # ms since epoch
|
|
65
|
+
end_time: float = 0.0
|
|
66
|
+
duration_ms: float = 0.0
|
|
67
|
+
status: str = "ok" # "ok" or "error"
|
|
68
|
+
attributes: dict[str, Any] = field(default_factory=dict)
|
|
69
|
+
children: list[TraceSpan] = field(default_factory=list)
|
|
70
|
+
|
|
71
|
+
|
|
58
72
|
@dataclass
|
|
59
73
|
class ExecutionResult:
|
|
60
74
|
"""Result of executing guest code."""
|
|
75
|
+
code: str
|
|
61
76
|
output: Any
|
|
62
77
|
stdout: str
|
|
63
78
|
tool_calls: list[dict[str, Any]]
|
|
79
|
+
error: str | None = None
|
|
80
|
+
trace: TraceSpan | None = None
|
|
64
81
|
|
|
65
82
|
|
|
66
83
|
# ---------------------------------------------------------------------------
|
|
@@ -142,6 +159,39 @@ Rules:
|
|
|
142
159
|
return "\n\n".join(parts)
|
|
143
160
|
|
|
144
161
|
|
|
162
|
+
# ---------------------------------------------------------------------------
|
|
163
|
+
# Trace helpers
|
|
164
|
+
# ---------------------------------------------------------------------------
|
|
165
|
+
|
|
166
|
+
def _create_span(name: str, attributes: dict[str, Any] | None = None) -> TraceSpan:
|
|
167
|
+
return TraceSpan(
|
|
168
|
+
name=name,
|
|
169
|
+
start_time=time.time() * 1000,
|
|
170
|
+
attributes=attributes or {},
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _end_span(span: TraceSpan, status: str | None = None) -> TraceSpan:
|
|
175
|
+
span.end_time = time.time() * 1000
|
|
176
|
+
span.duration_ms = span.end_time - span.start_time
|
|
177
|
+
if status:
|
|
178
|
+
span.status = status
|
|
179
|
+
return span
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _print_trace(span: TraceSpan, indent: int = 0) -> None:
|
|
183
|
+
prefix = "" if indent == 0 else "│ " * (indent - 1) + "├─ "
|
|
184
|
+
icon = "✗" if span.status == "error" else "✓"
|
|
185
|
+
duration = "<1ms" if span.duration_ms < 1 else f"{span.duration_ms:.0f}ms"
|
|
186
|
+
attrs = " ".join(
|
|
187
|
+
f"{k}={str(v)[:80]}" for k, v in span.attributes.items()
|
|
188
|
+
if not k.startswith("zapcode.code") # don't dump full code in trace
|
|
189
|
+
)
|
|
190
|
+
print(f"{prefix}{icon} {span.name} ({duration}){' ' + attrs if attrs else ''}")
|
|
191
|
+
for child in span.children:
|
|
192
|
+
_print_trace(child, indent + 1)
|
|
193
|
+
|
|
194
|
+
|
|
145
195
|
# ---------------------------------------------------------------------------
|
|
146
196
|
# Execution engine
|
|
147
197
|
# ---------------------------------------------------------------------------
|
|
@@ -152,48 +202,100 @@ def _execute_code(
|
|
|
152
202
|
*,
|
|
153
203
|
memory_limit_bytes: int | None = None,
|
|
154
204
|
time_limit_ms: int | None = None,
|
|
205
|
+
debug: bool = False,
|
|
206
|
+
auto_fix: bool = False,
|
|
155
207
|
) -> ExecutionResult:
|
|
156
208
|
tool_names = list(tool_defs.keys())
|
|
157
209
|
tool_calls: list[dict[str, Any]] = []
|
|
210
|
+
tracing = debug or auto_fix
|
|
158
211
|
|
|
159
|
-
|
|
160
|
-
if time_limit_ms is not None:
|
|
161
|
-
kwargs["time_limit_ms"] = time_limit_ms
|
|
162
|
-
if memory_limit_bytes is not None:
|
|
163
|
-
kwargs["memory_limit_bytes"] = memory_limit_bytes
|
|
164
|
-
|
|
165
|
-
sandbox = Zapcode(code, **kwargs)
|
|
166
|
-
state = sandbox.start()
|
|
167
|
-
|
|
168
|
-
while state.get("suspended"):
|
|
169
|
-
fn_name = state["function_name"]
|
|
170
|
-
args = state["args"]
|
|
171
|
-
|
|
172
|
-
tool_def = tool_defs.get(fn_name)
|
|
173
|
-
if not tool_def:
|
|
174
|
-
raise ValueError(
|
|
175
|
-
f"Guest code called unknown function '{fn_name}'. "
|
|
176
|
-
f"Available: {', '.join(tool_names)}"
|
|
177
|
-
)
|
|
178
|
-
|
|
179
|
-
# Build named args from positional args
|
|
180
|
-
param_names = list(tool_def.parameters.keys())
|
|
181
|
-
named_args = {
|
|
182
|
-
param_names[i]: args[i]
|
|
183
|
-
for i in range(min(len(param_names), len(args)))
|
|
184
|
-
}
|
|
212
|
+
exec_span = _create_span("execute", {"zapcode.code": code}) if tracing else None
|
|
185
213
|
|
|
186
|
-
|
|
187
|
-
|
|
214
|
+
try:
|
|
215
|
+
kwargs: dict[str, Any] = {"external_functions": tool_names}
|
|
216
|
+
if time_limit_ms is not None:
|
|
217
|
+
kwargs["time_limit_ms"] = time_limit_ms
|
|
218
|
+
if memory_limit_bytes is not None:
|
|
219
|
+
kwargs["memory_limit_bytes"] = memory_limit_bytes
|
|
188
220
|
|
|
189
|
-
|
|
190
|
-
state =
|
|
221
|
+
sandbox = Zapcode(code, **kwargs)
|
|
222
|
+
state = sandbox.start()
|
|
191
223
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
224
|
+
while state.get("suspended"):
|
|
225
|
+
fn_name = state["function_name"]
|
|
226
|
+
args = state["args"]
|
|
227
|
+
|
|
228
|
+
tool_def = tool_defs.get(fn_name)
|
|
229
|
+
if not tool_def:
|
|
230
|
+
raise ValueError(
|
|
231
|
+
f"Guest code called unknown function '{fn_name}'. "
|
|
232
|
+
f"Available: {', '.join(tool_names)}"
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
# Build named args from positional args
|
|
236
|
+
param_names = list(tool_def.parameters.keys())
|
|
237
|
+
named_args = {
|
|
238
|
+
param_names[i]: args[i]
|
|
239
|
+
for i in range(min(len(param_names), len(args)))
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
tool_span = _create_span("tool_call", {
|
|
243
|
+
"zapcode.tool.name": fn_name,
|
|
244
|
+
"zapcode.tool.args": json.dumps(args, default=str),
|
|
245
|
+
}) if tracing else None
|
|
246
|
+
|
|
247
|
+
result = tool_def.execute(named_args)
|
|
248
|
+
tool_calls.append({"name": fn_name, "args": args, "result": result})
|
|
249
|
+
|
|
250
|
+
if tool_span:
|
|
251
|
+
tool_span.attributes["zapcode.tool.result"] = json.dumps(result, default=str)
|
|
252
|
+
_end_span(tool_span)
|
|
253
|
+
exec_span.children.append(tool_span)
|
|
254
|
+
|
|
255
|
+
snapshot: ZapcodeSnapshot = state["snapshot"]
|
|
256
|
+
state = snapshot.resume(result)
|
|
257
|
+
|
|
258
|
+
stdout = state.get("stdout", "")
|
|
259
|
+
|
|
260
|
+
if exec_span:
|
|
261
|
+
exec_span.attributes["zapcode.output"] = json.dumps(state.get("output"), default=str)
|
|
262
|
+
if stdout:
|
|
263
|
+
exec_span.attributes["zapcode.stdout"] = stdout
|
|
264
|
+
_end_span(exec_span)
|
|
265
|
+
|
|
266
|
+
if debug and exec_span:
|
|
267
|
+
_print_trace(exec_span)
|
|
268
|
+
|
|
269
|
+
return ExecutionResult(
|
|
270
|
+
code=code,
|
|
271
|
+
output=state.get("output"),
|
|
272
|
+
stdout=stdout,
|
|
273
|
+
tool_calls=tool_calls,
|
|
274
|
+
trace=exec_span,
|
|
275
|
+
)
|
|
276
|
+
except Exception as err:
|
|
277
|
+
error_msg = str(err)
|
|
278
|
+
|
|
279
|
+
if exec_span:
|
|
280
|
+
exec_span.attributes["zapcode.error"] = error_msg
|
|
281
|
+
_end_span(exec_span, "error")
|
|
282
|
+
|
|
283
|
+
if not auto_fix:
|
|
284
|
+
if debug and exec_span:
|
|
285
|
+
_print_trace(exec_span)
|
|
286
|
+
raise
|
|
287
|
+
|
|
288
|
+
if debug and exec_span:
|
|
289
|
+
_print_trace(exec_span)
|
|
290
|
+
|
|
291
|
+
return ExecutionResult(
|
|
292
|
+
code=code,
|
|
293
|
+
output=None,
|
|
294
|
+
stdout="",
|
|
295
|
+
tool_calls=tool_calls,
|
|
296
|
+
error=f"Execution failed: {error_msg}. Please fix your code and try again.",
|
|
297
|
+
trace=exec_span,
|
|
298
|
+
)
|
|
197
299
|
|
|
198
300
|
|
|
199
301
|
# ---------------------------------------------------------------------------
|
|
@@ -241,6 +343,12 @@ class ZapcodeAI:
|
|
|
241
343
|
custom: dict[str, Any] = field(default_factory=dict)
|
|
242
344
|
"""Output from custom adapters, keyed by adapter name."""
|
|
243
345
|
|
|
346
|
+
get_trace: Callable[[], TraceSpan | None] = field(default=lambda: None)
|
|
347
|
+
"""Get the full session trace tree. Available when debug or auto_fix is enabled."""
|
|
348
|
+
|
|
349
|
+
print_trace: Callable[[], None] = field(default=lambda: None)
|
|
350
|
+
"""Print the full session trace tree to the console."""
|
|
351
|
+
|
|
244
352
|
|
|
245
353
|
# ---------------------------------------------------------------------------
|
|
246
354
|
# Main entry point
|
|
@@ -252,6 +360,8 @@ def zapcode(
|
|
|
252
360
|
system: str | None = None,
|
|
253
361
|
memory_limit_bytes: int | None = None,
|
|
254
362
|
time_limit_ms: int = 10_000,
|
|
363
|
+
debug: bool = False,
|
|
364
|
+
auto_fix: bool = False,
|
|
255
365
|
adapters: list[Adapter] | None = None,
|
|
256
366
|
) -> ZapcodeAI:
|
|
257
367
|
"""
|
|
@@ -263,6 +373,11 @@ def zapcode(
|
|
|
263
373
|
- `handle_tool_call(code)` → Universal handler for any SDK
|
|
264
374
|
- `custom` → Output from custom adapters
|
|
265
375
|
|
|
376
|
+
Args:
|
|
377
|
+
debug: Log generated code, tool calls, and output to the console.
|
|
378
|
+
auto_fix: When True, execution errors are returned as tool results
|
|
379
|
+
instead of raising. The LLM sees the error and can self-correct.
|
|
380
|
+
|
|
266
381
|
Example with Anthropic SDK::
|
|
267
382
|
|
|
268
383
|
from zapcode_ai import zapcode, ToolDefinition, ParamDef
|
|
@@ -293,13 +408,30 @@ def zapcode(
|
|
|
293
408
|
print(result.output)
|
|
294
409
|
"""
|
|
295
410
|
system_prompt = _build_system_prompt(tools, system)
|
|
411
|
+
tracing = debug or auto_fix
|
|
412
|
+
|
|
413
|
+
# Session-level trace collects all attempts
|
|
414
|
+
session_trace: TraceSpan | None = (
|
|
415
|
+
_create_span("session", {"zapcode.tools": ", ".join(tools.keys())})
|
|
416
|
+
if tracing else None
|
|
417
|
+
)
|
|
418
|
+
attempt_count = 0
|
|
296
419
|
|
|
297
420
|
def handle_tool_call(code: str) -> ExecutionResult:
|
|
298
|
-
|
|
421
|
+
nonlocal attempt_count
|
|
422
|
+
attempt_count += 1
|
|
423
|
+
result = _execute_code(
|
|
299
424
|
code, tools,
|
|
300
425
|
memory_limit_bytes=memory_limit_bytes,
|
|
301
426
|
time_limit_ms=time_limit_ms,
|
|
427
|
+
debug=debug,
|
|
428
|
+
auto_fix=auto_fix,
|
|
302
429
|
)
|
|
430
|
+
if session_trace and result.trace:
|
|
431
|
+
result.trace.name = f"attempt_{attempt_count}"
|
|
432
|
+
result.trace.attributes["zapcode.attempt"] = attempt_count
|
|
433
|
+
session_trace.children.append(result.trace)
|
|
434
|
+
return result
|
|
303
435
|
|
|
304
436
|
# Anthropic SDK format
|
|
305
437
|
anthropic_tools = [
|
|
@@ -335,12 +467,28 @@ def zapcode(
|
|
|
335
467
|
for adapter in adapters:
|
|
336
468
|
custom[adapter.name] = adapter.adapt(ctx)
|
|
337
469
|
|
|
470
|
+
def get_trace() -> TraceSpan | None:
|
|
471
|
+
if not session_trace:
|
|
472
|
+
return None
|
|
473
|
+
status = "ok" if any(c.status == "ok" for c in session_trace.children) else "error"
|
|
474
|
+
_end_span(session_trace, status)
|
|
475
|
+
return session_trace
|
|
476
|
+
|
|
477
|
+
def print_session_trace() -> None:
|
|
478
|
+
trace = get_trace()
|
|
479
|
+
if trace:
|
|
480
|
+
print("\n─── Zapcode Trace ───")
|
|
481
|
+
_print_trace(trace)
|
|
482
|
+
print("─────────────────────\n")
|
|
483
|
+
|
|
338
484
|
return ZapcodeAI(
|
|
339
485
|
system=system_prompt,
|
|
340
486
|
anthropic_tools=anthropic_tools,
|
|
341
487
|
openai_tools=openai_tools,
|
|
342
488
|
handle_tool_call=handle_tool_call,
|
|
343
489
|
custom=custom,
|
|
490
|
+
get_trace=get_trace,
|
|
491
|
+
print_trace=print_session_trace,
|
|
344
492
|
)
|
|
345
493
|
|
|
346
494
|
|
|
@@ -350,6 +498,8 @@ def execute(
|
|
|
350
498
|
*,
|
|
351
499
|
memory_limit_bytes: int | None = None,
|
|
352
500
|
time_limit_ms: int | None = None,
|
|
501
|
+
debug: bool = False,
|
|
502
|
+
auto_fix: bool = False,
|
|
353
503
|
) -> ExecutionResult:
|
|
354
504
|
"""
|
|
355
505
|
Execute TypeScript code directly in a Zapcode sandbox with tool resolution.
|
|
@@ -374,4 +524,6 @@ def execute(
|
|
|
374
524
|
code, tools,
|
|
375
525
|
memory_limit_bytes=memory_limit_bytes,
|
|
376
526
|
time_limit_ms=time_limit_ms,
|
|
527
|
+
debug=debug,
|
|
528
|
+
auto_fix=auto_fix,
|
|
377
529
|
)
|
|
File without changes
|