zapcode-ai 1.2.0__tar.gz → 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: zapcode-ai
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.4.0
|
|
4
4
|
Summary: AI SDK integration for Zapcode — let LLMs write and execute TypeScript safely
|
|
5
5
|
Project-URL: Homepage, https://github.com/TheUncharted/zapcode
|
|
6
6
|
Project-URL: Repository, https://github.com/TheUncharted/zapcode
|
|
@@ -47,10 +47,10 @@ Description-Content-Type: text/markdown
|
|
|
47
47
|
|
|
48
48
|
AI agents are more capable when they **write code** instead of chaining tool calls. Code gives agents loops, conditionals, variables, and composition — things that tool chains simulate poorly.
|
|
49
49
|
|
|
50
|
-
- [
|
|
51
|
-
- [Programmatic Tool Calling](https://
|
|
52
|
-
- [Code Execution with MCP](https://www.anthropic.com/engineering/code-execution-mcp)
|
|
53
|
-
- [Smol Agents](https://
|
|
50
|
+
- [Codemode](https://blog.cloudflare.com/code-mode/) from Cloudflare
|
|
51
|
+
- [Programmatic Tool Calling](https://platform.claude.com/docs/en/agents-and-tools/tool-use/programmatic-tool-calling) from Anthropic
|
|
52
|
+
- [Code Execution with MCP](https://www.anthropic.com/engineering/code-execution-with-mcp) from Anthropic
|
|
53
|
+
- [Smol Agents](https://github.com/huggingface/smolagents) from Hugging Face
|
|
54
54
|
|
|
55
55
|
**But running AI-generated code is dangerous and slow.**
|
|
56
56
|
|
|
@@ -92,6 +92,10 @@ All benchmarks run the full pipeline: parse → compile → execute. No caching,
|
|
|
92
92
|
| Array creation | **2.4 µs** | — | — |
|
|
93
93
|
| Object creation | **5.2 µs** | — | — |
|
|
94
94
|
| Function call | **4.6 µs** | — | — |
|
|
95
|
+
| Promise.resolve + await | **3.1 µs** | — | — |
|
|
96
|
+
| Promise.then (single) | **5.6 µs** | — | — |
|
|
97
|
+
| Promise.then chain (×3) | **9.9 µs** | — | — |
|
|
98
|
+
| Promise.all (3 promises) | **7.4 µs** | — | — |
|
|
95
99
|
| Loop (100 iterations) | **77.8 µs** | — | — |
|
|
96
100
|
| Fibonacci (n=10, 177 calls) | **138.4 µs** | — | — |
|
|
97
101
|
| Snapshot size (typical agent) | **< 2 KB** | N/A | N/A |
|
|
@@ -172,7 +176,7 @@ if (!state.completed) {
|
|
|
172
176
|
}
|
|
173
177
|
```
|
|
174
178
|
|
|
175
|
-
See [`examples/typescript/basic.ts`](examples/typescript/basic.ts) for more.
|
|
179
|
+
See [`examples/typescript/basic/main.ts`](examples/typescript/basic/main.ts) for more.
|
|
176
180
|
|
|
177
181
|
### Python
|
|
178
182
|
|
|
@@ -209,7 +213,7 @@ if state.get("suspended"):
|
|
|
209
213
|
result = restored.resume({"condition": "Clear", "temp": 26})
|
|
210
214
|
```
|
|
211
215
|
|
|
212
|
-
See [`examples/python/basic.py`](examples/python/basic.py) for more.
|
|
216
|
+
See [`examples/python/basic/main.py`](examples/python/basic/main.py) for more.
|
|
213
217
|
|
|
214
218
|
<details>
|
|
215
219
|
<summary><strong>Rust</strong></summary>
|
|
@@ -247,7 +251,7 @@ if let VmState::Suspended { snapshot, .. } = state {
|
|
|
247
251
|
}
|
|
248
252
|
```
|
|
249
253
|
|
|
250
|
-
See [`examples/rust/basic.rs`](examples/rust/basic.rs) for more.
|
|
254
|
+
See [`examples/rust/basic/basic.rs`](examples/rust/basic/basic.rs) for more.
|
|
251
255
|
</details>
|
|
252
256
|
|
|
253
257
|
<details>
|
|
@@ -268,7 +272,7 @@ console.log(result.output); // 120
|
|
|
268
272
|
</script>
|
|
269
273
|
```
|
|
270
274
|
|
|
271
|
-
See [`examples/wasm/index.html`](examples/wasm/index.html) for a full playground.
|
|
275
|
+
See [`examples/wasm/basic/index.html`](examples/wasm/basic/index.html) for a full playground.
|
|
272
276
|
</details>
|
|
273
277
|
|
|
274
278
|
## AI Agent Usage
|
|
@@ -322,7 +326,7 @@ const { text } = await generateText({
|
|
|
322
326
|
|
|
323
327
|
Under the hood: the LLM writes TypeScript code that calls your tools → Zapcode executes it in a sandbox → tool calls suspend the VM → your `execute` functions run on the host → results flow back in. All in ~2µs startup + tool execution time.
|
|
324
328
|
|
|
325
|
-
See [`examples/typescript/ai-agent-zapcode-ai.ts`](examples/typescript/ai-agent-zapcode-ai.ts) for the full working example.
|
|
329
|
+
See [`examples/typescript/ai-agent/ai-agent-zapcode-ai.ts`](examples/typescript/ai-agent/ai-agent-zapcode-ai.ts) for the full working example.
|
|
326
330
|
|
|
327
331
|
<details>
|
|
328
332
|
<summary><strong>Anthropic SDK</strong></summary>
|
|
@@ -387,7 +391,7 @@ while state.get("suspended"):
|
|
|
387
391
|
print(state["output"])
|
|
388
392
|
```
|
|
389
393
|
|
|
390
|
-
See [`examples/typescript/ai-agent-anthropic.ts`](examples/typescript/ai-agent-anthropic.ts) and [`examples/python/ai_agent_anthropic.py`](examples/python/ai_agent_anthropic.py).
|
|
394
|
+
See [`examples/typescript/ai-agent/ai-agent-anthropic.ts`](examples/typescript/ai-agent/ai-agent-anthropic.ts) and [`examples/python/ai-agent/ai_agent_anthropic.py`](examples/python/ai-agent/ai_agent_anthropic.py).
|
|
391
395
|
</details>
|
|
392
396
|
|
|
393
397
|
<details>
|
|
@@ -474,6 +478,63 @@ langchain_tool = b.custom["langchain"]
|
|
|
474
478
|
The adapter receives an `AdapterContext` with everything needed: system prompt, tool name, tool JSON schema, and a `handleToolCall` function. Return whatever shape your SDK expects.
|
|
475
479
|
</details>
|
|
476
480
|
|
|
481
|
+
## Auto-Fix, Debug & Execution Tracing
|
|
482
|
+
|
|
483
|
+
### Auto-fix (`autoFix`)
|
|
484
|
+
|
|
485
|
+
When enabled, execution errors are returned as tool results instead of throwing — letting the LLM see the error and self-correct on the next step.
|
|
486
|
+
|
|
487
|
+
**TypeScript:**
|
|
488
|
+
```typescript
|
|
489
|
+
const { system, tools } = zapcode({
|
|
490
|
+
autoFix: true,
|
|
491
|
+
tools: { /* ... */ },
|
|
492
|
+
});
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
**Python:**
|
|
496
|
+
```python
|
|
497
|
+
zap = zapcode(auto_fix=True, tools={...})
|
|
498
|
+
```
|
|
499
|
+
|
|
500
|
+
### Execution Trace
|
|
501
|
+
|
|
502
|
+
Every execution produces a trace tree with timing for each phase (parse → compile → execute). Use `printTrace()` / `print_trace()` to display the full session trace, or `getTrace()` / `get_trace()` to access the trace programmatically.
|
|
503
|
+
|
|
504
|
+
**TypeScript:**
|
|
505
|
+
```typescript
|
|
506
|
+
const { system, tools, printTrace, getTrace } = zapcode({
|
|
507
|
+
autoFix: true,
|
|
508
|
+
tools: { /* ... */ },
|
|
509
|
+
});
|
|
510
|
+
|
|
511
|
+
// After running...
|
|
512
|
+
printTrace();
|
|
513
|
+
// ✓ zapcode.session 12.3ms
|
|
514
|
+
// ✓ execute_code 8.1ms
|
|
515
|
+
// ✓ parse 0.2ms
|
|
516
|
+
// ✓ compile 0.1ms
|
|
517
|
+
// ✓ execute 7.8ms
|
|
518
|
+
|
|
519
|
+
const trace = getTrace(); // TraceSpan tree
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
**Python:**
|
|
523
|
+
```python
|
|
524
|
+
zap = zapcode(auto_fix=True, tools={...})
|
|
525
|
+
|
|
526
|
+
# After running...
|
|
527
|
+
zap.print_trace()
|
|
528
|
+
trace = zap.get_trace() # TraceSpan tree
|
|
529
|
+
```
|
|
530
|
+
|
|
531
|
+
### Debug Logging
|
|
532
|
+
|
|
533
|
+
For detailed logging of generated code, tool calls, and output, see the debug-tracing examples which show how to inspect each execution step:
|
|
534
|
+
|
|
535
|
+
- [TypeScript debug-tracing example](examples/typescript/debug-tracing/main.ts)
|
|
536
|
+
- [Python debug-tracing example](examples/python/debug-tracing/main.py)
|
|
537
|
+
|
|
477
538
|
## What Zapcode Can and Cannot Do
|
|
478
539
|
|
|
479
540
|
**Can do:**
|
|
@@ -21,10 +21,10 @@
|
|
|
21
21
|
|
|
22
22
|
AI agents are more capable when they **write code** instead of chaining tool calls. Code gives agents loops, conditionals, variables, and composition — things that tool chains simulate poorly.
|
|
23
23
|
|
|
24
|
-
- [
|
|
25
|
-
- [Programmatic Tool Calling](https://
|
|
26
|
-
- [Code Execution with MCP](https://www.anthropic.com/engineering/code-execution-mcp)
|
|
27
|
-
- [Smol Agents](https://
|
|
24
|
+
- [Codemode](https://blog.cloudflare.com/code-mode/) from Cloudflare
|
|
25
|
+
- [Programmatic Tool Calling](https://platform.claude.com/docs/en/agents-and-tools/tool-use/programmatic-tool-calling) from Anthropic
|
|
26
|
+
- [Code Execution with MCP](https://www.anthropic.com/engineering/code-execution-with-mcp) from Anthropic
|
|
27
|
+
- [Smol Agents](https://github.com/huggingface/smolagents) from Hugging Face
|
|
28
28
|
|
|
29
29
|
**But running AI-generated code is dangerous and slow.**
|
|
30
30
|
|
|
@@ -66,6 +66,10 @@ All benchmarks run the full pipeline: parse → compile → execute. No caching,
|
|
|
66
66
|
| Array creation | **2.4 µs** | — | — |
|
|
67
67
|
| Object creation | **5.2 µs** | — | — |
|
|
68
68
|
| Function call | **4.6 µs** | — | — |
|
|
69
|
+
| Promise.resolve + await | **3.1 µs** | — | — |
|
|
70
|
+
| Promise.then (single) | **5.6 µs** | — | — |
|
|
71
|
+
| Promise.then chain (×3) | **9.9 µs** | — | — |
|
|
72
|
+
| Promise.all (3 promises) | **7.4 µs** | — | — |
|
|
69
73
|
| Loop (100 iterations) | **77.8 µs** | — | — |
|
|
70
74
|
| Fibonacci (n=10, 177 calls) | **138.4 µs** | — | — |
|
|
71
75
|
| Snapshot size (typical agent) | **< 2 KB** | N/A | N/A |
|
|
@@ -146,7 +150,7 @@ if (!state.completed) {
|
|
|
146
150
|
}
|
|
147
151
|
```
|
|
148
152
|
|
|
149
|
-
See [`examples/typescript/basic.ts`](examples/typescript/basic.ts) for more.
|
|
153
|
+
See [`examples/typescript/basic/main.ts`](examples/typescript/basic/main.ts) for more.
|
|
150
154
|
|
|
151
155
|
### Python
|
|
152
156
|
|
|
@@ -183,7 +187,7 @@ if state.get("suspended"):
|
|
|
183
187
|
result = restored.resume({"condition": "Clear", "temp": 26})
|
|
184
188
|
```
|
|
185
189
|
|
|
186
|
-
See [`examples/python/basic.py`](examples/python/basic.py) for more.
|
|
190
|
+
See [`examples/python/basic/main.py`](examples/python/basic/main.py) for more.
|
|
187
191
|
|
|
188
192
|
<details>
|
|
189
193
|
<summary><strong>Rust</strong></summary>
|
|
@@ -221,7 +225,7 @@ if let VmState::Suspended { snapshot, .. } = state {
|
|
|
221
225
|
}
|
|
222
226
|
```
|
|
223
227
|
|
|
224
|
-
See [`examples/rust/basic.rs`](examples/rust/basic.rs) for more.
|
|
228
|
+
See [`examples/rust/basic/basic.rs`](examples/rust/basic/basic.rs) for more.
|
|
225
229
|
</details>
|
|
226
230
|
|
|
227
231
|
<details>
|
|
@@ -242,7 +246,7 @@ console.log(result.output); // 120
|
|
|
242
246
|
</script>
|
|
243
247
|
```
|
|
244
248
|
|
|
245
|
-
See [`examples/wasm/index.html`](examples/wasm/index.html) for a full playground.
|
|
249
|
+
See [`examples/wasm/basic/index.html`](examples/wasm/basic/index.html) for a full playground.
|
|
246
250
|
</details>
|
|
247
251
|
|
|
248
252
|
## AI Agent Usage
|
|
@@ -296,7 +300,7 @@ const { text } = await generateText({
|
|
|
296
300
|
|
|
297
301
|
Under the hood: the LLM writes TypeScript code that calls your tools → Zapcode executes it in a sandbox → tool calls suspend the VM → your `execute` functions run on the host → results flow back in. All in ~2µs startup + tool execution time.
|
|
298
302
|
|
|
299
|
-
See [`examples/typescript/ai-agent-zapcode-ai.ts`](examples/typescript/ai-agent-zapcode-ai.ts) for the full working example.
|
|
303
|
+
See [`examples/typescript/ai-agent/ai-agent-zapcode-ai.ts`](examples/typescript/ai-agent/ai-agent-zapcode-ai.ts) for the full working example.
|
|
300
304
|
|
|
301
305
|
<details>
|
|
302
306
|
<summary><strong>Anthropic SDK</strong></summary>
|
|
@@ -361,7 +365,7 @@ while state.get("suspended"):
|
|
|
361
365
|
print(state["output"])
|
|
362
366
|
```
|
|
363
367
|
|
|
364
|
-
See [`examples/typescript/ai-agent-anthropic.ts`](examples/typescript/ai-agent-anthropic.ts) and [`examples/python/ai_agent_anthropic.py`](examples/python/ai_agent_anthropic.py).
|
|
368
|
+
See [`examples/typescript/ai-agent/ai-agent-anthropic.ts`](examples/typescript/ai-agent/ai-agent-anthropic.ts) and [`examples/python/ai-agent/ai_agent_anthropic.py`](examples/python/ai-agent/ai_agent_anthropic.py).
|
|
365
369
|
</details>
|
|
366
370
|
|
|
367
371
|
<details>
|
|
@@ -448,6 +452,63 @@ langchain_tool = b.custom["langchain"]
|
|
|
448
452
|
The adapter receives an `AdapterContext` with everything needed: system prompt, tool name, tool JSON schema, and a `handleToolCall` function. Return whatever shape your SDK expects.
|
|
449
453
|
</details>
|
|
450
454
|
|
|
455
|
+
## Auto-Fix, Debug & Execution Tracing
|
|
456
|
+
|
|
457
|
+
### Auto-fix (`autoFix`)
|
|
458
|
+
|
|
459
|
+
When enabled, execution errors are returned as tool results instead of throwing — letting the LLM see the error and self-correct on the next step.
|
|
460
|
+
|
|
461
|
+
**TypeScript:**
|
|
462
|
+
```typescript
|
|
463
|
+
const { system, tools } = zapcode({
|
|
464
|
+
autoFix: true,
|
|
465
|
+
tools: { /* ... */ },
|
|
466
|
+
});
|
|
467
|
+
```
|
|
468
|
+
|
|
469
|
+
**Python:**
|
|
470
|
+
```python
|
|
471
|
+
zap = zapcode(auto_fix=True, tools={...})
|
|
472
|
+
```
|
|
473
|
+
|
|
474
|
+
### Execution Trace
|
|
475
|
+
|
|
476
|
+
Every execution produces a trace tree with timing for each phase (parse → compile → execute). Use `printTrace()` / `print_trace()` to display the full session trace, or `getTrace()` / `get_trace()` to access the trace programmatically.
|
|
477
|
+
|
|
478
|
+
**TypeScript:**
|
|
479
|
+
```typescript
|
|
480
|
+
const { system, tools, printTrace, getTrace } = zapcode({
|
|
481
|
+
autoFix: true,
|
|
482
|
+
tools: { /* ... */ },
|
|
483
|
+
});
|
|
484
|
+
|
|
485
|
+
// After running...
|
|
486
|
+
printTrace();
|
|
487
|
+
// ✓ zapcode.session 12.3ms
|
|
488
|
+
// ✓ execute_code 8.1ms
|
|
489
|
+
// ✓ parse 0.2ms
|
|
490
|
+
// ✓ compile 0.1ms
|
|
491
|
+
// ✓ execute 7.8ms
|
|
492
|
+
|
|
493
|
+
const trace = getTrace(); // TraceSpan tree
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
**Python:**
|
|
497
|
+
```python
|
|
498
|
+
zap = zapcode(auto_fix=True, tools={...})
|
|
499
|
+
|
|
500
|
+
# After running...
|
|
501
|
+
zap.print_trace()
|
|
502
|
+
trace = zap.get_trace() # TraceSpan tree
|
|
503
|
+
```
|
|
504
|
+
|
|
505
|
+
### Debug Logging
|
|
506
|
+
|
|
507
|
+
For detailed logging of generated code, tool calls, and output, see the debug-tracing examples which show how to inspect each execution step:
|
|
508
|
+
|
|
509
|
+
- [TypeScript debug-tracing example](examples/typescript/debug-tracing/main.ts)
|
|
510
|
+
- [Python debug-tracing example](examples/python/debug-tracing/main.py)
|
|
511
|
+
|
|
451
512
|
## What Zapcode Can and Cannot Do
|
|
452
513
|
|
|
453
514
|
**Can do:**
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "zapcode-ai"
|
|
3
|
-
version = "1.2.0"
|
|
3
|
+
version = "1.4.0" # x-release-please-version
|
|
4
4
|
description = "AI SDK integration for Zapcode — let LLMs write and execute TypeScript safely"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
@@ -29,6 +29,8 @@ Works with any AI SDK:
|
|
|
29
29
|
|
|
30
30
|
from __future__ import annotations
|
|
31
31
|
|
|
32
|
+
import json
|
|
33
|
+
import time
|
|
32
34
|
from dataclasses import dataclass, field
|
|
33
35
|
from typing import Any, Callable, Awaitable
|
|
34
36
|
|
|
@@ -55,12 +57,27 @@ class ToolDefinition:
|
|
|
55
57
|
execute: Callable[..., Any] # (args: dict) -> Any or awaitable
|
|
56
58
|
|
|
57
59
|
|
|
60
|
+
@dataclass
class TraceSpan:
    """One node of the execution trace tree (OTel-compatible shape).

    Attributes:
        name: Label of the traced step.
        start_time: Start timestamp, in milliseconds since the epoch.
        end_time: End timestamp in the same unit; defaults to 0.0.
        duration_ms: Elapsed time in milliseconds; defaults to 0.0.
        status: Either ``"ok"`` or ``"error"``; defaults to ``"ok"``.
        attributes: Free-form key/value metadata attached to the span.
        children: Nested spans, in the order they were appended.
    """

    name: str
    start_time: float
    end_time: float = 0.0
    duration_ms: float = 0.0
    status: str = "ok"
    # Mutable defaults go through default_factory so instances never share them.
    attributes: dict[str, Any] = field(default_factory=dict)
    children: list[TraceSpan] = field(default_factory=list)
|
|
70
|
+
|
|
71
|
+
|
|
58
72
|
@dataclass
class ExecutionResult:
    """Outcome of running one piece of guest code.

    Attributes:
        code: The guest source code that was executed.
        output: Value produced by the run.
        stdout: Text the guest code wrote to standard output.
        tool_calls: One dict per tool invocation made during the run.
        error: Error message for a failed run; ``None`` by default.
        trace: Trace span recorded for the run; ``None`` by default.
    """

    code: str
    output: Any
    stdout: str
    tool_calls: list[dict[str, Any]]
    error: str | None = None
    trace: TraceSpan | None = None
|
|
64
81
|
|
|
65
82
|
|
|
66
83
|
# ---------------------------------------------------------------------------
|
|
@@ -142,6 +159,39 @@ Rules:
|
|
|
142
159
|
return "\n\n".join(parts)
|
|
143
160
|
|
|
144
161
|
|
|
162
|
+
# ---------------------------------------------------------------------------
|
|
163
|
+
# Trace helpers
|
|
164
|
+
# ---------------------------------------------------------------------------
|
|
165
|
+
|
|
166
|
+
def _create_span(name: str, attributes: dict[str, Any] | None = None) -> TraceSpan:
    """Open a new trace span called *name*, stamped with the current time.

    Args:
        name: Label for the span.
        attributes: Optional metadata for the span; a fresh empty dict is
            used when this is ``None`` or empty.

    Returns:
        A ``TraceSpan`` whose ``start_time`` is "now" in milliseconds since
        the epoch; all other fields keep their defaults.
    """
    started_at_ms = time.time() * 1000
    span_attributes = attributes or {}
    return TraceSpan(name=name, start_time=started_at_ms, attributes=span_attributes)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _end_span(span: TraceSpan, status: str | None = None) -> TraceSpan:
    """Close *span*: stamp its end time, compute its duration, optionally set status.

    Args:
        span: The span to finish; it is mutated in place.
        status: New status to record. A falsy value (``None`` or ``""``)
            leaves ``span.status`` untouched.

    Returns:
        The same ``span`` object that was passed in.
    """
    span.end_time = time.time() * 1000
    # time.time() is a wall clock and can step backwards between the start
    # and end readings (e.g. a system clock correction); never report a
    # negative duration.
    span.duration_ms = max(0.0, span.end_time - span.start_time)
    if status:
        span.status = status
    return span
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _print_trace(span: TraceSpan, indent: int = 0) -> None:
    """Print *span* and all of its descendants as a tree, one line per span.

    Each line shows a status icon, the span name, its duration and its
    attributes. Attributes whose key starts with ``zapcode.code`` are
    omitted, and every attribute value is cut to its first 80 characters.

    Args:
        span: Root of the (sub)tree to print.
        indent: Nesting depth of ``span``; 0 for the root.
    """
    prefix = "" if indent == 0 else "│ " * (indent - 1) + "├─ "
    icon = "✗" if span.status == "error" else "✓"
    duration = "<1ms" if span.duration_ms < 1 else f"{span.duration_ms:.0f}ms"

    rendered = []
    for key, value in span.attributes.items():
        if key.startswith("zapcode.code"):
            continue  # don't dump full code in trace
        # Values such as captured stdout can contain line breaks; escape them
        # so a span never spills over several lines and breaks the tree.
        text = str(value)[:80].replace("\r", "\\r").replace("\n", "\\n")
        rendered.append(f"{key}={text}")
    attrs = " ".join(rendered)

    suffix = f" {attrs}" if attrs else ""
    print(f"{prefix}{icon} {span.name} ({duration}){suffix}")
    for child in span.children:
        _print_trace(child, indent + 1)
|
|
193
|
+
|
|
194
|
+
|
|
145
195
|
# ---------------------------------------------------------------------------
|
|
146
196
|
# Execution engine
|
|
147
197
|
# ---------------------------------------------------------------------------
|
|
@@ -152,48 +202,100 @@ def _execute_code(
|
|
|
152
202
|
*,
|
|
153
203
|
memory_limit_bytes: int | None = None,
|
|
154
204
|
time_limit_ms: int | None = None,
|
|
205
|
+
debug: bool = False,
|
|
206
|
+
auto_fix: bool = False,
|
|
155
207
|
) -> ExecutionResult:
|
|
156
208
|
tool_names = list(tool_defs.keys())
|
|
157
209
|
tool_calls: list[dict[str, Any]] = []
|
|
210
|
+
tracing = debug or auto_fix
|
|
158
211
|
|
|
159
|
-
|
|
160
|
-
if time_limit_ms is not None:
|
|
161
|
-
kwargs["time_limit_ms"] = time_limit_ms
|
|
162
|
-
if memory_limit_bytes is not None:
|
|
163
|
-
kwargs["memory_limit_bytes"] = memory_limit_bytes
|
|
164
|
-
|
|
165
|
-
sandbox = Zapcode(code, **kwargs)
|
|
166
|
-
state = sandbox.start()
|
|
167
|
-
|
|
168
|
-
while state.get("suspended"):
|
|
169
|
-
fn_name = state["function_name"]
|
|
170
|
-
args = state["args"]
|
|
171
|
-
|
|
172
|
-
tool_def = tool_defs.get(fn_name)
|
|
173
|
-
if not tool_def:
|
|
174
|
-
raise ValueError(
|
|
175
|
-
f"Guest code called unknown function '{fn_name}'. "
|
|
176
|
-
f"Available: {', '.join(tool_names)}"
|
|
177
|
-
)
|
|
178
|
-
|
|
179
|
-
# Build named args from positional args
|
|
180
|
-
param_names = list(tool_def.parameters.keys())
|
|
181
|
-
named_args = {
|
|
182
|
-
param_names[i]: args[i]
|
|
183
|
-
for i in range(min(len(param_names), len(args)))
|
|
184
|
-
}
|
|
212
|
+
exec_span = _create_span("execute", {"zapcode.code": code}) if tracing else None
|
|
185
213
|
|
|
186
|
-
|
|
187
|
-
|
|
214
|
+
try:
|
|
215
|
+
kwargs: dict[str, Any] = {"external_functions": tool_names}
|
|
216
|
+
if time_limit_ms is not None:
|
|
217
|
+
kwargs["time_limit_ms"] = time_limit_ms
|
|
218
|
+
if memory_limit_bytes is not None:
|
|
219
|
+
kwargs["memory_limit_bytes"] = memory_limit_bytes
|
|
188
220
|
|
|
189
|
-
|
|
190
|
-
state =
|
|
221
|
+
sandbox = Zapcode(code, **kwargs)
|
|
222
|
+
state = sandbox.start()
|
|
191
223
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
224
|
+
while state.get("suspended"):
|
|
225
|
+
fn_name = state["function_name"]
|
|
226
|
+
args = state["args"]
|
|
227
|
+
|
|
228
|
+
tool_def = tool_defs.get(fn_name)
|
|
229
|
+
if not tool_def:
|
|
230
|
+
raise ValueError(
|
|
231
|
+
f"Guest code called unknown function '{fn_name}'. "
|
|
232
|
+
f"Available: {', '.join(tool_names)}"
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
# Build named args from positional args
|
|
236
|
+
param_names = list(tool_def.parameters.keys())
|
|
237
|
+
named_args = {
|
|
238
|
+
param_names[i]: args[i]
|
|
239
|
+
for i in range(min(len(param_names), len(args)))
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
tool_span = _create_span("tool_call", {
|
|
243
|
+
"zapcode.tool.name": fn_name,
|
|
244
|
+
"zapcode.tool.args": json.dumps(args, default=str),
|
|
245
|
+
}) if tracing else None
|
|
246
|
+
|
|
247
|
+
result = tool_def.execute(named_args)
|
|
248
|
+
tool_calls.append({"name": fn_name, "args": args, "result": result})
|
|
249
|
+
|
|
250
|
+
if tool_span:
|
|
251
|
+
tool_span.attributes["zapcode.tool.result"] = json.dumps(result, default=str)
|
|
252
|
+
_end_span(tool_span)
|
|
253
|
+
exec_span.children.append(tool_span)
|
|
254
|
+
|
|
255
|
+
snapshot: ZapcodeSnapshot = state["snapshot"]
|
|
256
|
+
state = snapshot.resume(result)
|
|
257
|
+
|
|
258
|
+
stdout = state.get("stdout", "")
|
|
259
|
+
|
|
260
|
+
if exec_span:
|
|
261
|
+
exec_span.attributes["zapcode.output"] = json.dumps(state.get("output"), default=str)
|
|
262
|
+
if stdout:
|
|
263
|
+
exec_span.attributes["zapcode.stdout"] = stdout
|
|
264
|
+
_end_span(exec_span)
|
|
265
|
+
|
|
266
|
+
if debug and exec_span:
|
|
267
|
+
_print_trace(exec_span)
|
|
268
|
+
|
|
269
|
+
return ExecutionResult(
|
|
270
|
+
code=code,
|
|
271
|
+
output=state.get("output"),
|
|
272
|
+
stdout=stdout,
|
|
273
|
+
tool_calls=tool_calls,
|
|
274
|
+
trace=exec_span,
|
|
275
|
+
)
|
|
276
|
+
except Exception as err:
|
|
277
|
+
error_msg = str(err)
|
|
278
|
+
|
|
279
|
+
if exec_span:
|
|
280
|
+
exec_span.attributes["zapcode.error"] = error_msg
|
|
281
|
+
_end_span(exec_span, "error")
|
|
282
|
+
|
|
283
|
+
if not auto_fix:
|
|
284
|
+
if debug and exec_span:
|
|
285
|
+
_print_trace(exec_span)
|
|
286
|
+
raise
|
|
287
|
+
|
|
288
|
+
if debug and exec_span:
|
|
289
|
+
_print_trace(exec_span)
|
|
290
|
+
|
|
291
|
+
return ExecutionResult(
|
|
292
|
+
code=code,
|
|
293
|
+
output=None,
|
|
294
|
+
stdout="",
|
|
295
|
+
tool_calls=tool_calls,
|
|
296
|
+
error=f"Execution failed: {error_msg}. Please fix your code and try again.",
|
|
297
|
+
trace=exec_span,
|
|
298
|
+
)
|
|
197
299
|
|
|
198
300
|
|
|
199
301
|
# ---------------------------------------------------------------------------
|
|
@@ -241,6 +343,12 @@ class ZapcodeAI:
|
|
|
241
343
|
custom: dict[str, Any] = field(default_factory=dict)
|
|
242
344
|
"""Output from custom adapters, keyed by adapter name."""
|
|
243
345
|
|
|
346
|
+
get_trace: Callable[[], TraceSpan | None] = field(default=lambda: None)
|
|
347
|
+
"""Get the full session trace tree. Available when debug or auto_fix is enabled."""
|
|
348
|
+
|
|
349
|
+
print_trace: Callable[[], None] = field(default=lambda: None)
|
|
350
|
+
"""Print the full session trace tree to the console."""
|
|
351
|
+
|
|
244
352
|
|
|
245
353
|
# ---------------------------------------------------------------------------
|
|
246
354
|
# Main entry point
|
|
@@ -252,6 +360,8 @@ def zapcode(
|
|
|
252
360
|
system: str | None = None,
|
|
253
361
|
memory_limit_bytes: int | None = None,
|
|
254
362
|
time_limit_ms: int = 10_000,
|
|
363
|
+
debug: bool = False,
|
|
364
|
+
auto_fix: bool = False,
|
|
255
365
|
adapters: list[Adapter] | None = None,
|
|
256
366
|
) -> ZapcodeAI:
|
|
257
367
|
"""
|
|
@@ -263,6 +373,11 @@ def zapcode(
|
|
|
263
373
|
- `handle_tool_call(code)` → Universal handler for any SDK
|
|
264
374
|
- `custom` → Output from custom adapters
|
|
265
375
|
|
|
376
|
+
Args:
|
|
377
|
+
debug: Log generated code, tool calls, and output to the console.
|
|
378
|
+
auto_fix: When True, execution errors are returned as tool results
|
|
379
|
+
instead of raising. The LLM sees the error and can self-correct.
|
|
380
|
+
|
|
266
381
|
Example with Anthropic SDK::
|
|
267
382
|
|
|
268
383
|
from zapcode_ai import zapcode, ToolDefinition, ParamDef
|
|
@@ -293,13 +408,30 @@ def zapcode(
|
|
|
293
408
|
print(result.output)
|
|
294
409
|
"""
|
|
295
410
|
system_prompt = _build_system_prompt(tools, system)
|
|
411
|
+
tracing = debug or auto_fix
|
|
412
|
+
|
|
413
|
+
# Session-level trace collects all attempts
|
|
414
|
+
session_trace: TraceSpan | None = (
|
|
415
|
+
_create_span("session", {"zapcode.tools": ", ".join(tools.keys())})
|
|
416
|
+
if tracing else None
|
|
417
|
+
)
|
|
418
|
+
attempt_count = 0
|
|
296
419
|
|
|
297
420
|
def handle_tool_call(code: str) -> ExecutionResult:
|
|
298
|
-
|
|
421
|
+
nonlocal attempt_count
|
|
422
|
+
attempt_count += 1
|
|
423
|
+
result = _execute_code(
|
|
299
424
|
code, tools,
|
|
300
425
|
memory_limit_bytes=memory_limit_bytes,
|
|
301
426
|
time_limit_ms=time_limit_ms,
|
|
427
|
+
debug=debug,
|
|
428
|
+
auto_fix=auto_fix,
|
|
302
429
|
)
|
|
430
|
+
if session_trace and result.trace:
|
|
431
|
+
result.trace.name = f"attempt_{attempt_count}"
|
|
432
|
+
result.trace.attributes["zapcode.attempt"] = attempt_count
|
|
433
|
+
session_trace.children.append(result.trace)
|
|
434
|
+
return result
|
|
303
435
|
|
|
304
436
|
# Anthropic SDK format
|
|
305
437
|
anthropic_tools = [
|
|
@@ -335,12 +467,28 @@ def zapcode(
|
|
|
335
467
|
for adapter in adapters:
|
|
336
468
|
custom[adapter.name] = adapter.adapt(ctx)
|
|
337
469
|
|
|
470
|
+
def get_trace() -> TraceSpan | None:
|
|
471
|
+
if not session_trace:
|
|
472
|
+
return None
|
|
473
|
+
status = "ok" if any(c.status == "ok" for c in session_trace.children) else "error"
|
|
474
|
+
_end_span(session_trace, status)
|
|
475
|
+
return session_trace
|
|
476
|
+
|
|
477
|
+
def print_session_trace() -> None:
|
|
478
|
+
trace = get_trace()
|
|
479
|
+
if trace:
|
|
480
|
+
print("\n─── Zapcode Trace ───")
|
|
481
|
+
_print_trace(trace)
|
|
482
|
+
print("─────────────────────\n")
|
|
483
|
+
|
|
338
484
|
return ZapcodeAI(
|
|
339
485
|
system=system_prompt,
|
|
340
486
|
anthropic_tools=anthropic_tools,
|
|
341
487
|
openai_tools=openai_tools,
|
|
342
488
|
handle_tool_call=handle_tool_call,
|
|
343
489
|
custom=custom,
|
|
490
|
+
get_trace=get_trace,
|
|
491
|
+
print_trace=print_session_trace,
|
|
344
492
|
)
|
|
345
493
|
|
|
346
494
|
|
|
@@ -350,6 +498,8 @@ def execute(
|
|
|
350
498
|
*,
|
|
351
499
|
memory_limit_bytes: int | None = None,
|
|
352
500
|
time_limit_ms: int | None = None,
|
|
501
|
+
debug: bool = False,
|
|
502
|
+
auto_fix: bool = False,
|
|
353
503
|
) -> ExecutionResult:
|
|
354
504
|
"""
|
|
355
505
|
Execute TypeScript code directly in a Zapcode sandbox with tool resolution.
|
|
@@ -374,4 +524,6 @@ def execute(
|
|
|
374
524
|
code, tools,
|
|
375
525
|
memory_limit_bytes=memory_limit_bytes,
|
|
376
526
|
time_limit_ms=time_limit_ms,
|
|
527
|
+
debug=debug,
|
|
528
|
+
auto_fix=auto_fix,
|
|
377
529
|
)
|
|
File without changes
|