wafer-cli 0.2.19__tar.gz → 0.2.36__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wafer_cli-0.2.36/PKG-INFO +260 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/pyproject.toml +3 -2
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_cli_coverage.py +14 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_wevin_cli.py +86 -7
- wafer_cli-0.2.36/wafer/agent_defaults.py +197 -0
- wafer_cli-0.2.36/wafer/baseline.py +661 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/cli.py +607 -27
- wafer_cli-0.2.36/wafer/cli_instructions.py +143 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/corpus.py +241 -9
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/evaluate.py +424 -8
- wafer_cli-0.2.36/wafer/specs_cli.py +157 -0
- wafer_cli-0.2.36/wafer/targets_cli.py +472 -0
- wafer_cli-0.2.36/wafer/templates/aiter_optimize.py +59 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/templates/ask_docs.py +1 -1
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/templates/optimize_kernel.py +3 -1
- wafer_cli-0.2.36/wafer/templates/optimize_vllm.py +156 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/templates/trace_analyze.py +1 -1
- wafer_cli-0.2.36/wafer/tests/test_eval_cli_parity.py +199 -0
- wafer_cli-0.2.36/wafer/trace_compare.py +183 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/wevin_cli.py +113 -25
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/workspaces.py +96 -0
- wafer_cli-0.2.36/wafer_cli.egg-info/PKG-INFO +260 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer_cli.egg-info/SOURCES.txt +9 -0
- wafer_cli-0.2.19/PKG-INFO +0 -16
- wafer_cli-0.2.19/wafer_cli.egg-info/PKG-INFO +0 -16
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/README.md +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/setup.cfg +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_analytics.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_auth.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_billing.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_cli_parity_integration.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_config_integration.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_file_operations_integration.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_kernel_scope_cli.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_nsys_analyze.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_nsys_profile.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_output.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_rocprof_compute_integration.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_skill_commands.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_ssh_integration.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_targets_ops.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/tests/test_workflow_integration.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/GUIDE.md +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/__init__.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/analytics.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/api_client.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/auth.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/autotuner.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/billing.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/config.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/global_config.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/gpu_run.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/inference.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/kernel_scope.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/ncu_analyze.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/nsys_analyze.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/nsys_profile.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/output.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/problems.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/rocprof_compute.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/rocprof_sdk.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/rocprof_systems.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/skills/wafer-guide/SKILL.md +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/ssh_keys.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/target_lock.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/targets.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/targets_ops.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/templates/__init__.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/templates/optimize_kernelbench.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer/tracelens.py +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer_cli.egg-info/dependency_links.txt +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer_cli.egg-info/entry_points.txt +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer_cli.egg-info/requires.txt +0 -0
- {wafer_cli-0.2.19 → wafer_cli-0.2.36}/wafer_cli.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: wafer-cli
|
|
3
|
+
Version: 0.2.36
|
|
4
|
+
Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: typer>=0.12.0
|
|
8
|
+
Requires-Dist: trio>=0.24.0
|
|
9
|
+
Requires-Dist: trio-asyncio>=0.15.0
|
|
10
|
+
Requires-Dist: wafer-core>=0.1.0
|
|
11
|
+
Requires-Dist: perfetto>=0.16.0
|
|
12
|
+
Requires-Dist: posthog>=3.0.0
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
15
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
16
|
+
Requires-Dist: diff-cover>=8.0.0; extra == "dev"
|
|
17
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
18
|
+
|
|
19
|
+
# Wafer CLI
|
|
20
|
+
|
|
21
|
+
Run GPU workloads, optimize kernels, and query GPU documentation.
|
|
22
|
+
|
|
23
|
+
## Getting Started
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Install
|
|
27
|
+
cd apps/wafer-cli && uv sync
|
|
28
|
+
|
|
29
|
+
# Use staging (workspaces and other features require staging)
|
|
30
|
+
wafer config set api.environment staging
|
|
31
|
+
|
|
32
|
+
# Login
|
|
33
|
+
wafer login
|
|
34
|
+
|
|
35
|
+
# Run a command on a remote GPU
|
|
36
|
+
wafer remote-run -- nvidia-smi
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Commands
|
|
40
|
+
|
|
41
|
+
### `wafer login` / `wafer logout` / `wafer whoami`
|
|
42
|
+
|
|
43
|
+
Authenticate with GitHub OAuth.
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
wafer login # Opens browser for GitHub OAuth
|
|
47
|
+
wafer whoami # Show current user
|
|
48
|
+
wafer logout # Remove credentials
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### `wafer remote-run`
|
|
52
|
+
|
|
53
|
+
Run any command on a remote GPU.
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
wafer remote-run -- nvidia-smi
|
|
57
|
+
wafer remote-run --upload-dir ./my_code -- python3 train.py
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### `wafer workspaces`
|
|
61
|
+
|
|
62
|
+
Create and manage persistent GPU environments.
|
|
63
|
+
|
|
64
|
+
**Available GPUs:**
|
|
65
|
+
|
|
66
|
+
- `MI300X` - AMD Instinct MI300X (192GB HBM3, ROCm)
|
|
67
|
+
- `B200` - NVIDIA Blackwell B200 (180GB HBM3e, CUDA) - default
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
wafer workspaces list
|
|
71
|
+
wafer workspaces create my-workspace --gpu B200 --wait # NVIDIA B200
|
|
72
|
+
wafer workspaces create amd-dev --gpu MI300X # AMD MI300X
|
|
73
|
+
wafer workspaces ssh <workspace-id>
|
|
74
|
+
wafer workspaces delete <workspace-id>
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### `wafer agent`
|
|
78
|
+
|
|
79
|
+
AI assistant for GPU kernel development. Helps with CUDA/Triton optimization, documentation queries, and performance analysis.
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
wafer agent "What is TMEM in CuTeDSL?"
|
|
83
|
+
wafer agent -s "optimize this kernel" < kernel.py
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### `wafer evaluate`
|
|
87
|
+
|
|
88
|
+
Evaluate kernel correctness and performance against a reference implementation.
|
|
89
|
+
|
|
90
|
+
**Functional format** (default):
|
|
91
|
+
```bash
|
|
92
|
+
# Generate template files
|
|
93
|
+
wafer evaluate make-template ./my-kernel
|
|
94
|
+
|
|
95
|
+
# Run evaluation
|
|
96
|
+
wafer evaluate --impl kernel.py --reference ref.py --test-cases tests.json --benchmark
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
The implementation must define `custom_kernel(inputs)`; the reference must define `ref_kernel(inputs)` and `generate_input(**params)`.
|
|
100
|
+
|
|
101
|
+
**KernelBench format** (ModelNew class):
|
|
102
|
+
```bash
|
|
103
|
+
# Extract a KernelBench problem as template
|
|
104
|
+
wafer evaluate kernelbench make-template level1/1
|
|
105
|
+
|
|
106
|
+
# Run evaluation
|
|
107
|
+
wafer evaluate kernelbench --impl my_kernel.py --reference problem.py --benchmark
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
The implementation must define `class ModelNew(nn.Module)`; the reference must define `class Model`, `get_inputs()`, and `get_init_inputs()`.
|
|
111
|
+
|
|
112
|
+
### `wafer wevin -t ask-docs`
|
|
113
|
+
|
|
114
|
+
Query GPU documentation using the docs template.
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
wafer wevin -t ask-docs --json -s "What causes bank conflicts in shared memory?"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### `wafer corpus`
|
|
121
|
+
|
|
122
|
+
Download documentation to local filesystem for agents to search.
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
wafer corpus list
|
|
126
|
+
wafer corpus download cuda-programming-guide
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Customization
|
|
132
|
+
|
|
133
|
+
### `wafer remote-run` options
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
wafer remote-run --image pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel -- python3 script.py
|
|
137
|
+
wafer remote-run --require-hwc -- ncu --set full python3 bench.py # Hardware counters for NCU
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### `wafer evaluate` options
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
wafer evaluate --impl k.py --reference r.py --test-cases t.json \
|
|
144
|
+
--target vultr-b200 \ # Specific GPU target
|
|
145
|
+
--benchmark \ # Measure performance
|
|
146
|
+
--profile # Enable torch.profiler + NCU
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### `wafer push` for multi-command workflows
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
WORKSPACE=$(wafer push ./project)
|
|
153
|
+
wafer remote-run --workspace-id $WORKSPACE -- python3 test1.py
|
|
154
|
+
wafer remote-run --workspace-id $WORKSPACE -- python3 test2.py
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Profile analysis
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
wafer nvidia ncu analyze profile.ncu-rep
|
|
161
|
+
wafer nvidia nsys analyze profile.nsys-rep
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Advanced
|
|
167
|
+
|
|
168
|
+
### Local targets
|
|
169
|
+
|
|
170
|
+
Bypass the API and SSH directly to your own GPUs:
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
wafer targets list
|
|
174
|
+
wafer targets add ./my-gpu.toml
|
|
175
|
+
wafer targets default my-gpu
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Defensive evaluation
|
|
179
|
+
|
|
180
|
+
Detect evaluation hacking (stream injection, lazy evaluation, etc.):
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
wafer evaluate --impl k.py --reference r.py --test-cases t.json --benchmark --defensive
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Other tools
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
wafer perfetto <trace.json> --query "SELECT * FROM slice" # Perfetto SQL queries
|
|
190
|
+
wafer capture ./script.py # Capture execution snapshot
|
|
191
|
+
wafer compiler-analyze kernel.ptx # Analyze PTX/SASS
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### ROCm profiling (AMD GPUs)
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
wafer rocprof-sdk ...
|
|
198
|
+
wafer rocprof-systems ...
|
|
199
|
+
wafer rocprof-compute ...
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Shell Completion
|
|
205
|
+
|
|
206
|
+
Enable tab completion for commands, options, and target names:
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
# Install completion (zsh/bash/fish)
|
|
210
|
+
wafer --install-completion
|
|
211
|
+
|
|
212
|
+
# Then restart your terminal, or source your shell config:
|
|
213
|
+
source ~/.zshrc # or ~/.bashrc
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Now you can tab-complete:
|
|
217
|
+
- Commands: `wafer eva<TAB>` → `wafer evaluate`
|
|
218
|
+
- Options: `wafer evaluate --<TAB>`
|
|
219
|
+
- Target names: `wafer evaluate --target v<TAB>` → `wafer evaluate --target vultr-b200`
|
|
220
|
+
- File paths: `wafer evaluate --impl ./<TAB>`
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## AI Assistant Skills
|
|
225
|
+
|
|
226
|
+
Install the Wafer CLI skill to make wafer commands discoverable by your AI coding assistant:
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
# Install for all supported tools (Claude Code, Codex CLI, Cursor)
|
|
230
|
+
wafer skill install
|
|
231
|
+
|
|
232
|
+
# Install for a specific tool
|
|
233
|
+
wafer skill install -t cursor # Cursor
|
|
234
|
+
wafer skill install -t claude # Claude Code
|
|
235
|
+
wafer skill install -t codex # Codex CLI
|
|
236
|
+
|
|
237
|
+
# Check installation status
|
|
238
|
+
wafer skill status
|
|
239
|
+
|
|
240
|
+
# Uninstall
|
|
241
|
+
wafer skill uninstall
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### Installing from GitHub (Cursor)
|
|
245
|
+
|
|
246
|
+
You can also install the skill directly from GitHub in Cursor:
|
|
247
|
+
|
|
248
|
+
1. Open Cursor Settings (Cmd+Shift+J / Ctrl+Shift+J)
|
|
249
|
+
2. Navigate to **Rules** → **Add Rule** → **Remote Rule (Github)**
|
|
250
|
+
3. Enter: `https://github.com/wafer-ai/skills`
|
|
251
|
+
4. Cursor will automatically discover skills in `.cursor/skills/`
|
|
252
|
+
|
|
253
|
+
The skill provides comprehensive guidance for GPU kernel development, including documentation lookup, trace analysis, kernel evaluation, and optimization workflows.
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## Requirements
|
|
258
|
+
|
|
259
|
+
- Python 3.11+
|
|
260
|
+
- GitHub account (for authentication)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "wafer-cli"
|
|
3
|
-
version = "0.2.19"
|
|
4
|
-
description = "CLI
|
|
3
|
+
version = "0.2.36"
|
|
4
|
+
description = "CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels"
|
|
5
|
+
readme = "README.md"
|
|
5
6
|
requires-python = ">=3.11"
|
|
6
7
|
dependencies = [
|
|
7
8
|
"typer>=0.12.0",
|
|
@@ -719,3 +719,17 @@ class TestWorkspacesExecFlagPassthrough:
|
|
|
719
719
|
"workspaces", "exec", "test-ws", "--", "cmd", "--output=/tmp/out"
|
|
720
720
|
])
|
|
721
721
|
assert "no such option" not in result.output.lower()
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
class TestAgentNoSandboxOption:
|
|
725
|
+
"""Test --no-sandbox option in wafer agent command."""
|
|
726
|
+
|
|
727
|
+
def test_agent_no_sandbox_option_exists(self) -> None:
|
|
728
|
+
"""Test that --no-sandbox option is accepted by wafer agent command."""
|
|
729
|
+
result = runner.invoke(app, ["agent", "--help"])
|
|
730
|
+
assert result.exit_code == 0
|
|
731
|
+
# Strip ANSI escape codes before checking (help output may contain color codes)
|
|
732
|
+
ansi_escape = re.compile(r'\x1b\[[0-9;]*m')
|
|
733
|
+
clean_output = ansi_escape.sub('', result.stdout)
|
|
734
|
+
assert "--no-sandbox" in clean_output
|
|
735
|
+
assert "liability" in clean_output.lower() # Warning text should be in help
|
|
@@ -634,35 +634,114 @@ def test_streaming_frontend_session_start_state_without_session_id():
|
|
|
634
634
|
|
|
635
635
|
def test_streaming_frontend_session_start_resumed_then_new():
|
|
636
636
|
"""Test session_start emission when resuming but states have different session_id.
|
|
637
|
-
|
|
637
|
+
|
|
638
638
|
Edge case: --resume used but states return a different session_id (the --resume session_id is kept and session_start is not emitted again).
|
|
639
639
|
"""
|
|
640
640
|
import trio
|
|
641
641
|
|
|
642
642
|
from wafer.wevin_cli import StreamingChunkFrontend
|
|
643
|
-
|
|
643
|
+
|
|
644
644
|
async def run_test() -> None:
|
|
645
645
|
# Start with resumed session_id
|
|
646
646
|
frontend = StreamingChunkFrontend(
|
|
647
647
|
session_id="resumed-session-123",
|
|
648
648
|
model="claude-sonnet-4.5"
|
|
649
649
|
)
|
|
650
|
-
|
|
650
|
+
|
|
651
651
|
emitted_events = []
|
|
652
652
|
|
|
653
653
|
def mock_emit(obj) -> None:
|
|
654
654
|
emitted_events.append(obj)
|
|
655
|
-
|
|
655
|
+
|
|
656
656
|
frontend._emit = mock_emit
|
|
657
|
-
|
|
657
|
+
|
|
658
658
|
# start() emits session_start for resumed session
|
|
659
659
|
await frontend.start()
|
|
660
660
|
assert len(emitted_events) == 1
|
|
661
661
|
assert emitted_events[0]["session_id"] == "resumed-session-123"
|
|
662
|
-
|
|
662
|
+
|
|
663
663
|
# If states have different session_id (shouldn't happen, but handle gracefully)
|
|
664
664
|
# The logic in main() checks `if first_session_id and not session_id`
|
|
665
665
|
# So if session_id was set, it won't emit again
|
|
666
666
|
# This is correct behavior - use the one from --resume
|
|
667
|
-
|
|
667
|
+
|
|
668
668
|
trio.run(run_test)
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
# =============================================================================
|
|
672
|
+
# --no-sandbox flag tests
|
|
673
|
+
# =============================================================================
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def test_no_sandbox_parameter_accepted():
|
|
677
|
+
"""Test that no_sandbox parameter exists in wevin_main signature."""
|
|
678
|
+
import inspect
|
|
679
|
+
|
|
680
|
+
from wafer.wevin_cli import main as wevin_main
|
|
681
|
+
|
|
682
|
+
sig = inspect.signature(wevin_main)
|
|
683
|
+
params = sig.parameters
|
|
684
|
+
|
|
685
|
+
# Verify parameter exists
|
|
686
|
+
assert 'no_sandbox' in params
|
|
687
|
+
|
|
688
|
+
# Verify type and default
|
|
689
|
+
assert str(params['no_sandbox'].annotation) in ('bool', "<class 'bool'>")
|
|
690
|
+
assert params['no_sandbox'].default is False
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
def test_build_environment_accepts_no_sandbox():
|
|
694
|
+
"""Test that _build_environment accepts no_sandbox parameter."""
|
|
695
|
+
import inspect
|
|
696
|
+
|
|
697
|
+
from wafer.wevin_cli import _build_environment
|
|
698
|
+
|
|
699
|
+
sig = inspect.signature(_build_environment)
|
|
700
|
+
params = sig.parameters
|
|
701
|
+
|
|
702
|
+
assert 'no_sandbox' in params
|
|
703
|
+
assert params['no_sandbox'].default is False
|
|
704
|
+
|
|
705
|
+
|
|
706
|
+
def test_build_environment_with_no_sandbox_false():
|
|
707
|
+
"""Test _build_environment creates env with sandbox ENABLED when no_sandbox=False."""
|
|
708
|
+
from wafer_core.rollouts.templates import TemplateConfig
|
|
709
|
+
from wafer_core.sandbox import SandboxMode
|
|
710
|
+
|
|
711
|
+
from wafer.wevin_cli import _build_environment
|
|
712
|
+
|
|
713
|
+
tpl = TemplateConfig(
|
|
714
|
+
name="test",
|
|
715
|
+
description="Test template",
|
|
716
|
+
system_prompt="Test",
|
|
717
|
+
tools=["read"],
|
|
718
|
+
)
|
|
719
|
+
|
|
720
|
+
# This will raise RuntimeError if sandbox is unavailable on this system
|
|
721
|
+
# That's expected - we're testing that sandbox is ENABLED by default
|
|
722
|
+
try:
|
|
723
|
+
env = _build_environment(tpl, None, None, no_sandbox=False)
|
|
724
|
+
# If we get here, sandbox is available - verify it's enabled
|
|
725
|
+
assert env.sandbox_mode == SandboxMode.ENABLED
|
|
726
|
+
except RuntimeError as e:
|
|
727
|
+
# Sandbox unavailable - that's OK, the error proves ENABLED is set
|
|
728
|
+
assert "sandboxing is not available" in str(e)
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
def test_build_environment_with_no_sandbox_true():
|
|
732
|
+
"""Test _build_environment creates env with sandbox DISABLED when no_sandbox=True."""
|
|
733
|
+
from wafer_core.rollouts.templates import TemplateConfig
|
|
734
|
+
from wafer_core.sandbox import SandboxMode
|
|
735
|
+
|
|
736
|
+
from wafer.wevin_cli import _build_environment
|
|
737
|
+
|
|
738
|
+
tpl = TemplateConfig(
|
|
739
|
+
name="test",
|
|
740
|
+
description="Test template",
|
|
741
|
+
system_prompt="Test",
|
|
742
|
+
tools=["read"],
|
|
743
|
+
)
|
|
744
|
+
|
|
745
|
+
# This should NOT raise - sandbox is disabled
|
|
746
|
+
env = _build_environment(tpl, None, None, no_sandbox=True)
|
|
747
|
+
assert env.sandbox_mode == SandboxMode.DISABLED
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Shared agent defaults for kernel optimization tasks.
|
|
2
|
+
|
|
3
|
+
Single source of truth for bash allowlists and enabled tools used by both:
|
|
4
|
+
- CLI templates (apps/wafer-cli/wafer/templates/*.py)
|
|
5
|
+
- Eval configs (research/evals/*_eval/*.py)
|
|
6
|
+
|
|
7
|
+
Import from here instead of defining your own copy.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
# Tools available to the agent (coding environment tools)
|
|
13
|
+
ENABLED_TOOLS: list[str] = ["read", "write", "edit", "glob", "grep", "bash"]
|
|
14
|
+
|
|
15
|
+
# vLLM-specific tools (same as ENABLED_TOOLS for now)
|
|
16
|
+
VLLM_ENABLED_TOOLS: list[str] = ["read", "write", "edit", "glob", "grep", "bash"]
|
|
17
|
+
|
|
18
|
+
# Bash commands allowed for kernel optimization agents.
|
|
19
|
+
# Uses prefix matching — "wafer evaluate" also allows "wafer evaluate kernelbench".
|
|
20
|
+
KERNELBENCH_BASH_ALLOWLIST: list[str] = [
|
|
21
|
+
# Kernel evaluation
|
|
22
|
+
"wafer evaluate",
|
|
23
|
+
# Profiling — AMD
|
|
24
|
+
"wafer amd rocprof-compute",
|
|
25
|
+
"wafer amd rocprof-sdk",
|
|
26
|
+
"wafer amd rocprof-systems",
|
|
27
|
+
# Profiling — NVIDIA
|
|
28
|
+
"wafer nvidia ncu",
|
|
29
|
+
"wafer nvidia nsys",
|
|
30
|
+
# Analysis
|
|
31
|
+
"wafer compiler-analyze",
|
|
32
|
+
# Sub-agents
|
|
33
|
+
"wafer agent -t ask-docs",
|
|
34
|
+
# General utilities
|
|
35
|
+
"python",
|
|
36
|
+
"python3",
|
|
37
|
+
"timeout",
|
|
38
|
+
"ls",
|
|
39
|
+
"cat",
|
|
40
|
+
"head",
|
|
41
|
+
"tail",
|
|
42
|
+
"wc",
|
|
43
|
+
"pwd",
|
|
44
|
+
"which",
|
|
45
|
+
]
|
|
46
|
+
|
|
47
|
+
# Tools available to aiter optimization agents (full coding environment)
|
|
48
|
+
AITER_ENABLED_TOOLS: list[str] = ["read", "write", "edit", "glob", "grep", "bash"]
|
|
49
|
+
|
|
50
|
+
# System prompt for aiter optimization (shared between eval and template)
|
|
51
|
+
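# Uses the {target_flag} placeholder; doubled braces such as {{op}} and {{function_name}} are escaped (presumably for str.format — confirm against the caller) and render as literal {op} / {function_name}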
|
|
52
|
+
AITER_SYSTEM_PROMPT = """\
|
|
53
|
+
You are a GPU kernel optimization expert specializing in AMD MI300X and the aiter library.
|
|
54
|
+
|
|
55
|
+
## Context
|
|
56
|
+
|
|
57
|
+
aiter (ROCm/aiter) is AMD's centralized repository for high-performance AI operators.
|
|
58
|
+
Operators are implemented using Triton kernels, Composable Kernel (CK), or HIP/ROCm.
|
|
59
|
+
|
|
60
|
+
Each operator has a test in `op_tests/test_{{op}}.py` that validates correctness and
|
|
61
|
+
measures performance against a reference implementation.
|
|
62
|
+
|
|
63
|
+
## Your Task
|
|
64
|
+
|
|
65
|
+
1. **Understand the operator**: Read the test file and trace imports to find implementation
|
|
66
|
+
2. **Establish baseline**: Run the evaluation to measure current performance
|
|
67
|
+
```bash
|
|
68
|
+
# Quick check with one shape (fast iteration)
|
|
69
|
+
wafer evaluate aiter --aiter-dir . --cmd "python op_tests/test_{{op}}.py --mnk 128,32,8192" {target_flag}
|
|
70
|
+
|
|
71
|
+
# Full test suite (final validation)
|
|
72
|
+
wafer evaluate aiter --aiter-dir . --cmd "python op_tests/test_{{op}}.py" {target_flag}
|
|
73
|
+
```
|
|
74
|
+
3. **Identify optimizations**: Look for memory access patterns, occupancy, instruction selection
|
|
75
|
+
4. **Implement changes**: Modify the operator to improve performance
|
|
76
|
+
5. **Validate**: Re-run evaluation to verify correctness and measure speedup
|
|
77
|
+
6. **Iterate**: Use quick checks during development, full suite for final validation
|
|
78
|
+
|
|
79
|
+
## Finding Source Files
|
|
80
|
+
|
|
81
|
+
The aiter codebase structure varies by operator. To find implementation files:
|
|
82
|
+
|
|
83
|
+
1. **Start with the test file**: `op_tests/test_{{op}}.py`
|
|
84
|
+
- Read imports to see what modules are used
|
|
85
|
+
- Look for the main function being tested
|
|
86
|
+
|
|
87
|
+
2. **Check common locations** (not all ops have all of these):
|
|
88
|
+
- `aiter/ops/{{op}}.py` — High-level Python API (some ops)
|
|
89
|
+
- `aiter/triton_kernels/` — Triton kernel implementations
|
|
90
|
+
- `csrc/kernels/` — CUDA/HIP kernel implementations
|
|
91
|
+
- `csrc/py_itfs_cu/` — Python interface CUDA files
|
|
92
|
+
- `csrc/cktile_*/` — Composable Kernel tile implementations
|
|
93
|
+
|
|
94
|
+
3. **Search for the op name**:
|
|
95
|
+
```bash
|
|
96
|
+
find . -name "*{{op}}*" -type f | grep -v __pycache__
|
|
97
|
+
grep -r "def {{function_name}}" aiter/ csrc/ --include="*.py" --include="*.cu"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Key Directories
|
|
101
|
+
|
|
102
|
+
- `aiter/` — Main package with operator implementations
|
|
103
|
+
- `aiter/ops/` — High-level operator APIs (some ops)
|
|
104
|
+
- `aiter/triton_kernels/` — Triton kernel implementations
|
|
105
|
+
- `csrc/` — C++/CUDA/HIP implementations
|
|
106
|
+
- `op_tests/` — Tests for each operator
|
|
107
|
+
- `aiter/configs/` — Tuned configurations (CSV files)
|
|
108
|
+
|
|
109
|
+
## Output
|
|
110
|
+
|
|
111
|
+
Your goal is to produce:
|
|
112
|
+
1. Modified operator code with optimizations
|
|
113
|
+
2. Benchmark results showing correctness and speedup
|
|
114
|
+
3. A summary of what you changed and why
|
|
115
|
+
|
|
116
|
+
The optimization should be correct (pass the op_test) and faster than baseline."""
|
|
117
|
+
|
|
118
|
+
# Bash commands allowed for aiter optimization agents.
|
|
119
|
+
AITER_BASH_ALLOWLIST: list[str] = [
|
|
120
|
+
# Read-only
|
|
121
|
+
"ls",
|
|
122
|
+
"cat",
|
|
123
|
+
"head",
|
|
124
|
+
"tail",
|
|
125
|
+
"wc",
|
|
126
|
+
"find",
|
|
127
|
+
"grep",
|
|
128
|
+
"rg",
|
|
129
|
+
"pwd",
|
|
130
|
+
"tree",
|
|
131
|
+
"which",
|
|
132
|
+
"diff",
|
|
133
|
+
"sort",
|
|
134
|
+
# Filesystem
|
|
135
|
+
"mkdir",
|
|
136
|
+
"cp",
|
|
137
|
+
"mv",
|
|
138
|
+
# Git
|
|
139
|
+
"git diff",
|
|
140
|
+
"git status",
|
|
141
|
+
"git log",
|
|
142
|
+
# Compilation
|
|
143
|
+
"hipcc",
|
|
144
|
+
"g++",
|
|
145
|
+
"gcc",
|
|
146
|
+
"clang",
|
|
147
|
+
"python",
|
|
148
|
+
"python3",
|
|
149
|
+
"pip",
|
|
150
|
+
"pytest",
|
|
151
|
+
# Execution — allows running compiled binaries and python scripts
|
|
152
|
+
"./",
|
|
153
|
+
# Kernel evaluation
|
|
154
|
+
"wafer evaluate aiter",
|
|
155
|
+
# Profiling — AMD
|
|
156
|
+
"wafer amd rocprof-compute",
|
|
157
|
+
"wafer amd rocprof-sdk",
|
|
158
|
+
"wafer amd rocprof-systems",
|
|
159
|
+
"wafer amd isa",
|
|
160
|
+
# Sub-agents
|
|
161
|
+
"wafer agent -t ask-docs",
|
|
162
|
+
# Misc
|
|
163
|
+
"timeout",
|
|
164
|
+
]
|
|
165
|
+
|
|
166
|
+
# Bash commands allowed for vLLM kernel optimization agents.
|
|
167
|
+
VLLM_BASH_ALLOWLIST: list[str] = [
|
|
168
|
+
# vLLM evaluation
|
|
169
|
+
"wafer evaluate vllm",
|
|
170
|
+
# vLLM's own test and benchmark commands (run inside vllm dir)
|
|
171
|
+
"pytest",
|
|
172
|
+
# Profiling — AMD
|
|
173
|
+
"wafer amd rocprof-compute",
|
|
174
|
+
"wafer amd rocprof-sdk",
|
|
175
|
+
"wafer amd rocprof-systems",
|
|
176
|
+
# Profiling — NVIDIA
|
|
177
|
+
"wafer nvidia ncu",
|
|
178
|
+
"wafer nvidia nsys",
|
|
179
|
+
# Analysis
|
|
180
|
+
"wafer compiler-analyze",
|
|
181
|
+
# Sub-agents
|
|
182
|
+
"wafer agent -t ask-docs",
|
|
183
|
+
# General utilities
|
|
184
|
+
"python",
|
|
185
|
+
"python3",
|
|
186
|
+
"pip",
|
|
187
|
+
"timeout",
|
|
188
|
+
"ls",
|
|
189
|
+
"cat",
|
|
190
|
+
"head",
|
|
191
|
+
"tail",
|
|
192
|
+
"wc",
|
|
193
|
+
"pwd",
|
|
194
|
+
"which",
|
|
195
|
+
"cd",
|
|
196
|
+
"git",
|
|
197
|
+
]
|