wafer-cli 0.2.32__py3-none-any.whl → 0.2.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wafer/GUIDE.md +1 -1
- wafer/agent_defaults.py +157 -2
- wafer/billing.py +6 -6
- wafer/cli.py +432 -348
- wafer/corpus.py +6 -72
- wafer/evaluate.py +143 -81
- wafer/global_config.py +0 -13
- wafer/kernel_scope.py +1 -1
- wafer/ncu_analyze.py +1 -1
- wafer/nsys_analyze.py +1 -1
- wafer/skills/wafer-guide/SKILL.md +6 -22
- wafer/ssh_keys.py +6 -6
- wafer/targets_ops.py +2 -29
- wafer/templates/aiter_optimize.py +59 -0
- wafer/templates/optimize_kernel.py +2 -4
- wafer/templates/optimize_kernelbench.py +62 -17
- wafer/templates/optimize_vllm.py +156 -0
- wafer/trace_compare.py +48 -139
- wafer/wevin_cli.py +1 -12
- wafer/workspaces.py +8 -8
- wafer_cli-0.2.34.dist-info/METADATA +260 -0
- {wafer_cli-0.2.32.dist-info → wafer_cli-0.2.34.dist-info}/RECORD +25 -23
- wafer_cli-0.2.32.dist-info/METADATA +0 -107
- {wafer_cli-0.2.32.dist-info → wafer_cli-0.2.34.dist-info}/WHEEL +0 -0
- {wafer_cli-0.2.32.dist-info → wafer_cli-0.2.34.dist-info}/entry_points.txt +0 -0
- {wafer_cli-0.2.32.dist-info → wafer_cli-0.2.34.dist-info}/top_level.txt +0 -0
wafer/workspaces.py
CHANGED
|
@@ -39,13 +39,13 @@ def _friendly_error(status_code: int, response_text: str, workspace_id: str) ->
|
|
|
39
39
|
User-friendly error message with suggested next steps
|
|
40
40
|
"""
|
|
41
41
|
if status_code == 401:
|
|
42
|
-
return "Not authenticated. Run: wafer
|
|
42
|
+
return "Not authenticated. Run: wafer login"
|
|
43
43
|
|
|
44
44
|
if status_code == 402:
|
|
45
45
|
return (
|
|
46
46
|
"Insufficient credits.\n"
|
|
47
|
-
" Check usage: wafer
|
|
48
|
-
" Add credits: wafer
|
|
47
|
+
" Check usage: wafer billing\n"
|
|
48
|
+
" Add credits: wafer billing topup"
|
|
49
49
|
)
|
|
50
50
|
|
|
51
51
|
if status_code == 404:
|
|
@@ -107,7 +107,7 @@ def _list_workspaces_raw() -> list[dict]:
|
|
|
107
107
|
workspaces = response.json()
|
|
108
108
|
except httpx.HTTPStatusError as e:
|
|
109
109
|
if e.response.status_code == 401:
|
|
110
|
-
raise RuntimeError("Not authenticated. Run: wafer
|
|
110
|
+
raise RuntimeError("Not authenticated. Run: wafer login") from e
|
|
111
111
|
raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
|
|
112
112
|
except httpx.RequestError as e:
|
|
113
113
|
raise RuntimeError(f"Could not reach API: {e}") from e
|
|
@@ -188,7 +188,7 @@ def list_workspaces(json_output: bool = False) -> str:
|
|
|
188
188
|
workspaces = response.json()
|
|
189
189
|
except httpx.HTTPStatusError as e:
|
|
190
190
|
if e.response.status_code == 401:
|
|
191
|
-
raise RuntimeError("Not authenticated. Run: wafer
|
|
191
|
+
raise RuntimeError("Not authenticated. Run: wafer login") from e
|
|
192
192
|
raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
|
|
193
193
|
except httpx.RequestError as e:
|
|
194
194
|
raise RuntimeError(f"Could not reach API: {e}") from e
|
|
@@ -307,7 +307,7 @@ def create_workspace(
|
|
|
307
307
|
workspace = response.json()
|
|
308
308
|
except httpx.HTTPStatusError as e:
|
|
309
309
|
if e.response.status_code == 401:
|
|
310
|
-
raise RuntimeError("Not authenticated. Run: wafer
|
|
310
|
+
raise RuntimeError("Not authenticated. Run: wafer login") from e
|
|
311
311
|
if e.response.status_code == 400:
|
|
312
312
|
raise RuntimeError(f"Bad request: {e.response.text}") from e
|
|
313
313
|
raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
|
|
@@ -413,7 +413,7 @@ def delete_workspace(workspace_id: str, json_output: bool = False) -> str:
|
|
|
413
413
|
result = response.json()
|
|
414
414
|
except httpx.HTTPStatusError as e:
|
|
415
415
|
if e.response.status_code == 401:
|
|
416
|
-
raise RuntimeError("Not authenticated. Run: wafer
|
|
416
|
+
raise RuntimeError("Not authenticated. Run: wafer login") from e
|
|
417
417
|
if e.response.status_code == 404:
|
|
418
418
|
raise RuntimeError(f"Workspace not found: {workspace_id}") from e
|
|
419
419
|
raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
|
|
@@ -691,7 +691,7 @@ def get_workspace_raw(workspace_id: str) -> dict:
|
|
|
691
691
|
workspace = response.json()
|
|
692
692
|
except httpx.HTTPStatusError as e:
|
|
693
693
|
if e.response.status_code == 401:
|
|
694
|
-
raise RuntimeError("Not authenticated. Run: wafer
|
|
694
|
+
raise RuntimeError("Not authenticated. Run: wafer login") from e
|
|
695
695
|
if e.response.status_code == 404:
|
|
696
696
|
raise RuntimeError(f"Workspace not found: {workspace_id}") from e
|
|
697
697
|
raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: wafer-cli
|
|
3
|
+
Version: 0.2.34
|
|
4
|
+
Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: typer>=0.12.0
|
|
8
|
+
Requires-Dist: trio>=0.24.0
|
|
9
|
+
Requires-Dist: trio-asyncio>=0.15.0
|
|
10
|
+
Requires-Dist: wafer-core>=0.1.0
|
|
11
|
+
Requires-Dist: perfetto>=0.16.0
|
|
12
|
+
Requires-Dist: posthog>=3.0.0
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
15
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
16
|
+
Requires-Dist: diff-cover>=8.0.0; extra == "dev"
|
|
17
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
18
|
+
|
|
19
|
+
# Wafer CLI
|
|
20
|
+
|
|
21
|
+
Run GPU workloads, optimize kernels, and query GPU documentation.
|
|
22
|
+
|
|
23
|
+
## Getting Started
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Install
|
|
27
|
+
cd apps/wafer-cli && uv sync
|
|
28
|
+
|
|
29
|
+
# Use staging (workspaces and other features require staging)
|
|
30
|
+
wafer config set api.environment staging
|
|
31
|
+
|
|
32
|
+
# Login
|
|
33
|
+
wafer login
|
|
34
|
+
|
|
35
|
+
# Run a command on a remote GPU
|
|
36
|
+
wafer remote-run -- nvidia-smi
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Commands
|
|
40
|
+
|
|
41
|
+
### `wafer login` / `wafer logout` / `wafer whoami`
|
|
42
|
+
|
|
43
|
+
Authenticate with GitHub OAuth.
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
wafer login # Opens browser for GitHub OAuth
|
|
47
|
+
wafer whoami # Show current user
|
|
48
|
+
wafer logout # Remove credentials
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### `wafer remote-run`
|
|
52
|
+
|
|
53
|
+
Run any command on a remote GPU.
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
wafer remote-run -- nvidia-smi
|
|
57
|
+
wafer remote-run --upload-dir ./my_code -- python3 train.py
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### `wafer workspaces`
|
|
61
|
+
|
|
62
|
+
Create and manage persistent GPU environments.
|
|
63
|
+
|
|
64
|
+
**Available GPUs:**
|
|
65
|
+
|
|
66
|
+
- `MI300X` - AMD Instinct MI300X (192GB HBM3, ROCm)
|
|
67
|
+
- `B200` - NVIDIA Blackwell B200 (180GB HBM3e, CUDA) - default
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
wafer workspaces list
|
|
71
|
+
wafer workspaces create my-workspace --gpu B200 --wait # NVIDIA B200
|
|
72
|
+
wafer workspaces create amd-dev --gpu MI300X # AMD MI300X
|
|
73
|
+
wafer workspaces ssh <workspace-id>
|
|
74
|
+
wafer workspaces delete <workspace-id>
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### `wafer agent`
|
|
78
|
+
|
|
79
|
+
AI assistant for GPU kernel development. Helps with CUDA/Triton optimization, documentation queries, and performance analysis.
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
wafer agent "What is TMEM in CuTeDSL?"
|
|
83
|
+
wafer agent -s "optimize this kernel" < kernel.py
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### `wafer evaluate`
|
|
87
|
+
|
|
88
|
+
Evaluate kernel correctness and performance against a reference implementation.
|
|
89
|
+
|
|
90
|
+
**Functional format** (default):
|
|
91
|
+
```bash
|
|
92
|
+
# Generate template files
|
|
93
|
+
wafer evaluate make-template ./my-kernel
|
|
94
|
+
|
|
95
|
+
# Run evaluation
|
|
96
|
+
wafer evaluate --impl kernel.py --reference ref.py --test-cases tests.json --benchmark
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
The implementation must define `custom_kernel(inputs)`, the reference must define `ref_kernel(inputs)` and `generate_input(**params)`.
|
|
100
|
+
|
|
101
|
+
**KernelBench format** (ModelNew class):
|
|
102
|
+
```bash
|
|
103
|
+
# Extract a KernelBench problem as template
|
|
104
|
+
wafer evaluate kernelbench make-template level1/1
|
|
105
|
+
|
|
106
|
+
# Run evaluation
|
|
107
|
+
wafer evaluate kernelbench --impl my_kernel.py --reference problem.py --benchmark
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
The implementation must define `class ModelNew(nn.Module)`, the reference must define `class Model`, `get_inputs()`, and `get_init_inputs()`.
|
|
111
|
+
|
|
112
|
+
### `wafer wevin -t ask-docs`
|
|
113
|
+
|
|
114
|
+
Query GPU documentation using the docs template.
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
wafer wevin -t ask-docs --json -s "What causes bank conflicts in shared memory?"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### `wafer corpus`
|
|
121
|
+
|
|
122
|
+
Download documentation to local filesystem for agents to search.
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
wafer corpus list
|
|
126
|
+
wafer corpus download cuda-programming-guide
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Customization
|
|
132
|
+
|
|
133
|
+
### `wafer remote-run` options
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
wafer remote-run --image pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel -- python3 script.py
|
|
137
|
+
wafer remote-run --require-hwc -- ncu --set full python3 bench.py # Hardware counters for NCU
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### `wafer evaluate` options
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
wafer evaluate --impl k.py --reference r.py --test-cases t.json \
|
|
144
|
+
--target vultr-b200 \ # Specific GPU target
|
|
145
|
+
--benchmark \ # Measure performance
|
|
146
|
+
--profile # Enable torch.profiler + NCU
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### `wafer push` for multi-command workflows
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
WORKSPACE=$(wafer push ./project)
|
|
153
|
+
wafer remote-run --workspace-id $WORKSPACE -- python3 test1.py
|
|
154
|
+
wafer remote-run --workspace-id $WORKSPACE -- python3 test2.py
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Profile analysis
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
wafer nvidia ncu analyze profile.ncu-rep
|
|
161
|
+
wafer nvidia nsys analyze profile.nsys-rep
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Advanced
|
|
167
|
+
|
|
168
|
+
### Local targets
|
|
169
|
+
|
|
170
|
+
Bypass the API and SSH directly to your own GPUs:
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
wafer targets list
|
|
174
|
+
wafer targets add ./my-gpu.toml
|
|
175
|
+
wafer targets default my-gpu
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Defensive evaluation
|
|
179
|
+
|
|
180
|
+
Detect evaluation hacking (stream injection, lazy evaluation, etc.):
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
wafer evaluate --impl k.py --reference r.py --test-cases t.json --benchmark --defensive
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Other tools
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
wafer perfetto <trace.json> --query "SELECT * FROM slice" # Perfetto SQL queries
|
|
190
|
+
wafer capture ./script.py # Capture execution snapshot
|
|
191
|
+
wafer compiler-analyze kernel.ptx # Analyze PTX/SASS
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### ROCm profiling (AMD GPUs)
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
wafer rocprof-sdk ...
|
|
198
|
+
wafer rocprof-systems ...
|
|
199
|
+
wafer rocprof-compute ...
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
## Shell Completion
|
|
205
|
+
|
|
206
|
+
Enable tab completion for commands, options, and target names:
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
# Install completion (zsh/bash/fish)
|
|
210
|
+
wafer --install-completion
|
|
211
|
+
|
|
212
|
+
# Then restart your terminal, or source your shell config:
|
|
213
|
+
source ~/.zshrc # or ~/.bashrc
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Now you can tab-complete:
|
|
217
|
+
- Commands: `wafer eva<TAB>` → `wafer evaluate`
|
|
218
|
+
- Options: `wafer evaluate --<TAB>`
|
|
219
|
+
- Target names: `wafer evaluate --target v<TAB>` → `wafer evaluate --target vultr-b200`
|
|
220
|
+
- File paths: `wafer evaluate --impl ./<TAB>`
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## AI Assistant Skills
|
|
225
|
+
|
|
226
|
+
Install the Wafer CLI skill to make wafer commands discoverable by your AI coding assistant:
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
# Install for all supported tools (Claude Code, Codex CLI, Cursor)
|
|
230
|
+
wafer skill install
|
|
231
|
+
|
|
232
|
+
# Install for a specific tool
|
|
233
|
+
wafer skill install -t cursor # Cursor
|
|
234
|
+
wafer skill install -t claude # Claude Code
|
|
235
|
+
wafer skill install -t codex # Codex CLI
|
|
236
|
+
|
|
237
|
+
# Check installation status
|
|
238
|
+
wafer skill status
|
|
239
|
+
|
|
240
|
+
# Uninstall
|
|
241
|
+
wafer skill uninstall
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### Installing from GitHub (Cursor)
|
|
245
|
+
|
|
246
|
+
You can also install the skill directly from GitHub in Cursor:
|
|
247
|
+
|
|
248
|
+
1. Open Cursor Settings (Cmd+Shift+J / Ctrl+Shift+J)
|
|
249
|
+
2. Navigate to **Rules** → **Add Rule** → **Remote Rule (Github)**
|
|
250
|
+
3. Enter: `https://github.com/wafer-ai/skills`
|
|
251
|
+
4. Cursor will automatically discover skills in `.cursor/skills/`
|
|
252
|
+
|
|
253
|
+
The skill provides comprehensive guidance for GPU kernel development, including documentation lookup, trace analysis, kernel evaluation, and optimization workflows.
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## Requirements
|
|
258
|
+
|
|
259
|
+
- Python 3.11+ (matches the package's `Requires-Python: >=3.11` declaration)
|
|
260
|
+
- GitHub account (for authentication)
|
|
@@ -1,23 +1,23 @@
|
|
|
1
|
-
wafer/GUIDE.md,sha256=
|
|
1
|
+
wafer/GUIDE.md,sha256=G6P4aFZslEXiHmVjtTB3_OIpGK5d1tSiqxtawASVUZg,3588
|
|
2
2
|
wafer/__init__.py,sha256=kBM_ONCpU6UUMBOH8Tmg4A88sNFnbaD59o61cJs-uYM,90
|
|
3
|
-
wafer/agent_defaults.py,sha256=
|
|
3
|
+
wafer/agent_defaults.py,sha256=jJfTXqa9JO_fdVWi_G8_W3TXdYb7qPmK5IdmUCSLbgg,5761
|
|
4
4
|
wafer/analytics.py,sha256=qLY6Z16usVHFD8TCv7XBuz7l47vXVdXk-qhOzA-hW_8,8179
|
|
5
5
|
wafer/api_client.py,sha256=i_Az2b2llC3DSW8yOL-BKqa7LSKuxOr8hSN40s-oQXY,6313
|
|
6
6
|
wafer/auth.py,sha256=dwss_se5P-FFc9IN38q4kh_dBrA6k-CguDBkivgcdj0,14003
|
|
7
7
|
wafer/autotuner.py,sha256=41WYP41pTDvMijv2h42vm89bcHtDMJXObDlWmn6xpFU,44416
|
|
8
8
|
wafer/baseline.py,sha256=OrGCAut_xtkH9Ogx4mMU5-94Q0oClIXqac94YRwqERY,21534
|
|
9
|
-
wafer/billing.py,sha256=
|
|
10
|
-
wafer/cli.py,sha256=
|
|
9
|
+
wafer/billing.py,sha256=jbLB2lI4_9f2KD8uEFDi_ixLlowe5hasC0TIZJyIXRg,7163
|
|
10
|
+
wafer/cli.py,sha256=DlsE-3cWZ-02yNb7fOf-9yXmQJyrSHsFWym2I3LpmrU,275688
|
|
11
11
|
wafer/cli_instructions.py,sha256=bziUKDNDAXABVMvKPLEMXm-hFSD2TcFSh-FKRYa949k,4693
|
|
12
12
|
wafer/config.py,sha256=h5Eo9_yfWqWGoPNdVQikI9GoZVUeysunSYiixf1mKcw,3411
|
|
13
|
-
wafer/corpus.py,sha256=
|
|
14
|
-
wafer/evaluate.py,sha256=
|
|
15
|
-
wafer/global_config.py,sha256=
|
|
13
|
+
wafer/corpus.py,sha256=oQegXA43MuyRvYxOsWhmqeP5vMb5IKFHOvM-1RcahPA,22301
|
|
14
|
+
wafer/evaluate.py,sha256=HMFQD-uwC6Wky1t_0JxYZaoHWgLaTBkjxOxgpZVnGrc,190519
|
|
15
|
+
wafer/global_config.py,sha256=fhaR_RU3ufMksDmOohH1OLeQ0JT0SDW1hEip_zaP75k,11345
|
|
16
16
|
wafer/gpu_run.py,sha256=TwqXy72T7f2I7e6n5WWod3xgxCPnDhU0BgLsB4CUoQY,9716
|
|
17
17
|
wafer/inference.py,sha256=tZCO5i05FKY27ewis3CSBHFBeFbXY3xwj0DSjdoMY9s,4314
|
|
18
|
-
wafer/kernel_scope.py,sha256=
|
|
19
|
-
wafer/ncu_analyze.py,sha256=
|
|
20
|
-
wafer/nsys_analyze.py,sha256=
|
|
18
|
+
wafer/kernel_scope.py,sha256=YtnxknAChkJoeU_vIdxiqWsAITGBeabp9OGIK-X32i0,20796
|
|
19
|
+
wafer/ncu_analyze.py,sha256=rAWzKQRZEY6E_CL3gAWUaW3uZ4kvQVZskVCPDpsFJuE,24633
|
|
20
|
+
wafer/nsys_analyze.py,sha256=AhNcjPaapB0QCbqiHRXvyy-ccjevvVwEyxes84D28JU,36124
|
|
21
21
|
wafer/nsys_profile.py,sha256=QFBl8pkr8r4uRNdNUO9gY-obj9slqpOgVYFZ_sXu6Nw,15478
|
|
22
22
|
wafer/output.py,sha256=8jw5ifvIMK8ldyBMGW4NhrKvJPl66TV2Y2fJ5Tlhh1I,8293
|
|
23
23
|
wafer/problems.py,sha256=ce2sy10A1nnNUG3VGsseTS8jL7LZsku4dE8zVf9JHQ4,11296
|
|
@@ -25,24 +25,26 @@ wafer/rocprof_compute.py,sha256=n_yOGZaFbOXna_ghhmYWXeyUoSabgH4KkjlYq38DlHo,1988
|
|
|
25
25
|
wafer/rocprof_sdk.py,sha256=0Q7Ye6dUfa1anFZbqKc21rItgqva8V8VIZoSB7wqbmA,10085
|
|
26
26
|
wafer/rocprof_systems.py,sha256=4IWbMcbYk1x_8iS7P3FC_u5sgH6EXADCtR2lV9id80M,18629
|
|
27
27
|
wafer/specs_cli.py,sha256=frMEKwMflxVNpFlAuxprmr33ZZ1Oeh2lB0KWZ4oZWzw,4360
|
|
28
|
-
wafer/ssh_keys.py,sha256=
|
|
28
|
+
wafer/ssh_keys.py,sha256=9kSdhV_dg9T6pQu2JmNQptarkkwGtN9rLyRkI1bW4i4,8094
|
|
29
29
|
wafer/target_lock.py,sha256=SDKhNzv2N7gsphGflcNni9FE5YYuAMuEthngAJEo4Gs,7809
|
|
30
30
|
wafer/targets.py,sha256=9r-iRWoKSH5cQl1LcamaX-T7cNVOg99ngIm_hlRk-qU,26922
|
|
31
31
|
wafer/targets_cli.py,sha256=Oe3e02rSXeNrMbe_Qv9DNfQ8dEOKodtU7BbQQWxlNwA,16348
|
|
32
|
-
wafer/targets_ops.py,sha256=
|
|
33
|
-
wafer/trace_compare.py,sha256=
|
|
32
|
+
wafer/targets_ops.py,sha256=jN1oIBx0mutxRNE9xpIc7SaBxPkVmOyus2eqn0kEKNI,21475
|
|
33
|
+
wafer/trace_compare.py,sha256=IBVSGI8u5A10haDzL4eQ0R24fM1G_dd1F3-4iEkG1EQ,6349
|
|
34
34
|
wafer/tracelens.py,sha256=g9ZIeFyNojZn4uTd3skPqIrRiL7aMJOz_-GOd3aiyy4,7998
|
|
35
|
-
wafer/wevin_cli.py,sha256=
|
|
36
|
-
wafer/workspaces.py,sha256=
|
|
37
|
-
wafer/skills/wafer-guide/SKILL.md,sha256=
|
|
35
|
+
wafer/wevin_cli.py,sha256=ruHROgLl4SL0UaKW9JbkTkVjtmtG3Jqigp7cnbQAbZ0,25564
|
|
36
|
+
wafer/workspaces.py,sha256=k_iCZ-mOrG2KiTXqqcZ5_VifSIXsFGaZM4hjnxBnBmc,35666
|
|
37
|
+
wafer/skills/wafer-guide/SKILL.md,sha256=KWetJw2TVTbz11_nzqazqOJWWRlbHRFShs4sOoreiWo,3255
|
|
38
38
|
wafer/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
+
wafer/templates/aiter_optimize.py,sha256=IjUiwiDa4MBu_lsn0Fn2vc76Tx8kgA-i_SCsbm9kriA,2362
|
|
39
40
|
wafer/templates/ask_docs.py,sha256=15t1Aa4WBMwMox8XmFdzyosOZfBLMdXyaxo3GDb7nTE,2254
|
|
40
|
-
wafer/templates/optimize_kernel.py,sha256=
|
|
41
|
-
wafer/templates/optimize_kernelbench.py,sha256=
|
|
41
|
+
wafer/templates/optimize_kernel.py,sha256=4-MaKm_C9BQHQEllrNLLYkcdhJpcj6D-8zbJ4FdLUEY,2444
|
|
42
|
+
wafer/templates/optimize_kernelbench.py,sha256=aoOA13zWEl89r6QW03xF9NKxQ7j4mWe9rwua6-mlr4Y,4780
|
|
43
|
+
wafer/templates/optimize_vllm.py,sha256=_D1rDP9wHA8CCvmoUrdLEW94MiaK4nAYJ-jbnpAvq7A,6154
|
|
42
44
|
wafer/templates/trace_analyze.py,sha256=B7CiRlsokERzBjLL-k49kGjpU2zlJZqzTE05xbRS1WI,2878
|
|
43
45
|
wafer/tests/test_eval_cli_parity.py,sha256=SGmaj2NGBZ7GdDF53bXsECvQbV21iHZw8YeL_MJOLk0,7206
|
|
44
|
-
wafer_cli-0.2.
|
|
45
|
-
wafer_cli-0.2.
|
|
46
|
-
wafer_cli-0.2.
|
|
47
|
-
wafer_cli-0.2.
|
|
48
|
-
wafer_cli-0.2.
|
|
46
|
+
wafer_cli-0.2.34.dist-info/METADATA,sha256=77u_qyiWRKOi_bDUeTZGGW9Df10ezICj9S6XWoZ4xoo,6461
|
|
47
|
+
wafer_cli-0.2.34.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
48
|
+
wafer_cli-0.2.34.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
|
|
49
|
+
wafer_cli-0.2.34.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
|
|
50
|
+
wafer_cli-0.2.34.dist-info/RECORD,,
|
|
@@ -1,107 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: wafer-cli
|
|
3
|
-
Version: 0.2.32
|
|
4
|
-
Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
|
|
5
|
-
Requires-Python: >=3.11
|
|
6
|
-
Description-Content-Type: text/markdown
|
|
7
|
-
Requires-Dist: typer>=0.12.0
|
|
8
|
-
Requires-Dist: trio>=0.24.0
|
|
9
|
-
Requires-Dist: trio-asyncio>=0.15.0
|
|
10
|
-
Requires-Dist: wafer-core>=0.1.0
|
|
11
|
-
Requires-Dist: perfetto>=0.16.0
|
|
12
|
-
Requires-Dist: posthog>=3.0.0
|
|
13
|
-
Provides-Extra: dev
|
|
14
|
-
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
15
|
-
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
16
|
-
Requires-Dist: diff-cover>=8.0.0; extra == "dev"
|
|
17
|
-
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
18
|
-
|
|
19
|
-
# Wafer CLI
|
|
20
|
-
|
|
21
|
-
Wafer CLI gives coding agents direct access to GPU docs, trace analysis, and remote kernel evaluation.
|
|
22
|
-
It helps you develop and optimize GPU kernels even when you are not working on a machine with a GPU.
|
|
23
|
-
|
|
24
|
-
## Key features
|
|
25
|
-
|
|
26
|
-
- Query GPU documentation with citations
|
|
27
|
-
- Analyze GPU traces and profiles
|
|
28
|
-
- Evaluate kernels on remote GPUs for correctness and performance
|
|
29
|
-
- Run commands on GPU targets (remote or local)
|
|
30
|
-
- Manage persistent workspaces
|
|
31
|
-
|
|
32
|
-
## Quick start
|
|
33
|
-
|
|
34
|
-
```bash
|
|
35
|
-
uv tool install wafer-cli
|
|
36
|
-
wafer login
|
|
37
|
-
wafer remote-run -- nvidia-smi
|
|
38
|
-
```
|
|
39
|
-
|
|
40
|
-
## Common commands
|
|
41
|
-
|
|
42
|
-
```bash
|
|
43
|
-
wafer workspaces list
|
|
44
|
-
wafer workspaces create my-workspace --wait
|
|
45
|
-
wafer agent -t ask-docs --corpus cuda "What causes shared memory bank conflicts?"
|
|
46
|
-
wafer agent -t trace-analyze --args trace=./profile.ncu-rep "Why is this kernel slow?"
|
|
47
|
-
wafer evaluate --impl kernel.py --reference ref.py --test-cases tests.json --benchmark
|
|
48
|
-
wafer nvidia ncu analyze profile.ncu-rep
|
|
49
|
-
wafer corpus list
|
|
50
|
-
```
|
|
51
|
-
|
|
52
|
-
## Typical workflows
|
|
53
|
-
|
|
54
|
-
### Query GPU documentation
|
|
55
|
-
|
|
56
|
-
Download a documentation corpus and ask questions with citations.
|
|
57
|
-
|
|
58
|
-
```bash
|
|
59
|
-
wafer corpus download cuda
|
|
60
|
-
wafer agent -t ask-docs --corpus cuda "What causes shared memory bank conflicts?"
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
### Analyze performance traces
|
|
64
|
-
|
|
65
|
-
Use the trace analysis template or query trace data directly.
|
|
66
|
-
|
|
67
|
-
```bash
|
|
68
|
-
wafer agent -t trace-analyze --args trace=./profile.ncu-rep "Why is this kernel slow?"
|
|
69
|
-
wafer nvidia perfetto query trace.json \
|
|
70
|
-
"SELECT name, dur/1e6 as ms FROM slice WHERE cat='kernel' ORDER BY dur DESC LIMIT 10"
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
### Evaluate kernels on remote GPUs
|
|
74
|
-
|
|
75
|
-
Run correctness and performance checks on a remote target.
|
|
76
|
-
|
|
77
|
-
```bash
|
|
78
|
-
wafer evaluate \
|
|
79
|
-
--impl ./kernel.py \
|
|
80
|
-
--reference ./reference.py \
|
|
81
|
-
--test-cases ./tests.json \
|
|
82
|
-
--benchmark
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
### Run commands on a remote GPU
|
|
86
|
-
|
|
87
|
-
```bash
|
|
88
|
-
wafer remote-run -- nvidia-smi
|
|
89
|
-
wafer remote-run --upload-dir ./my_code -- python3 train.py
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
### Manage workspaces
|
|
93
|
-
|
|
94
|
-
```bash
|
|
95
|
-
wafer workspaces list
|
|
96
|
-
wafer workspaces create my-workspace --wait
|
|
97
|
-
wafer workspaces ssh <workspace-id>
|
|
98
|
-
wafer workspaces delete <workspace-id>
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
## Install the CLI skill (optional)
|
|
102
|
-
|
|
103
|
-
```bash
|
|
104
|
-
wafer skill install
|
|
105
|
-
# or
|
|
106
|
-
wafer skill install -t <claude/codex>
|
|
107
|
-
```
|
|
File without changes
|
|
File without changes
|
|
File without changes
|