wafer-cli 0.2.32__py3-none-any.whl → 0.2.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wafer/workspaces.py CHANGED
@@ -39,13 +39,13 @@ def _friendly_error(status_code: int, response_text: str, workspace_id: str) ->
39
39
  User-friendly error message with suggested next steps
40
40
  """
41
41
  if status_code == 401:
42
- return "Not authenticated. Run: wafer auth login"
42
+ return "Not authenticated. Run: wafer login"
43
43
 
44
44
  if status_code == 402:
45
45
  return (
46
46
  "Insufficient credits.\n"
47
- " Check usage: wafer config billing\n"
48
- " Add credits: wafer config billing topup"
47
+ " Check usage: wafer billing\n"
48
+ " Add credits: wafer billing topup"
49
49
  )
50
50
 
51
51
  if status_code == 404:
@@ -107,7 +107,7 @@ def _list_workspaces_raw() -> list[dict]:
107
107
  workspaces = response.json()
108
108
  except httpx.HTTPStatusError as e:
109
109
  if e.response.status_code == 401:
110
- raise RuntimeError("Not authenticated. Run: wafer auth login") from e
110
+ raise RuntimeError("Not authenticated. Run: wafer login") from e
111
111
  raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
112
112
  except httpx.RequestError as e:
113
113
  raise RuntimeError(f"Could not reach API: {e}") from e
@@ -188,7 +188,7 @@ def list_workspaces(json_output: bool = False) -> str:
188
188
  workspaces = response.json()
189
189
  except httpx.HTTPStatusError as e:
190
190
  if e.response.status_code == 401:
191
- raise RuntimeError("Not authenticated. Run: wafer auth login") from e
191
+ raise RuntimeError("Not authenticated. Run: wafer login") from e
192
192
  raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
193
193
  except httpx.RequestError as e:
194
194
  raise RuntimeError(f"Could not reach API: {e}") from e
@@ -307,7 +307,7 @@ def create_workspace(
307
307
  workspace = response.json()
308
308
  except httpx.HTTPStatusError as e:
309
309
  if e.response.status_code == 401:
310
- raise RuntimeError("Not authenticated. Run: wafer auth login") from e
310
+ raise RuntimeError("Not authenticated. Run: wafer login") from e
311
311
  if e.response.status_code == 400:
312
312
  raise RuntimeError(f"Bad request: {e.response.text}") from e
313
313
  raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
@@ -413,7 +413,7 @@ def delete_workspace(workspace_id: str, json_output: bool = False) -> str:
413
413
  result = response.json()
414
414
  except httpx.HTTPStatusError as e:
415
415
  if e.response.status_code == 401:
416
- raise RuntimeError("Not authenticated. Run: wafer auth login") from e
416
+ raise RuntimeError("Not authenticated. Run: wafer login") from e
417
417
  if e.response.status_code == 404:
418
418
  raise RuntimeError(f"Workspace not found: {workspace_id}") from e
419
419
  raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
@@ -691,7 +691,7 @@ def get_workspace_raw(workspace_id: str) -> dict:
691
691
  workspace = response.json()
692
692
  except httpx.HTTPStatusError as e:
693
693
  if e.response.status_code == 401:
694
- raise RuntimeError("Not authenticated. Run: wafer auth login") from e
694
+ raise RuntimeError("Not authenticated. Run: wafer login") from e
695
695
  if e.response.status_code == 404:
696
696
  raise RuntimeError(f"Workspace not found: {workspace_id}") from e
697
697
  raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
@@ -0,0 +1,260 @@
1
+ Metadata-Version: 2.4
2
+ Name: wafer-cli
3
+ Version: 0.2.33
4
+ Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
5
+ Requires-Python: >=3.11
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: typer>=0.12.0
8
+ Requires-Dist: trio>=0.24.0
9
+ Requires-Dist: trio-asyncio>=0.15.0
10
+ Requires-Dist: wafer-core>=0.1.0
11
+ Requires-Dist: perfetto>=0.16.0
12
+ Requires-Dist: posthog>=3.0.0
13
+ Provides-Extra: dev
14
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
15
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
16
+ Requires-Dist: diff-cover>=8.0.0; extra == "dev"
17
+ Requires-Dist: ruff>=0.4.0; extra == "dev"
18
+
19
+ # Wafer CLI
20
+
21
+ Run GPU workloads, optimize kernels, and query GPU documentation.
22
+
23
+ ## Getting Started
24
+
25
+ ```bash
26
+ # Install
27
+ cd apps/wafer-cli && uv sync
28
+
29
+ # Use staging (workspaces and other features require staging)
30
+ wafer config set api.environment staging
31
+
32
+ # Login
33
+ wafer login
34
+
35
+ # Run a command on a remote GPU
36
+ wafer remote-run -- nvidia-smi
37
+ ```
38
+
39
+ ## Commands
40
+
41
+ ### `wafer login` / `wafer logout` / `wafer whoami`
42
+
43
+ Authenticate with GitHub OAuth.
44
+
45
+ ```bash
46
+ wafer login # Opens browser for GitHub OAuth
47
+ wafer whoami # Show current user
48
+ wafer logout # Remove credentials
49
+ ```
50
+
51
+ ### `wafer remote-run`
52
+
53
+ Run any command on a remote GPU.
54
+
55
+ ```bash
56
+ wafer remote-run -- nvidia-smi
57
+ wafer remote-run --upload-dir ./my_code -- python3 train.py
58
+ ```
59
+
60
+ ### `wafer workspaces`
61
+
62
+ Create and manage persistent GPU environments.
63
+
64
+ **Available GPUs:**
65
+
66
+ - `MI300X` - AMD Instinct MI300X (192GB HBM3, ROCm)
67
+ - `B200` - NVIDIA Blackwell B200 (180GB HBM3e, CUDA) - default
68
+
69
+ ```bash
70
+ wafer workspaces list
71
+ wafer workspaces create my-workspace --gpu B200 --wait # NVIDIA B200
72
+ wafer workspaces create amd-dev --gpu MI300X # AMD MI300X
73
+ wafer workspaces ssh <workspace-id>
74
+ wafer workspaces delete <workspace-id>
75
+ ```
76
+
77
+ ### `wafer agent`
78
+
79
+ AI assistant for GPU kernel development. Helps with CUDA/Triton optimization, documentation queries, and performance analysis.
80
+
81
+ ```bash
82
+ wafer agent "What is TMEM in CuTeDSL?"
83
+ wafer agent -s "optimize this kernel" < kernel.py
84
+ ```
85
+
86
+ ### `wafer evaluate`
87
+
88
+ Evaluate kernel correctness and performance against a reference implementation.
89
+
90
+ **Functional format** (default):
91
+ ```bash
92
+ # Generate template files
93
+ wafer evaluate make-template ./my-kernel
94
+
95
+ # Run evaluation
96
+ wafer evaluate --impl kernel.py --reference ref.py --test-cases tests.json --benchmark
97
+ ```
98
+
99
+ The implementation must define `custom_kernel(inputs)`, the reference must define `ref_kernel(inputs)` and `generate_input(**params)`.
100
+
101
+ **KernelBench format** (ModelNew class):
102
+ ```bash
103
+ # Extract a KernelBench problem as template
104
+ wafer evaluate kernelbench make-template level1/1
105
+
106
+ # Run evaluation
107
+ wafer evaluate kernelbench --impl my_kernel.py --reference problem.py --benchmark
108
+ ```
109
+
110
+ The implementation must define `class ModelNew(nn.Module)`, the reference must define `class Model`, `get_inputs()`, and `get_init_inputs()`.
111
+
112
+ ### `wafer wevin -t ask-docs`
113
+
114
+ Query GPU documentation using the docs template.
115
+
116
+ ```bash
117
+ wafer wevin -t ask-docs --json -s "What causes bank conflicts in shared memory?"
118
+ ```
119
+
120
+ ### `wafer corpus`
121
+
122
+ Download documentation to local filesystem for agents to search.
123
+
124
+ ```bash
125
+ wafer corpus list
126
+ wafer corpus download cuda-programming-guide
127
+ ```
128
+
129
+ ---
130
+
131
+ ## Customization
132
+
133
+ ### `wafer remote-run` options
134
+
135
+ ```bash
136
+ wafer remote-run --image pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel -- python3 script.py
137
+ wafer remote-run --require-hwc -- ncu --set full python3 bench.py # Hardware counters for NCU
138
+ ```
139
+
140
+ ### `wafer evaluate` options
141
+
142
+ ```bash
143
+ wafer evaluate --impl k.py --reference r.py --test-cases t.json \
144
+ --target vultr-b200 \ # Specific GPU target
145
+ --benchmark \ # Measure performance
146
+ --profile # Enable torch.profiler + NCU
147
+ ```
148
+
149
+ ### `wafer push` for multi-command workflows
150
+
151
+ ```bash
152
+ WORKSPACE=$(wafer push ./project)
153
+ wafer remote-run --workspace-id $WORKSPACE -- python3 test1.py
154
+ wafer remote-run --workspace-id $WORKSPACE -- python3 test2.py
155
+ ```
156
+
157
+ ### Profile analysis
158
+
159
+ ```bash
160
+ wafer nvidia ncu analyze profile.ncu-rep
161
+ wafer nvidia nsys analyze profile.nsys-rep
162
+ ```
163
+
164
+ ---
165
+
166
+ ## Advanced
167
+
168
+ ### Local targets
169
+
170
+ Bypass the API and SSH directly to your own GPUs:
171
+
172
+ ```bash
173
+ wafer targets list
174
+ wafer targets add ./my-gpu.toml
175
+ wafer targets default my-gpu
176
+ ```
177
+
178
+ ### Defensive evaluation
179
+
180
+ Detect evaluation hacking (stream injection, lazy evaluation, etc.):
181
+
182
+ ```bash
183
+ wafer evaluate --impl k.py --reference r.py --test-cases t.json --benchmark --defensive
184
+ ```
185
+
186
+ ### Other tools
187
+
188
+ ```bash
189
+ wafer perfetto <trace.json> --query "SELECT * FROM slice" # Perfetto SQL queries
190
+ wafer capture ./script.py # Capture execution snapshot
191
+ wafer compiler-analyze kernel.ptx # Analyze PTX/SASS
192
+ ```
193
+
194
+ ### ROCm profiling (AMD GPUs)
195
+
196
+ ```bash
197
+ wafer rocprof-sdk ...
198
+ wafer rocprof-systems ...
199
+ wafer rocprof-compute ...
200
+ ```
201
+
202
+ ---
203
+
204
+ ## Shell Completion
205
+
206
+ Enable tab completion for commands, options, and target names:
207
+
208
+ ```bash
209
+ # Install completion (zsh/bash/fish)
210
+ wafer --install-completion
211
+
212
+ # Then restart your terminal, or source your shell config:
213
+ source ~/.zshrc # or ~/.bashrc
214
+ ```
215
+
216
+ Now you can tab-complete:
217
+ - Commands: `wafer eva<TAB>` → `wafer evaluate`
218
+ - Options: `wafer evaluate --<TAB>`
219
+ - Target names: `wafer evaluate --target v<TAB>` → `wafer evaluate --target vultr-b200`
220
+ - File paths: `wafer evaluate --impl ./<TAB>`
221
+
222
+ ---
223
+
224
+ ## AI Assistant Skills
225
+
226
+ Install the Wafer CLI skill to make wafer commands discoverable by your AI coding assistant:
227
+
228
+ ```bash
229
+ # Install for all supported tools (Claude Code, Codex CLI, Cursor)
230
+ wafer skill install
231
+
232
+ # Install for a specific tool
233
+ wafer skill install -t cursor # Cursor
234
+ wafer skill install -t claude # Claude Code
235
+ wafer skill install -t codex # Codex CLI
236
+
237
+ # Check installation status
238
+ wafer skill status
239
+
240
+ # Uninstall
241
+ wafer skill uninstall
242
+ ```
243
+
244
+ ### Installing from GitHub (Cursor)
245
+
246
+ You can also install the skill directly from GitHub in Cursor:
247
+
248
+ 1. Open Cursor Settings (Cmd+Shift+J / Ctrl+Shift+J)
249
+ 2. Navigate to **Rules** → **Add Rule** → **Remote Rule (Github)**
250
+ 3. Enter: `https://github.com/wafer-ai/skills`
251
+ 4. Cursor will automatically discover skills in `.cursor/skills/`
252
+
253
+ The skill provides comprehensive guidance for GPU kernel development, including documentation lookup, trace analysis, kernel evaluation, and optimization workflows.
254
+
255
+ ---
256
+
257
+ ## Requirements
258
+
259
+ - Python 3.11+
260
+ - GitHub account (for authentication)
@@ -1,23 +1,23 @@
1
- wafer/GUIDE.md,sha256=aZMNTNHhSc5dmRskUnpgcGlHYgsbf5S6Q_Thn0xmN1A,3588
1
+ wafer/GUIDE.md,sha256=G6P4aFZslEXiHmVjtTB3_OIpGK5d1tSiqxtawASVUZg,3588
2
2
  wafer/__init__.py,sha256=kBM_ONCpU6UUMBOH8Tmg4A88sNFnbaD59o61cJs-uYM,90
3
- wafer/agent_defaults.py,sha256=qpJvVyY7jw2EqQo_IZ4M4aR2-kKNomyTmoOOah1FW6I,1179
3
+ wafer/agent_defaults.py,sha256=jJfTXqa9JO_fdVWi_G8_W3TXdYb7qPmK5IdmUCSLbgg,5761
4
4
  wafer/analytics.py,sha256=qLY6Z16usVHFD8TCv7XBuz7l47vXVdXk-qhOzA-hW_8,8179
5
5
  wafer/api_client.py,sha256=i_Az2b2llC3DSW8yOL-BKqa7LSKuxOr8hSN40s-oQXY,6313
6
6
  wafer/auth.py,sha256=dwss_se5P-FFc9IN38q4kh_dBrA6k-CguDBkivgcdj0,14003
7
7
  wafer/autotuner.py,sha256=41WYP41pTDvMijv2h42vm89bcHtDMJXObDlWmn6xpFU,44416
8
8
  wafer/baseline.py,sha256=OrGCAut_xtkH9Ogx4mMU5-94Q0oClIXqac94YRwqERY,21534
9
- wafer/billing.py,sha256=hEEwtrtIsbPQ3lLJNcyTLMsapUbcuvcVW_e9_0SxzVo,7199
10
- wafer/cli.py,sha256=jHh4EcCGheDq14E11rdSHXImMdriMSFb2vNcvhsV59A,273228
9
+ wafer/billing.py,sha256=jbLB2lI4_9f2KD8uEFDi_ixLlowe5hasC0TIZJyIXRg,7163
10
+ wafer/cli.py,sha256=DlsE-3cWZ-02yNb7fOf-9yXmQJyrSHsFWym2I3LpmrU,275688
11
11
  wafer/cli_instructions.py,sha256=bziUKDNDAXABVMvKPLEMXm-hFSD2TcFSh-FKRYa949k,4693
12
12
  wafer/config.py,sha256=h5Eo9_yfWqWGoPNdVQikI9GoZVUeysunSYiixf1mKcw,3411
13
- wafer/corpus.py,sha256=CY9T7wXENNDJxnrtI-XsQmXeptrFfKG4x-lngrc9_3s,24748
14
- wafer/evaluate.py,sha256=i15PliAVI3W04_4eju46PBDdh2BwSToLME5n7yGu7dU,187355
15
- wafer/global_config.py,sha256=iu1HbTDr1695tSeDG2NfkK7PiY7XD6vjCk37w1wHbgk,11920
13
+ wafer/corpus.py,sha256=oQegXA43MuyRvYxOsWhmqeP5vMb5IKFHOvM-1RcahPA,22301
14
+ wafer/evaluate.py,sha256=HMFQD-uwC6Wky1t_0JxYZaoHWgLaTBkjxOxgpZVnGrc,190519
15
+ wafer/global_config.py,sha256=fhaR_RU3ufMksDmOohH1OLeQ0JT0SDW1hEip_zaP75k,11345
16
16
  wafer/gpu_run.py,sha256=TwqXy72T7f2I7e6n5WWod3xgxCPnDhU0BgLsB4CUoQY,9716
17
17
  wafer/inference.py,sha256=tZCO5i05FKY27ewis3CSBHFBeFbXY3xwj0DSjdoMY9s,4314
18
- wafer/kernel_scope.py,sha256=hKCwCIVZWl5xFdoA5G9kPucdG9O0jw9Zgyso-mc6aZo,20801
19
- wafer/ncu_analyze.py,sha256=f7yJayhmEjXn18g6MpxtQoN5_WW_kq4Qyxa1hd0tC74,24638
20
- wafer/nsys_analyze.py,sha256=4BV6vSTZy7jLzeAxdKJp6QceYy6t9SWGSnjDV8RqkcI,36129
18
+ wafer/kernel_scope.py,sha256=YtnxknAChkJoeU_vIdxiqWsAITGBeabp9OGIK-X32i0,20796
19
+ wafer/ncu_analyze.py,sha256=rAWzKQRZEY6E_CL3gAWUaW3uZ4kvQVZskVCPDpsFJuE,24633
20
+ wafer/nsys_analyze.py,sha256=AhNcjPaapB0QCbqiHRXvyy-ccjevvVwEyxes84D28JU,36124
21
21
  wafer/nsys_profile.py,sha256=QFBl8pkr8r4uRNdNUO9gY-obj9slqpOgVYFZ_sXu6Nw,15478
22
22
  wafer/output.py,sha256=8jw5ifvIMK8ldyBMGW4NhrKvJPl66TV2Y2fJ5Tlhh1I,8293
23
23
  wafer/problems.py,sha256=ce2sy10A1nnNUG3VGsseTS8jL7LZsku4dE8zVf9JHQ4,11296
@@ -25,24 +25,26 @@ wafer/rocprof_compute.py,sha256=n_yOGZaFbOXna_ghhmYWXeyUoSabgH4KkjlYq38DlHo,1988
25
25
  wafer/rocprof_sdk.py,sha256=0Q7Ye6dUfa1anFZbqKc21rItgqva8V8VIZoSB7wqbmA,10085
26
26
  wafer/rocprof_systems.py,sha256=4IWbMcbYk1x_8iS7P3FC_u5sgH6EXADCtR2lV9id80M,18629
27
27
  wafer/specs_cli.py,sha256=frMEKwMflxVNpFlAuxprmr33ZZ1Oeh2lB0KWZ4oZWzw,4360
28
- wafer/ssh_keys.py,sha256=MxiHlSm6wuDUFzkOQtx5K7OIbx_a6bXxE-m8OpwLx98,8130
28
+ wafer/ssh_keys.py,sha256=9kSdhV_dg9T6pQu2JmNQptarkkwGtN9rLyRkI1bW4i4,8094
29
29
  wafer/target_lock.py,sha256=SDKhNzv2N7gsphGflcNni9FE5YYuAMuEthngAJEo4Gs,7809
30
30
  wafer/targets.py,sha256=9r-iRWoKSH5cQl1LcamaX-T7cNVOg99ngIm_hlRk-qU,26922
31
31
  wafer/targets_cli.py,sha256=Oe3e02rSXeNrMbe_Qv9DNfQ8dEOKodtU7BbQQWxlNwA,16348
32
- wafer/targets_ops.py,sha256=wLPyq55H_wz0wEAEg8KFLYs9LIIyiVIphcsXD2NLa-E,22623
33
- wafer/trace_compare.py,sha256=COuxxKY874DteOSLUvJuJFREPMBSybq9dtANi3ATsg4,10803
32
+ wafer/targets_ops.py,sha256=jN1oIBx0mutxRNE9xpIc7SaBxPkVmOyus2eqn0kEKNI,21475
33
+ wafer/trace_compare.py,sha256=IBVSGI8u5A10haDzL4eQ0R24fM1G_dd1F3-4iEkG1EQ,6349
34
34
  wafer/tracelens.py,sha256=g9ZIeFyNojZn4uTd3skPqIrRiL7aMJOz_-GOd3aiyy4,7998
35
- wafer/wevin_cli.py,sha256=eo1ETsXIsCftXSG5AxEYYZipNGcXayKyIevs5F6MjXg,26140
36
- wafer/workspaces.py,sha256=J-TXGwHXSZlzRWCew63KNvk6HLJ-zTSELRgzjryTkMk,35710
37
- wafer/skills/wafer-guide/SKILL.md,sha256=UDsXCD5Kb-lDParKCTf2WkE3kodVs-rja8XeumSBO5U,3934
35
+ wafer/wevin_cli.py,sha256=ruHROgLl4SL0UaKW9JbkTkVjtmtG3Jqigp7cnbQAbZ0,25564
36
+ wafer/workspaces.py,sha256=k_iCZ-mOrG2KiTXqqcZ5_VifSIXsFGaZM4hjnxBnBmc,35666
37
+ wafer/skills/wafer-guide/SKILL.md,sha256=KWetJw2TVTbz11_nzqazqOJWWRlbHRFShs4sOoreiWo,3255
38
38
  wafer/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
+ wafer/templates/aiter_optimize.py,sha256=IjUiwiDa4MBu_lsn0Fn2vc76Tx8kgA-i_SCsbm9kriA,2362
39
40
  wafer/templates/ask_docs.py,sha256=15t1Aa4WBMwMox8XmFdzyosOZfBLMdXyaxo3GDb7nTE,2254
40
- wafer/templates/optimize_kernel.py,sha256=Q4FA_8ECEegW_3DS51mkLCX6Vk1dcWWzY3A_RQ4NW8U,2576
41
- wafer/templates/optimize_kernelbench.py,sha256=T3co9Y9eSLWDrZG66gwQVFMdnGVoyUQos-TxnMMBLL8,3747
41
+ wafer/templates/optimize_kernel.py,sha256=4-MaKm_C9BQHQEllrNLLYkcdhJpcj6D-8zbJ4FdLUEY,2444
42
+ wafer/templates/optimize_kernelbench.py,sha256=aoOA13zWEl89r6QW03xF9NKxQ7j4mWe9rwua6-mlr4Y,4780
43
+ wafer/templates/optimize_vllm.py,sha256=_D1rDP9wHA8CCvmoUrdLEW94MiaK4nAYJ-jbnpAvq7A,6154
42
44
  wafer/templates/trace_analyze.py,sha256=B7CiRlsokERzBjLL-k49kGjpU2zlJZqzTE05xbRS1WI,2878
43
45
  wafer/tests/test_eval_cli_parity.py,sha256=SGmaj2NGBZ7GdDF53bXsECvQbV21iHZw8YeL_MJOLk0,7206
44
- wafer_cli-0.2.32.dist-info/METADATA,sha256=snTvnaN37WG1rXCxW2YibK6CtBX9lhe8mOxEi9Th5iA,2799
45
- wafer_cli-0.2.32.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
46
- wafer_cli-0.2.32.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
47
- wafer_cli-0.2.32.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
48
- wafer_cli-0.2.32.dist-info/RECORD,,
46
+ wafer_cli-0.2.33.dist-info/METADATA,sha256=y3kmdBLh6-BZwFPcVmFYwOQQ3H1gM0ljANx1_W6FrEQ,6461
47
+ wafer_cli-0.2.33.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
48
+ wafer_cli-0.2.33.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
49
+ wafer_cli-0.2.33.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
50
+ wafer_cli-0.2.33.dist-info/RECORD,,
@@ -1,107 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: wafer-cli
3
- Version: 0.2.32
4
- Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
5
- Requires-Python: >=3.11
6
- Description-Content-Type: text/markdown
7
- Requires-Dist: typer>=0.12.0
8
- Requires-Dist: trio>=0.24.0
9
- Requires-Dist: trio-asyncio>=0.15.0
10
- Requires-Dist: wafer-core>=0.1.0
11
- Requires-Dist: perfetto>=0.16.0
12
- Requires-Dist: posthog>=3.0.0
13
- Provides-Extra: dev
14
- Requires-Dist: pytest>=8.0.0; extra == "dev"
15
- Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
16
- Requires-Dist: diff-cover>=8.0.0; extra == "dev"
17
- Requires-Dist: ruff>=0.4.0; extra == "dev"
18
-
19
- # Wafer CLI
20
-
21
- Wafer CLI gives coding agents direct access to GPU docs, trace analysis, and remote kernel evaluation.
22
- It helps you develop and optimize GPU kernels even when you are not working on a machine with a GPU.
23
-
24
- ## Key features
25
-
26
- - Query GPU documentation with citations
27
- - Analyze GPU traces and profiles
28
- - Evaluate kernels on remote GPUs for correctness and performance
29
- - Run commands on GPU targets (remote or local)
30
- - Manage persistent workspaces
31
-
32
- ## Quick start
33
-
34
- ```bash
35
- uv tool install wafer-cli
36
- wafer login
37
- wafer remote-run -- nvidia-smi
38
- ```
39
-
40
- ## Common commands
41
-
42
- ```bash
43
- wafer workspaces list
44
- wafer workspaces create my-workspace --wait
45
- wafer agent -t ask-docs --corpus cuda "What causes shared memory bank conflicts?"
46
- wafer agent -t trace-analyze --args trace=./profile.ncu-rep "Why is this kernel slow?"
47
- wafer evaluate --impl kernel.py --reference ref.py --test-cases tests.json --benchmark
48
- wafer nvidia ncu analyze profile.ncu-rep
49
- wafer corpus list
50
- ```
51
-
52
- ## Typical workflows
53
-
54
- ### Query GPU documentation
55
-
56
- Download a documentation corpus and ask questions with citations.
57
-
58
- ```bash
59
- wafer corpus download cuda
60
- wafer agent -t ask-docs --corpus cuda "What causes shared memory bank conflicts?"
61
- ```
62
-
63
- ### Analyze performance traces
64
-
65
- Use the trace analysis template or query trace data directly.
66
-
67
- ```bash
68
- wafer agent -t trace-analyze --args trace=./profile.ncu-rep "Why is this kernel slow?"
69
- wafer nvidia perfetto query trace.json \
70
- "SELECT name, dur/1e6 as ms FROM slice WHERE cat='kernel' ORDER BY dur DESC LIMIT 10"
71
- ```
72
-
73
- ### Evaluate kernels on remote GPUs
74
-
75
- Run correctness and performance checks on a remote target.
76
-
77
- ```bash
78
- wafer evaluate \
79
- --impl ./kernel.py \
80
- --reference ./reference.py \
81
- --test-cases ./tests.json \
82
- --benchmark
83
- ```
84
-
85
- ### Run commands on a remote GPU
86
-
87
- ```bash
88
- wafer remote-run -- nvidia-smi
89
- wafer remote-run --upload-dir ./my_code -- python3 train.py
90
- ```
91
-
92
- ### Manage workspaces
93
-
94
- ```bash
95
- wafer workspaces list
96
- wafer workspaces create my-workspace --wait
97
- wafer workspaces ssh <workspace-id>
98
- wafer workspaces delete <workspace-id>
99
- ```
100
-
101
- ## Install the CLI skill (optional)
102
-
103
- ```bash
104
- wafer skill install
105
- # or
106
- wafer skill install -t <claude/codex>
107
- ```