wafer-cli 0.2.32__tar.gz → 0.2.34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. wafer_cli-0.2.34/PKG-INFO +260 -0
  2. wafer_cli-0.2.34/README.md +242 -0
  3. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/pyproject.toml +1 -1
  4. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_analytics.py +2 -2
  5. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_billing.py +15 -15
  6. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_cli_coverage.py +47 -1
  7. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_cli_parity_integration.py +47 -0
  8. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/GUIDE.md +1 -1
  9. wafer_cli-0.2.34/wafer/agent_defaults.py +197 -0
  10. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/billing.py +6 -6
  11. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/cli.py +432 -348
  12. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/corpus.py +6 -72
  13. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/evaluate.py +143 -81
  14. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/global_config.py +0 -13
  15. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/kernel_scope.py +1 -1
  16. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/ncu_analyze.py +1 -1
  17. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/nsys_analyze.py +1 -1
  18. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/skills/wafer-guide/SKILL.md +6 -22
  19. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/ssh_keys.py +6 -6
  20. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/targets_ops.py +2 -29
  21. wafer_cli-0.2.34/wafer/templates/aiter_optimize.py +59 -0
  22. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/templates/optimize_kernel.py +2 -4
  23. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/templates/optimize_kernelbench.py +62 -17
  24. wafer_cli-0.2.34/wafer/templates/optimize_vllm.py +156 -0
  25. wafer_cli-0.2.34/wafer/trace_compare.py +183 -0
  26. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/wevin_cli.py +1 -12
  27. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/workspaces.py +8 -8
  28. wafer_cli-0.2.34/wafer_cli.egg-info/PKG-INFO +260 -0
  29. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer_cli.egg-info/SOURCES.txt +2 -0
  30. wafer_cli-0.2.32/PKG-INFO +0 -107
  31. wafer_cli-0.2.32/README.md +0 -89
  32. wafer_cli-0.2.32/wafer/agent_defaults.py +0 -42
  33. wafer_cli-0.2.32/wafer/trace_compare.py +0 -274
  34. wafer_cli-0.2.32/wafer_cli.egg-info/PKG-INFO +0 -107
  35. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/setup.cfg +0 -0
  36. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_auth.py +0 -0
  37. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_config_integration.py +0 -0
  38. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_file_operations_integration.py +0 -0
  39. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_kernel_scope_cli.py +0 -0
  40. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_nsys_analyze.py +0 -0
  41. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_nsys_profile.py +0 -0
  42. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_output.py +0 -0
  43. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_rocprof_compute_integration.py +0 -0
  44. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_skill_commands.py +0 -0
  45. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_ssh_integration.py +0 -0
  46. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_targets_ops.py +0 -0
  47. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_wevin_cli.py +0 -0
  48. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/tests/test_workflow_integration.py +0 -0
  49. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/__init__.py +0 -0
  50. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/analytics.py +0 -0
  51. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/api_client.py +0 -0
  52. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/auth.py +0 -0
  53. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/autotuner.py +0 -0
  54. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/baseline.py +0 -0
  55. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/cli_instructions.py +0 -0
  56. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/config.py +0 -0
  57. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/gpu_run.py +0 -0
  58. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/inference.py +0 -0
  59. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/nsys_profile.py +0 -0
  60. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/output.py +0 -0
  61. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/problems.py +0 -0
  62. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/rocprof_compute.py +0 -0
  63. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/rocprof_sdk.py +0 -0
  64. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/rocprof_systems.py +0 -0
  65. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/specs_cli.py +0 -0
  66. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/target_lock.py +0 -0
  67. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/targets.py +0 -0
  68. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/targets_cli.py +0 -0
  69. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/templates/__init__.py +0 -0
  70. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/templates/ask_docs.py +0 -0
  71. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/templates/trace_analyze.py +0 -0
  72. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/tests/test_eval_cli_parity.py +0 -0
  73. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer/tracelens.py +0 -0
  74. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer_cli.egg-info/dependency_links.txt +0 -0
  75. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer_cli.egg-info/entry_points.txt +0 -0
  76. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer_cli.egg-info/requires.txt +0 -0
  77. {wafer_cli-0.2.32 → wafer_cli-0.2.34}/wafer_cli.egg-info/top_level.txt +0 -0
@@ -0,0 +1,260 @@
1
+ Metadata-Version: 2.4
2
+ Name: wafer-cli
3
+ Version: 0.2.34
4
+ Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
5
+ Requires-Python: >=3.11
6
+ Description-Content-Type: text/markdown
7
+ Requires-Dist: typer>=0.12.0
8
+ Requires-Dist: trio>=0.24.0
9
+ Requires-Dist: trio-asyncio>=0.15.0
10
+ Requires-Dist: wafer-core>=0.1.0
11
+ Requires-Dist: perfetto>=0.16.0
12
+ Requires-Dist: posthog>=3.0.0
13
+ Provides-Extra: dev
14
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
15
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
16
+ Requires-Dist: diff-cover>=8.0.0; extra == "dev"
17
+ Requires-Dist: ruff>=0.4.0; extra == "dev"
18
+
19
+ # Wafer CLI
20
+
21
+ Run GPU workloads, optimize kernels, and query GPU documentation.
22
+
23
+ ## Getting Started
24
+
25
+ ```bash
26
+ # Install
27
+ cd apps/wafer-cli && uv sync
28
+
29
+ # Use staging (workspaces and other features require staging)
30
+ wafer config set api.environment staging
31
+
32
+ # Login
33
+ wafer login
34
+
35
+ # Run a command on a remote GPU
36
+ wafer remote-run -- nvidia-smi
37
+ ```
38
+
39
+ ## Commands
40
+
41
+ ### `wafer login` / `wafer logout` / `wafer whoami`
42
+
43
+ Authenticate with GitHub OAuth.
44
+
45
+ ```bash
46
+ wafer login # Opens browser for GitHub OAuth
47
+ wafer whoami # Show current user
48
+ wafer logout # Remove credentials
49
+ ```
50
+
51
+ ### `wafer remote-run`
52
+
53
+ Run any command on a remote GPU.
54
+
55
+ ```bash
56
+ wafer remote-run -- nvidia-smi
57
+ wafer remote-run --upload-dir ./my_code -- python3 train.py
58
+ ```
59
+
60
+ ### `wafer workspaces`
61
+
62
+ Create and manage persistent GPU environments.
63
+
64
+ **Available GPUs:**
65
+
66
+ - `MI300X` - AMD Instinct MI300X (192GB HBM3, ROCm)
67
+ - `B200` - NVIDIA Blackwell B200 (180GB HBM3e, CUDA) - default
68
+
69
+ ```bash
70
+ wafer workspaces list
71
+ wafer workspaces create my-workspace --gpu B200 --wait # NVIDIA B200
72
+ wafer workspaces create amd-dev --gpu MI300X # AMD MI300X
73
+ wafer workspaces ssh <workspace-id>
74
+ wafer workspaces delete <workspace-id>
75
+ ```
76
+
77
+ ### `wafer agent`
78
+
79
+ AI assistant for GPU kernel development. Helps with CUDA/Triton optimization, documentation queries, and performance analysis.
80
+
81
+ ```bash
82
+ wafer agent "What is TMEM in CuTeDSL?"
83
+ wafer agent -s "optimize this kernel" < kernel.py
84
+ ```
85
+
86
+ ### `wafer evaluate`
87
+
88
+ Evaluate kernel correctness and performance against a reference implementation.
89
+
90
+ **Functional format** (default):
91
+ ```bash
92
+ # Generate template files
93
+ wafer evaluate make-template ./my-kernel
94
+
95
+ # Run evaluation
96
+ wafer evaluate --impl kernel.py --reference ref.py --test-cases tests.json --benchmark
97
+ ```
98
+
99
+ The implementation must define `custom_kernel(inputs)`, the reference must define `ref_kernel(inputs)` and `generate_input(**params)`.
100
+
101
+ **KernelBench format** (ModelNew class):
102
+ ```bash
103
+ # Extract a KernelBench problem as template
104
+ wafer evaluate kernelbench make-template level1/1
105
+
106
+ # Run evaluation
107
+ wafer evaluate kernelbench --impl my_kernel.py --reference problem.py --benchmark
108
+ ```
109
+
110
+ The implementation must define `class ModelNew(nn.Module)`, the reference must define `class Model`, `get_inputs()`, and `get_init_inputs()`.
111
+
112
+ ### `wafer wevin -t ask-docs`
113
+
114
+ Query GPU documentation using the docs template.
115
+
116
+ ```bash
117
+ wafer wevin -t ask-docs --json -s "What causes bank conflicts in shared memory?"
118
+ ```
119
+
120
+ ### `wafer corpus`
121
+
122
+ Download documentation to local filesystem for agents to search.
123
+
124
+ ```bash
125
+ wafer corpus list
126
+ wafer corpus download cuda-programming-guide
127
+ ```
128
+
129
+ ---
130
+
131
+ ## Customization
132
+
133
+ ### `wafer remote-run` options
134
+
135
+ ```bash
136
+ wafer remote-run --image pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel -- python3 script.py
137
+ wafer remote-run --require-hwc -- ncu --set full python3 bench.py # Hardware counters for NCU
138
+ ```
139
+
140
+ ### `wafer evaluate` options
141
+
142
+ ```bash
143
+ wafer evaluate --impl k.py --reference r.py --test-cases t.json \
144
+ --target vultr-b200 \ # Specific GPU target
145
+ --benchmark \ # Measure performance
146
+ --profile # Enable torch.profiler + NCU
147
+ ```
148
+
149
+ ### `wafer push` for multi-command workflows
150
+
151
+ ```bash
152
+ WORKSPACE=$(wafer push ./project)
153
+ wafer remote-run --workspace-id $WORKSPACE -- python3 test1.py
154
+ wafer remote-run --workspace-id $WORKSPACE -- python3 test2.py
155
+ ```
156
+
157
+ ### Profile analysis
158
+
159
+ ```bash
160
+ wafer nvidia ncu analyze profile.ncu-rep
161
+ wafer nvidia nsys analyze profile.nsys-rep
162
+ ```
163
+
164
+ ---
165
+
166
+ ## Advanced
167
+
168
+ ### Local targets
169
+
170
+ Bypass the API and SSH directly to your own GPUs:
171
+
172
+ ```bash
173
+ wafer targets list
174
+ wafer targets add ./my-gpu.toml
175
+ wafer targets default my-gpu
176
+ ```
177
+
178
+ ### Defensive evaluation
179
+
180
+ Detect evaluation hacking (stream injection, lazy evaluation, etc.):
181
+
182
+ ```bash
183
+ wafer evaluate --impl k.py --reference r.py --test-cases t.json --benchmark --defensive
184
+ ```
185
+
186
+ ### Other tools
187
+
188
+ ```bash
189
+ wafer perfetto <trace.json> --query "SELECT * FROM slice" # Perfetto SQL queries
190
+ wafer capture ./script.py # Capture execution snapshot
191
+ wafer compiler-analyze kernel.ptx # Analyze PTX/SASS
192
+ ```
193
+
194
+ ### ROCm profiling (AMD GPUs)
195
+
196
+ ```bash
197
+ wafer rocprof-sdk ...
198
+ wafer rocprof-systems ...
199
+ wafer rocprof-compute ...
200
+ ```
201
+
202
+ ---
203
+
204
+ ## Shell Completion
205
+
206
+ Enable tab completion for commands, options, and target names:
207
+
208
+ ```bash
209
+ # Install completion (zsh/bash/fish)
210
+ wafer --install-completion
211
+
212
+ # Then restart your terminal, or source your shell config:
213
+ source ~/.zshrc # or ~/.bashrc
214
+ ```
215
+
216
+ Now you can tab-complete:
217
+ - Commands: `wafer eva<TAB>` → `wafer evaluate`
218
+ - Options: `wafer evaluate --<TAB>`
219
+ - Target names: `wafer evaluate --target v<TAB>` → `wafer evaluate --target vultr-b200`
220
+ - File paths: `wafer evaluate --impl ./<TAB>`
221
+
222
+ ---
223
+
224
+ ## AI Assistant Skills
225
+
226
+ Install the Wafer CLI skill to make wafer commands discoverable by your AI coding assistant:
227
+
228
+ ```bash
229
+ # Install for all supported tools (Claude Code, Codex CLI, Cursor)
230
+ wafer skill install
231
+
232
+ # Install for a specific tool
233
+ wafer skill install -t cursor # Cursor
234
+ wafer skill install -t claude # Claude Code
235
+ wafer skill install -t codex # Codex CLI
236
+
237
+ # Check installation status
238
+ wafer skill status
239
+
240
+ # Uninstall
241
+ wafer skill uninstall
242
+ ```
243
+
244
+ ### Installing from GitHub (Cursor)
245
+
246
+ You can also install the skill directly from GitHub in Cursor:
247
+
248
+ 1. Open Cursor Settings (Cmd+Shift+J / Ctrl+Shift+J)
249
+ 2. Navigate to **Rules** → **Add Rule** → **Remote Rule (Github)**
250
+ 3. Enter: `https://github.com/wafer-ai/skills`
251
+ 4. Cursor will automatically discover skills in `.cursor/skills/`
252
+
253
+ The skill provides comprehensive guidance for GPU kernel development, including documentation lookup, trace analysis, kernel evaluation, and optimization workflows.
254
+
255
+ ---
256
+
257
+ ## Requirements
258
+
259
+ - Python 3.11+
260
+ - GitHub account (for authentication)
@@ -0,0 +1,242 @@
1
+ # Wafer CLI
2
+
3
+ Run GPU workloads, optimize kernels, and query GPU documentation.
4
+
5
+ ## Getting Started
6
+
7
+ ```bash
8
+ # Install
9
+ cd apps/wafer-cli && uv sync
10
+
11
+ # Use staging (workspaces and other features require staging)
12
+ wafer config set api.environment staging
13
+
14
+ # Login
15
+ wafer login
16
+
17
+ # Run a command on a remote GPU
18
+ wafer remote-run -- nvidia-smi
19
+ ```
20
+
21
+ ## Commands
22
+
23
+ ### `wafer login` / `wafer logout` / `wafer whoami`
24
+
25
+ Authenticate with GitHub OAuth.
26
+
27
+ ```bash
28
+ wafer login # Opens browser for GitHub OAuth
29
+ wafer whoami # Show current user
30
+ wafer logout # Remove credentials
31
+ ```
32
+
33
+ ### `wafer remote-run`
34
+
35
+ Run any command on a remote GPU.
36
+
37
+ ```bash
38
+ wafer remote-run -- nvidia-smi
39
+ wafer remote-run --upload-dir ./my_code -- python3 train.py
40
+ ```
41
+
42
+ ### `wafer workspaces`
43
+
44
+ Create and manage persistent GPU environments.
45
+
46
+ **Available GPUs:**
47
+
48
+ - `MI300X` - AMD Instinct MI300X (192GB HBM3, ROCm)
49
+ - `B200` - NVIDIA Blackwell B200 (180GB HBM3e, CUDA) - default
50
+
51
+ ```bash
52
+ wafer workspaces list
53
+ wafer workspaces create my-workspace --gpu B200 --wait # NVIDIA B200
54
+ wafer workspaces create amd-dev --gpu MI300X # AMD MI300X
55
+ wafer workspaces ssh <workspace-id>
56
+ wafer workspaces delete <workspace-id>
57
+ ```
58
+
59
+ ### `wafer agent`
60
+
61
+ AI assistant for GPU kernel development. Helps with CUDA/Triton optimization, documentation queries, and performance analysis.
62
+
63
+ ```bash
64
+ wafer agent "What is TMEM in CuTeDSL?"
65
+ wafer agent -s "optimize this kernel" < kernel.py
66
+ ```
67
+
68
+ ### `wafer evaluate`
69
+
70
+ Evaluate kernel correctness and performance against a reference implementation.
71
+
72
+ **Functional format** (default):
73
+ ```bash
74
+ # Generate template files
75
+ wafer evaluate make-template ./my-kernel
76
+
77
+ # Run evaluation
78
+ wafer evaluate --impl kernel.py --reference ref.py --test-cases tests.json --benchmark
79
+ ```
80
+
81
+ The implementation must define `custom_kernel(inputs)`, the reference must define `ref_kernel(inputs)` and `generate_input(**params)`.
82
+
83
+ **KernelBench format** (ModelNew class):
84
+ ```bash
85
+ # Extract a KernelBench problem as template
86
+ wafer evaluate kernelbench make-template level1/1
87
+
88
+ # Run evaluation
89
+ wafer evaluate kernelbench --impl my_kernel.py --reference problem.py --benchmark
90
+ ```
91
+
92
+ The implementation must define `class ModelNew(nn.Module)`, the reference must define `class Model`, `get_inputs()`, and `get_init_inputs()`.
93
+
94
+ ### `wafer wevin -t ask-docs`
95
+
96
+ Query GPU documentation using the docs template.
97
+
98
+ ```bash
99
+ wafer wevin -t ask-docs --json -s "What causes bank conflicts in shared memory?"
100
+ ```
101
+
102
+ ### `wafer corpus`
103
+
104
+ Download documentation to local filesystem for agents to search.
105
+
106
+ ```bash
107
+ wafer corpus list
108
+ wafer corpus download cuda-programming-guide
109
+ ```
110
+
111
+ ---
112
+
113
+ ## Customization
114
+
115
+ ### `wafer remote-run` options
116
+
117
+ ```bash
118
+ wafer remote-run --image pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel -- python3 script.py
119
+ wafer remote-run --require-hwc -- ncu --set full python3 bench.py # Hardware counters for NCU
120
+ ```
121
+
122
+ ### `wafer evaluate` options
123
+
124
+ ```bash
125
+ wafer evaluate --impl k.py --reference r.py --test-cases t.json \
126
+ --target vultr-b200 \ # Specific GPU target
127
+ --benchmark \ # Measure performance
128
+ --profile # Enable torch.profiler + NCU
129
+ ```
130
+
131
+ ### `wafer push` for multi-command workflows
132
+
133
+ ```bash
134
+ WORKSPACE=$(wafer push ./project)
135
+ wafer remote-run --workspace-id $WORKSPACE -- python3 test1.py
136
+ wafer remote-run --workspace-id $WORKSPACE -- python3 test2.py
137
+ ```
138
+
139
+ ### Profile analysis
140
+
141
+ ```bash
142
+ wafer nvidia ncu analyze profile.ncu-rep
143
+ wafer nvidia nsys analyze profile.nsys-rep
144
+ ```
145
+
146
+ ---
147
+
148
+ ## Advanced
149
+
150
+ ### Local targets
151
+
152
+ Bypass the API and SSH directly to your own GPUs:
153
+
154
+ ```bash
155
+ wafer targets list
156
+ wafer targets add ./my-gpu.toml
157
+ wafer targets default my-gpu
158
+ ```
159
+
160
+ ### Defensive evaluation
161
+
162
+ Detect evaluation hacking (stream injection, lazy evaluation, etc.):
163
+
164
+ ```bash
165
+ wafer evaluate --impl k.py --reference r.py --test-cases t.json --benchmark --defensive
166
+ ```
167
+
168
+ ### Other tools
169
+
170
+ ```bash
171
+ wafer perfetto <trace.json> --query "SELECT * FROM slice" # Perfetto SQL queries
172
+ wafer capture ./script.py # Capture execution snapshot
173
+ wafer compiler-analyze kernel.ptx # Analyze PTX/SASS
174
+ ```
175
+
176
+ ### ROCm profiling (AMD GPUs)
177
+
178
+ ```bash
179
+ wafer rocprof-sdk ...
180
+ wafer rocprof-systems ...
181
+ wafer rocprof-compute ...
182
+ ```
183
+
184
+ ---
185
+
186
+ ## Shell Completion
187
+
188
+ Enable tab completion for commands, options, and target names:
189
+
190
+ ```bash
191
+ # Install completion (zsh/bash/fish)
192
+ wafer --install-completion
193
+
194
+ # Then restart your terminal, or source your shell config:
195
+ source ~/.zshrc # or ~/.bashrc
196
+ ```
197
+
198
+ Now you can tab-complete:
199
+ - Commands: `wafer eva<TAB>` → `wafer evaluate`
200
+ - Options: `wafer evaluate --<TAB>`
201
+ - Target names: `wafer evaluate --target v<TAB>` → `wafer evaluate --target vultr-b200`
202
+ - File paths: `wafer evaluate --impl ./<TAB>`
203
+
204
+ ---
205
+
206
+ ## AI Assistant Skills
207
+
208
+ Install the Wafer CLI skill to make wafer commands discoverable by your AI coding assistant:
209
+
210
+ ```bash
211
+ # Install for all supported tools (Claude Code, Codex CLI, Cursor)
212
+ wafer skill install
213
+
214
+ # Install for a specific tool
215
+ wafer skill install -t cursor # Cursor
216
+ wafer skill install -t claude # Claude Code
217
+ wafer skill install -t codex # Codex CLI
218
+
219
+ # Check installation status
220
+ wafer skill status
221
+
222
+ # Uninstall
223
+ wafer skill uninstall
224
+ ```
225
+
226
+ ### Installing from GitHub (Cursor)
227
+
228
+ You can also install the skill directly from GitHub in Cursor:
229
+
230
+ 1. Open Cursor Settings (Cmd+Shift+J / Ctrl+Shift+J)
231
+ 2. Navigate to **Rules** → **Add Rule** → **Remote Rule (Github)**
232
+ 3. Enter: `https://github.com/wafer-ai/skills`
233
+ 4. Cursor will automatically discover skills in `.cursor/skills/`
234
+
235
+ The skill provides comprehensive guidance for GPU kernel development, including documentation lookup, trace analysis, kernel evaluation, and optimization workflows.
236
+
237
+ ---
238
+
239
+ ## Requirements
240
+
241
+ - Python 3.11+
242
+ - GitHub account (for authentication)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "wafer-cli"
3
- version = "0.2.32"
3
+ version = "0.2.34"
4
4
  description = "CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -467,7 +467,7 @@ class TestLoginLogoutAnalytics:
467
467
  patch("wafer.analytics.track_login") as mock_track_login, \
468
468
  patch("wafer.analytics.init_analytics", return_value=True):
469
469
 
470
- runner.invoke(app, ["auth", "login", "--token", "test-token"])
470
+ runner.invoke(app, ["login", "--token", "test-token"])
471
471
 
472
472
  # track_login should be called
473
473
  mock_track_login.assert_called_once_with("test-user-id", "test@example.com")
@@ -484,7 +484,7 @@ class TestLoginLogoutAnalytics:
484
484
  patch("wafer.analytics.track_logout") as mock_track_logout, \
485
485
  patch("wafer.analytics.init_analytics", return_value=True):
486
486
 
487
- result = runner.invoke(app, ["auth", "logout"])
487
+ result = runner.invoke(app, ["logout"])
488
488
 
489
489
  assert result.exit_code == 0
490
490
  mock_track_logout.assert_called_once()
@@ -210,7 +210,7 @@ class TestBillingUsageCommand:
210
210
  )
211
211
  mock_client.return_value.__enter__.return_value.get.return_value = mock_response
212
212
 
213
- result = runner.invoke(app, ["config", "billing"])
213
+ result = runner.invoke(app, ["billing"])
214
214
 
215
215
  assert result.exit_code != 0
216
216
  assert "login" in result.output.lower()
@@ -242,7 +242,7 @@ class TestBillingUsageCommand:
242
242
  mock_response.raise_for_status.return_value = None
243
243
  mock_client.return_value.__enter__.return_value.get.return_value = mock_response
244
244
 
245
- result = runner.invoke(app, ["config", "billing", "--json"])
245
+ result = runner.invoke(app, ["billing", "--json"])
246
246
 
247
247
  assert result.exit_code == 0
248
248
  data = json.loads(result.stdout)
@@ -275,7 +275,7 @@ class TestBillingUsageCommand:
275
275
  mock_response.raise_for_status.return_value = None
276
276
  mock_client.return_value.__enter__.return_value.get.return_value = mock_response
277
277
 
278
- result = runner.invoke(app, ["config", "billing"])
278
+ result = runner.invoke(app, ["billing"])
279
279
 
280
280
  assert result.exit_code == 0
281
281
  assert "Pro" in result.output
@@ -294,7 +294,7 @@ class TestBillingUsageCommand:
294
294
  httpx.RequestError("Connection failed")
295
295
  )
296
296
 
297
- result = runner.invoke(app, ["config", "billing"])
297
+ result = runner.invoke(app, ["billing"])
298
298
 
299
299
  assert result.exit_code != 0
300
300
  assert "error" in result.output.lower() or "reach" in result.output.lower()
@@ -317,7 +317,7 @@ class TestBillingTopupCommand:
317
317
  )
318
318
  mock_client.return_value.__enter__.return_value.post.return_value = mock_response
319
319
 
320
- result = runner.invoke(app, ["config", "billing", "topup"])
320
+ result = runner.invoke(app, ["billing", "topup"])
321
321
 
322
322
  assert result.exit_code != 0
323
323
  assert "login" in result.output.lower()
@@ -343,7 +343,7 @@ class TestBillingTopupCommand:
343
343
  mock_client.return_value.__enter__.return_value.post.return_value = mock_response
344
344
 
345
345
  with patch("webbrowser.open") as mock_browser:
346
- result = runner.invoke(app, ["config", "billing", "topup"])
346
+ result = runner.invoke(app, ["billing", "topup"])
347
347
 
348
348
  assert result.exit_code == 0
349
349
  # Verify $25 = 2500 cents was sent
@@ -372,7 +372,7 @@ class TestBillingTopupCommand:
372
372
  mock_client.return_value.__enter__.return_value.post.return_value = mock_response
373
373
 
374
374
  with patch("webbrowser.open") as mock_browser:
375
- result = runner.invoke(app, ["config", "billing", "topup", "100"])
375
+ result = runner.invoke(app, ["billing", "topup", "100"])
376
376
 
377
377
  assert result.exit_code == 0
378
378
  call_args = mock_client.return_value.__enter__.return_value.post.call_args
@@ -381,14 +381,14 @@ class TestBillingTopupCommand:
381
381
 
382
382
  def test_amount_below_minimum(self) -> None:
383
383
  """Amount below $10 should error."""
384
- result = runner.invoke(app, ["config", "billing", "topup", "5"])
384
+ result = runner.invoke(app, ["billing", "topup", "5"])
385
385
 
386
386
  assert result.exit_code != 0
387
387
  assert "10" in result.output # Should mention minimum
388
388
 
389
389
  def test_amount_above_maximum(self) -> None:
390
390
  """Amount above $500 should error."""
391
- result = runner.invoke(app, ["config", "billing", "topup", "600"])
391
+ result = runner.invoke(app, ["billing", "topup", "600"])
392
392
 
393
393
  assert result.exit_code != 0
394
394
  assert "500" in result.output # Should mention maximum
@@ -410,7 +410,7 @@ class TestBillingTopupCommand:
410
410
  )
411
411
  mock_client.return_value.__enter__.return_value.post.return_value = mock_response
412
412
 
413
- result = runner.invoke(app, ["config", "billing", "topup"])
413
+ result = runner.invoke(app, ["billing", "topup"])
414
414
 
415
415
  assert result.exit_code != 0
416
416
  assert "upgrade" in result.output.lower() or "portal" in result.output.lower()
@@ -436,7 +436,7 @@ class TestBillingTopupCommand:
436
436
  mock_client.return_value.__enter__.return_value.post.return_value = mock_response
437
437
 
438
438
  with patch("webbrowser.open") as mock_browser:
439
- result = runner.invoke(app, ["config", "billing", "topup", "--no-browser"])
439
+ result = runner.invoke(app, ["billing", "topup", "--no-browser"])
440
440
 
441
441
  assert result.exit_code == 0
442
442
  assert "https://checkout.stripe.com/test" in result.output
@@ -460,7 +460,7 @@ class TestBillingPortalCommand:
460
460
  )
461
461
  mock_client.return_value.__enter__.return_value.post.return_value = mock_response
462
462
 
463
- result = runner.invoke(app, ["config", "billing", "portal"])
463
+ result = runner.invoke(app, ["billing", "portal"])
464
464
 
465
465
  assert result.exit_code != 0
466
466
  assert "login" in result.output.lower()
@@ -483,7 +483,7 @@ class TestBillingPortalCommand:
483
483
  mock_client.return_value.__enter__.return_value.post.return_value = mock_response
484
484
 
485
485
  with patch("webbrowser.open") as mock_browser:
486
- result = runner.invoke(app, ["config", "billing", "portal"])
486
+ result = runner.invoke(app, ["billing", "portal"])
487
487
 
488
488
  assert result.exit_code == 0
489
489
  mock_browser.assert_called_once_with("https://billing.stripe.com/test")
@@ -506,7 +506,7 @@ class TestBillingPortalCommand:
506
506
  mock_client.return_value.__enter__.return_value.post.return_value = mock_response
507
507
 
508
508
  with patch("webbrowser.open") as mock_browser:
509
- result = runner.invoke(app, ["config", "billing", "portal", "--no-browser"])
509
+ result = runner.invoke(app, ["billing", "portal", "--no-browser"])
510
510
 
511
511
  assert result.exit_code == 0
512
512
  assert "https://billing.stripe.com/test" in result.output
@@ -528,4 +528,4 @@ class TestInsufficientCreditsError:
528
528
  message = _friendly_error(402, '{"detail": "Insufficient credits"}', "test-workspace")
529
529
 
530
530
  assert "credit" in message.lower()
531
- assert "wafer config billing" in message.lower()
531
+ assert "wafer billing" in message.lower()