wafer-cli 0.2.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. wafer_cli-0.2.19/PKG-INFO +16 -0
  2. wafer_cli-0.2.19/README.md +242 -0
  3. wafer_cli-0.2.19/pyproject.toml +120 -0
  4. wafer_cli-0.2.19/setup.cfg +4 -0
  5. wafer_cli-0.2.19/tests/test_analytics.py +557 -0
  6. wafer_cli-0.2.19/tests/test_auth.py +193 -0
  7. wafer_cli-0.2.19/tests/test_billing.py +531 -0
  8. wafer_cli-0.2.19/tests/test_cli_coverage.py +721 -0
  9. wafer_cli-0.2.19/tests/test_cli_parity_integration.py +695 -0
  10. wafer_cli-0.2.19/tests/test_config_integration.py +50 -0
  11. wafer_cli-0.2.19/tests/test_file_operations_integration.py +193 -0
  12. wafer_cli-0.2.19/tests/test_kernel_scope_cli.py +618 -0
  13. wafer_cli-0.2.19/tests/test_nsys_analyze.py +289 -0
  14. wafer_cli-0.2.19/tests/test_nsys_profile.py +160 -0
  15. wafer_cli-0.2.19/tests/test_output.py +263 -0
  16. wafer_cli-0.2.19/tests/test_rocprof_compute_integration.py +212 -0
  17. wafer_cli-0.2.19/tests/test_skill_commands.py +231 -0
  18. wafer_cli-0.2.19/tests/test_ssh_integration.py +134 -0
  19. wafer_cli-0.2.19/tests/test_targets_ops.py +206 -0
  20. wafer_cli-0.2.19/tests/test_wevin_cli.py +668 -0
  21. wafer_cli-0.2.19/tests/test_workflow_integration.py +147 -0
  22. wafer_cli-0.2.19/wafer/GUIDE.md +118 -0
  23. wafer_cli-0.2.19/wafer/__init__.py +3 -0
  24. wafer_cli-0.2.19/wafer/analytics.py +306 -0
  25. wafer_cli-0.2.19/wafer/api_client.py +195 -0
  26. wafer_cli-0.2.19/wafer/auth.py +439 -0
  27. wafer_cli-0.2.19/wafer/autotuner.py +1080 -0
  28. wafer_cli-0.2.19/wafer/billing.py +233 -0
  29. wafer_cli-0.2.19/wafer/cli.py +7289 -0
  30. wafer_cli-0.2.19/wafer/config.py +105 -0
  31. wafer_cli-0.2.19/wafer/corpus.py +366 -0
  32. wafer_cli-0.2.19/wafer/evaluate.py +4594 -0
  33. wafer_cli-0.2.19/wafer/global_config.py +350 -0
  34. wafer_cli-0.2.19/wafer/gpu_run.py +307 -0
  35. wafer_cli-0.2.19/wafer/inference.py +148 -0
  36. wafer_cli-0.2.19/wafer/kernel_scope.py +552 -0
  37. wafer_cli-0.2.19/wafer/ncu_analyze.py +651 -0
  38. wafer_cli-0.2.19/wafer/nsys_analyze.py +1042 -0
  39. wafer_cli-0.2.19/wafer/nsys_profile.py +510 -0
  40. wafer_cli-0.2.19/wafer/output.py +248 -0
  41. wafer_cli-0.2.19/wafer/problems.py +357 -0
  42. wafer_cli-0.2.19/wafer/rocprof_compute.py +490 -0
  43. wafer_cli-0.2.19/wafer/rocprof_sdk.py +274 -0
  44. wafer_cli-0.2.19/wafer/rocprof_systems.py +520 -0
  45. wafer_cli-0.2.19/wafer/skills/wafer-guide/SKILL.md +129 -0
  46. wafer_cli-0.2.19/wafer/ssh_keys.py +261 -0
  47. wafer_cli-0.2.19/wafer/target_lock.py +270 -0
  48. wafer_cli-0.2.19/wafer/targets.py +842 -0
  49. wafer_cli-0.2.19/wafer/targets_ops.py +717 -0
  50. wafer_cli-0.2.19/wafer/templates/__init__.py +0 -0
  51. wafer_cli-0.2.19/wafer/templates/ask_docs.py +61 -0
  52. wafer_cli-0.2.19/wafer/templates/optimize_kernel.py +71 -0
  53. wafer_cli-0.2.19/wafer/templates/optimize_kernelbench.py +137 -0
  54. wafer_cli-0.2.19/wafer/templates/trace_analyze.py +74 -0
  55. wafer_cli-0.2.19/wafer/tracelens.py +218 -0
  56. wafer_cli-0.2.19/wafer/wevin_cli.py +577 -0
  57. wafer_cli-0.2.19/wafer/workspaces.py +903 -0
  58. wafer_cli-0.2.19/wafer_cli.egg-info/PKG-INFO +16 -0
  59. wafer_cli-0.2.19/wafer_cli.egg-info/SOURCES.txt +61 -0
  60. wafer_cli-0.2.19/wafer_cli.egg-info/dependency_links.txt +1 -0
  61. wafer_cli-0.2.19/wafer_cli.egg-info/entry_points.txt +2 -0
  62. wafer_cli-0.2.19/wafer_cli.egg-info/requires.txt +12 -0
  63. wafer_cli-0.2.19/wafer_cli.egg-info/top_level.txt +2 -0
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: wafer-cli
3
+ Version: 0.2.19
4
+ Summary: CLI tool for running commands on remote GPUs and GPU kernel optimization agent
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: typer>=0.12.0
7
+ Requires-Dist: trio>=0.24.0
8
+ Requires-Dist: trio-asyncio>=0.15.0
9
+ Requires-Dist: wafer-core>=0.1.0
10
+ Requires-Dist: perfetto>=0.16.0
11
+ Requires-Dist: posthog>=3.0.0
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
14
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
15
+ Requires-Dist: diff-cover>=8.0.0; extra == "dev"
16
+ Requires-Dist: ruff>=0.4.0; extra == "dev"
@@ -0,0 +1,242 @@
1
+ # Wafer CLI
2
+
3
+ Run GPU workloads, optimize kernels, and query GPU documentation.
4
+
5
+ ## Getting Started
6
+
7
+ ```bash
8
+ # Install
9
+ cd apps/wafer-cli && uv sync
10
+
11
+ # Use staging (workspaces and other features require staging)
12
+ wafer config set api.environment staging
13
+
14
+ # Login
15
+ wafer login
16
+
17
+ # Run a command on a remote GPU
18
+ wafer remote-run -- nvidia-smi
19
+ ```
20
+
21
+ ## Commands
22
+
23
+ ### `wafer login` / `wafer logout` / `wafer whoami`
24
+
25
+ Authenticate with GitHub OAuth.
26
+
27
+ ```bash
28
+ wafer login # Opens browser for GitHub OAuth
29
+ wafer whoami # Show current user
30
+ wafer logout # Remove credentials
31
+ ```
32
+
33
+ ### `wafer remote-run`
34
+
35
+ Run any command on a remote GPU.
36
+
37
+ ```bash
38
+ wafer remote-run -- nvidia-smi
39
+ wafer remote-run --upload-dir ./my_code -- python3 train.py
40
+ ```
41
+
42
+ ### `wafer workspaces`
43
+
44
+ Create and manage persistent GPU environments.
45
+
46
+ **Available GPUs:**
47
+
48
+ - `MI300X` - AMD Instinct MI300X (192GB HBM3, ROCm)
49
+ - `B200` - NVIDIA Blackwell B200 (180GB HBM3e, CUDA) - default
50
+
51
+ ```bash
52
+ wafer workspaces list
53
+ wafer workspaces create my-workspace --gpu B200 --wait # NVIDIA B200
54
+ wafer workspaces create amd-dev --gpu MI300X # AMD MI300X
55
+ wafer workspaces ssh <workspace-id>
56
+ wafer workspaces delete <workspace-id>
57
+ ```
58
+
59
+ ### `wafer agent`
60
+
61
+ AI assistant for GPU kernel development. Helps with CUDA/Triton optimization, documentation queries, and performance analysis.
62
+
63
+ ```bash
64
+ wafer agent "What is TMEM in CuTeDSL?"
65
+ wafer agent -s "optimize this kernel" < kernel.py
66
+ ```
67
+
68
+ ### `wafer evaluate`
69
+
70
+ Evaluate kernel correctness and performance against a reference implementation.
71
+
72
+ **Functional format** (default):
73
+ ```bash
74
+ # Generate template files
75
+ wafer evaluate make-template ./my-kernel
76
+
77
+ # Run evaluation
78
+ wafer evaluate --impl kernel.py --reference ref.py --test-cases tests.json --benchmark
79
+ ```
80
+
81
+ The implementation must define `custom_kernel(inputs)`; the reference must define `ref_kernel(inputs)` and `generate_input(**params)`.
82
+
83
+ **KernelBench format** (ModelNew class):
84
+ ```bash
85
+ # Extract a KernelBench problem as template
86
+ wafer evaluate kernelbench make-template level1/1
87
+
88
+ # Run evaluation
89
+ wafer evaluate kernelbench --impl my_kernel.py --reference problem.py --benchmark
90
+ ```
91
+
92
+ The implementation must define `class ModelNew(nn.Module)`; the reference must define `class Model`, `get_inputs()`, and `get_init_inputs()`.
93
+
94
+ ### `wafer wevin -t ask-docs`
95
+
96
+ Query GPU documentation using the docs template.
97
+
98
+ ```bash
99
+ wafer wevin -t ask-docs --json -s "What causes bank conflicts in shared memory?"
100
+ ```
101
+
102
+ ### `wafer corpus`
103
+
104
+ Download documentation to local filesystem for agents to search.
105
+
106
+ ```bash
107
+ wafer corpus list
108
+ wafer corpus download cuda-programming-guide
109
+ ```
110
+
111
+ ---
112
+
113
+ ## Customization
114
+
115
+ ### `wafer remote-run` options
116
+
117
+ ```bash
118
+ wafer remote-run --image pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel -- python3 script.py
119
+ wafer remote-run --require-hwc -- ncu --set full python3 bench.py # Hardware counters for NCU
120
+ ```
121
+
122
+ ### `wafer evaluate` options
123
+
124
+ ```bash
125
+ wafer evaluate --impl k.py --reference r.py --test-cases t.json \
126
+ --target vultr-b200 \
127
+ --benchmark \
128
+ --profile # --target: specific GPU target; --benchmark: measure performance; --profile: enable torch.profiler + NCU
129
+ ```
130
+
131
+ ### `wafer push` for multi-command workflows
132
+
133
+ ```bash
134
+ WORKSPACE=$(wafer push ./project)
135
+ wafer remote-run --workspace-id $WORKSPACE -- python3 test1.py
136
+ wafer remote-run --workspace-id $WORKSPACE -- python3 test2.py
137
+ ```
138
+
139
+ ### Profile analysis
140
+
141
+ ```bash
142
+ wafer nvidia ncu analyze profile.ncu-rep
143
+ wafer nvidia nsys analyze profile.nsys-rep
144
+ ```
145
+
146
+ ---
147
+
148
+ ## Advanced
149
+
150
+ ### Local targets
151
+
152
+ Bypass the API and SSH directly to your own GPUs:
153
+
154
+ ```bash
155
+ wafer targets list
156
+ wafer targets add ./my-gpu.toml
157
+ wafer targets default my-gpu
158
+ ```
159
+
160
+ ### Defensive evaluation
161
+
162
+ Detect evaluation hacking (stream injection, lazy evaluation, etc.):
163
+
164
+ ```bash
165
+ wafer evaluate --impl k.py --reference r.py --test-cases t.json --benchmark --defensive
166
+ ```
167
+
168
+ ### Other tools
169
+
170
+ ```bash
171
+ wafer perfetto <trace.json> --query "SELECT * FROM slice" # Perfetto SQL queries
172
+ wafer capture ./script.py # Capture execution snapshot
173
+ wafer compiler-analyze kernel.ptx # Analyze PTX/SASS
174
+ ```
175
+
176
+ ### ROCm profiling (AMD GPUs)
177
+
178
+ ```bash
179
+ wafer rocprof-sdk ...
180
+ wafer rocprof-systems ...
181
+ wafer rocprof-compute ...
182
+ ```
183
+
184
+ ---
185
+
186
+ ## Shell Completion
187
+
188
+ Enable tab completion for commands, options, and target names:
189
+
190
+ ```bash
191
+ # Install completion (zsh/bash/fish)
192
+ wafer --install-completion
193
+
194
+ # Then restart your terminal, or source your shell config:
195
+ source ~/.zshrc # or ~/.bashrc
196
+ ```
197
+
198
+ Now you can tab-complete:
199
+ - Commands: `wafer eva<TAB>` → `wafer evaluate`
200
+ - Options: `wafer evaluate --<TAB>`
201
+ - Target names: `wafer evaluate --target v<TAB>` → `wafer evaluate --target vultr-b200`
202
+ - File paths: `wafer evaluate --impl ./<TAB>`
203
+
204
+ ---
205
+
206
+ ## AI Assistant Skills
207
+
208
+ Install the Wafer CLI skill to make wafer commands discoverable by your AI coding assistant:
209
+
210
+ ```bash
211
+ # Install for all supported tools (Claude Code, Codex CLI, Cursor)
212
+ wafer skill install
213
+
214
+ # Install for a specific tool
215
+ wafer skill install -t cursor # Cursor
216
+ wafer skill install -t claude # Claude Code
217
+ wafer skill install -t codex # Codex CLI
218
+
219
+ # Check installation status
220
+ wafer skill status
221
+
222
+ # Uninstall
223
+ wafer skill uninstall
224
+ ```
225
+
226
+ ### Installing from GitHub (Cursor)
227
+
228
+ You can also install the skill directly from GitHub in Cursor:
229
+
230
+ 1. Open Cursor Settings (Cmd+Shift+J / Ctrl+Shift+J)
231
+ 2. Navigate to **Rules** → **Add Rule** → **Remote Rule (Github)**
232
+ 3. Enter: `https://github.com/wafer-ai/skills`
233
+ 4. Cursor will automatically discover skills in `.cursor/skills/`
234
+
235
+ The skill provides comprehensive guidance for GPU kernel development, including documentation lookup, trace analysis, kernel evaluation, and optimization workflows.
236
+
237
+ ---
238
+
239
+ ## Requirements
240
+
241
+ - Python 3.11+
242
+ - GitHub account (for authentication)
@@ -0,0 +1,120 @@
1
+ [project]
2
+ name = "wafer-cli"
3
+ version = "0.2.19"
4
+ description = "CLI tool for running commands on remote GPUs and GPU kernel optimization agent"
5
+ requires-python = ">=3.11"
6
+ dependencies = [
7
+ "typer>=0.12.0",
8
+ "trio>=0.24.0",
9
+ "trio-asyncio>=0.15.0", # Bridge asyncssh (asyncio) to trio for async SSH
10
+ # Wafer core for environments and utils (includes rollouts)
11
+ "wafer-core>=0.1.0",
12
+ "perfetto>=0.16.0",
13
+ "posthog>=3.0.0", # Analytics tracking
14
+ ]
15
+
16
+ [project.scripts]
17
+ wafer = "wafer.cli:main"
18
+
19
+ [tool.uv.sources]
20
+ wafer-core = { workspace = true }
21
+
22
+ [project.optional-dependencies]
23
+ dev = [
24
+ "pytest>=8.0.0",
25
+ "pytest-cov>=4.1.0",
26
+ "diff-cover>=8.0.0",
27
+ "ruff>=0.4.0",
28
+ ]
29
+
30
+ [build-system]
31
+ requires = ["setuptools>=61.0"]
32
+ build-backend = "setuptools.build_meta"
33
+
34
+ [tool.setuptools.packages.find]
35
+ where = ["."]
36
+ include = ["wafer*"]
37
+
38
+ [tool.setuptools.package-data]
39
+ wafer = ["GUIDE.md", "skills/*/SKILL.md"]
40
+
41
+ [tool.ruff]
42
+ line-length = 100
43
+ target-version = "py311"
44
+ preview = true # Required for PLR1702 (too-many-nested-blocks)
45
+
46
+ [tool.ruff.lint]
47
+ select = [
48
+ "E", # pycodestyle errors
49
+ "F", # pyflakes
50
+ "I", # isort (import sorting)
51
+ "ANN", # flake8-annotations (enforce type annotations)
52
+ "ASYNC", # flake8-async (trio/asyncio best practices)
53
+ "B", # flake8-bugbear (common bugs)
54
+ "UP", # pyupgrade (modern Python patterns)
55
+ "PLR0913", # too-many-arguments (Tiger Style: "hourglass shape: few parameters")
56
+ "PLR0915", # too-many-statements (Tiger Style: 70 line limit)
57
+ "PLR1702", # too-many-nested-blocks (Tiger Style: "centralize control flow")
58
+ "PLW2901", # redefined-loop-variable (Carmack SSA: single assignment)
59
+ "RET506", # superfluous-else-raise (explicit control flow)
60
+ "RET507", # superfluous-else-continue (explicit control flow)
61
+ "A", # flake8-builtins (shadowing builtins like list, str)
62
+ "RUF018", # assignment-in-assert (catches typos)
63
+ "TRY002", # raise-vanilla-exception (use specific exceptions)
64
+ "TRY003", # raise-vanilla-args (proper exception messages)
65
+ "TRY004", # type-check-without-type-error (use TypeError for type checks)
66
+ "TRY201", # verbose-raise (use bare raise)
67
+ "TRY300", # try-consider-else (clear control flow)
68
+ "TRY400", # error-instead-of-exception (use logging.exception)
69
+ ]
70
+ ignore = [
71
+ "E501", # Line too long (handled by formatter)
72
+ "B008", # Typer uses function calls in defaults
73
+ "TRY003", # Avoid specifying long messages outside exception class (too opinionated - revisit later)
74
+ "TRY300", # Consider moving to else block (too opinionated about try/except style - revisit later)
75
+ "TRY400", # Use logging.exception instead of logging.error (we prefer explicit error logging - revisit later)
76
+ ]
77
+
78
+ [tool.ruff.lint.per-file-ignores]
79
+ "tests/**/*.py" = ["ANN001", "ANN201", "ANN202", "ANN204"] # Don't require type annotations in tests
80
+ "wafer/evaluate.py" = ["PLR0915", "PLR1702", "E402", "PLW2901", "ASYNC221"] # complex deployment flows - TODO: refactor
81
+ "wafer/output.py" = ["ANN401"] # Output collector uses **kwargs for flexible event data
82
+ "wafer/autotuner.py" = ["PLR0915", "PLR1702", "B007", "B904"] # complex sweep logic - TODO: refactor
83
+ "wafer/ncu_analyze.py" = ["PLR0915", "PLR1702"] # complex parsing logic - TODO: refactor
84
+ "wafer/cli.py" = ["PLR0915"] # CLI commands can be long - TODO: refactor
85
+ "wafer/targets.py" = ["PLR1702"] # complex target init flow - TODO: refactor
86
+ "wafer/workspaces.py" = ["PLR1702"] # SSE streaming has nested blocks - TODO: refactor
87
+ "wafer/kernel_scope.py" = ["ANN001", "ANN202", "PLR0913"] # complex filtering logic
88
+ "wafer/api_client.py" = ["PLR0913"] # API client needs many params
89
+ "wafer/rocprof_compute.py" = ["PLR0913", "B904"] # profiler commands need many params
90
+ "wafer/rocprof_sdk.py" = ["PLR0913"] # profiler commands need many params
91
+ "wafer/rocprof_systems.py" = ["PLR0913"] # profiler commands need many params
92
+
93
+ [tool.ruff.lint.pylint]
94
+ max-args = 7 # Max function arguments (Tiger Style: few parameters)
95
+ max-statements = 70 # Max statements per function (Tiger Style: 70 line limit)
96
+ max-branches = 12 # Max if/elif branches
97
+ max-nested-blocks = 5 # Max nesting depth (Linus rule: "if you need more than 3 levels, you're screwed")
98
+
99
+ [tool.pytest.ini_options]
100
+ testpaths = ["tests"]
101
+ python_files = ["test_*.py"]
102
+ python_functions = ["test_*"]
103
+ addopts = [
104
+ "-v",
105
+ "--cov=wafer",
106
+ "--cov-report=term-missing",
107
+ "--cov-report=xml:coverage.xml",
108
+ "--cov-report=html:coverage/html",
109
+ ]
110
+
111
+ [tool.coverage.run]
112
+ source = ["wafer"]
113
+ omit = ["tests/*"]
114
+
115
+ [tool.coverage.report]
116
+ exclude_lines = [
117
+ "pragma: no cover",
118
+ "if __name__ == .__main__.:",
119
+ "raise NotImplementedError",
120
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+