wafer-cli 0.2.31 → 0.2.33 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wafer/GUIDE.md CHANGED

@@ -7,7 +7,7 @@ GPU development primitives for LLM agents.
 Run code on cloud GPUs instantly with workspaces:
 
 ```bash
-wafer auth login # One-time auth
+wafer login # One-time auth
 wafer workspaces create dev --gpu B200 # Create workspace (NVIDIA B200)
 wafer workspaces exec dev -- python -c "import torch; print(torch.cuda.get_device_name(0))"
 wafer workspaces sync dev ./my-project # Sync files
wafer/agent_defaults.py CHANGED

@@ -1,8 +1,8 @@
 """Shared agent defaults for kernel optimization tasks.
 
 Single source of truth for bash allowlists and enabled tools used by both:
-- CLI templates (apps/wafer-cli/wafer/templates/optimize_kernelbench.py)
-- Eval configs (research/evals/optimize_kernelbench_eval/.../base_config.py)
+- CLI templates (apps/wafer-cli/wafer/templates/*.py)
+- Eval configs (research/evals/*_eval/*.py)
 
 Import from here instead of defining your own copy.
 """
@@ -12,6 +12,9 @@ from __future__ import annotations
 # Tools available to the agent (coding environment tools)
 ENABLED_TOOLS: list[str] = ["read", "write", "edit", "glob", "grep", "bash"]
 
+# vLLM-specific tools (same as ENABLED_TOOLS for now)
+VLLM_ENABLED_TOOLS: list[str] = ["read", "write", "edit", "glob", "grep", "bash"]
+
 # Bash commands allowed for kernel optimization agents.
 # Uses prefix matching — "wafer evaluate" also allows "wafer evaluate kernelbench".
 KERNELBENCH_BASH_ALLOWLIST: list[str] = [
@@ -40,3 +43,155 @@ KERNELBENCH_BASH_ALLOWLIST: list[str] = [
     "pwd",
     "which",
 ]
+
+# Tools available to aiter optimization agents (full coding environment)
+AITER_ENABLED_TOOLS: list[str] = ["read", "write", "edit", "glob", "grep", "bash"]
+
+# System prompt for aiter optimization (shared between eval and template)
+# Uses {op_name}, {test_file}, {target_flag} placeholders
+AITER_SYSTEM_PROMPT = """\
+You are a GPU kernel optimization expert specializing in AMD MI300X and the aiter library.
+
+## Context
+
+aiter (ROCm/aiter) is AMD's centralized repository for high-performance AI operators.
+Operators are implemented using Triton kernels, Composable Kernel (CK), or HIP/ROCm.
+
+Each operator has a test in `op_tests/test_{{op}}.py` that validates correctness and
+measures performance against a reference implementation.
+
+## Your Task
+
+1. **Understand the operator**: Read the test file and trace imports to find implementation
+2. **Establish baseline**: Run the evaluation to measure current performance
+   ```bash
+   # Quick check with one shape (fast iteration)
+   wafer evaluate aiter --aiter-dir . --cmd "python op_tests/test_{{op}}.py --mnk 128,32,8192" {target_flag}
+
+   # Full test suite (final validation)
+   wafer evaluate aiter --aiter-dir . --cmd "python op_tests/test_{{op}}.py" {target_flag}
+   ```
+3. **Identify optimizations**: Look for memory access patterns, occupancy, instruction selection
+4. **Implement changes**: Modify the operator to improve performance
+5. **Validate**: Re-run evaluation to verify correctness and measure speedup
+6. **Iterate**: Use quick checks during development, full suite for final validation
+
+## Finding Source Files
+
+The aiter codebase structure varies by operator. To find implementation files:
+
+1. **Start with the test file**: `op_tests/test_{{op}}.py`
+   - Read imports to see what modules are used
+   - Look for the main function being tested
+
+2. **Check common locations** (not all ops have all of these):
+   - `aiter/ops/{{op}}.py` — High-level Python API (some ops)
+   - `aiter/triton_kernels/` — Triton kernel implementations
+   - `csrc/kernels/` — CUDA/HIP kernel implementations
+   - `csrc/py_itfs_cu/` — Python interface CUDA files
+   - `csrc/cktile_*/` — Composable Kernel tile implementations
+
+3. **Search for the op name**:
+   ```bash
+   find . -name "*{{op}}*" -type f | grep -v __pycache__
+   grep -r "def {{function_name}}" aiter/ csrc/ --include="*.py" --include="*.cu"
+   ```
+
+## Key Directories
+
+- `aiter/` — Main package with operator implementations
+- `aiter/ops/` — High-level operator APIs (some ops)
+- `aiter/triton_kernels/` — Triton kernel implementations
+- `csrc/` — C++/CUDA/HIP implementations
+- `op_tests/` — Tests for each operator
+- `aiter/configs/` — Tuned configurations (CSV files)
+
+## Output
+
+Your goal is to produce:
+1. Modified operator code with optimizations
+2. Benchmark results showing correctness and speedup
+3. A summary of what you changed and why
+
+The optimization should be correct (pass the op_test) and faster than baseline."""
+
+# Bash commands allowed for aiter optimization agents.
+AITER_BASH_ALLOWLIST: list[str] = [
+    # Read-only
+    "ls",
+    "cat",
+    "head",
+    "tail",
+    "wc",
+    "find",
+    "grep",
+    "rg",
+    "pwd",
+    "tree",
+    "which",
+    "diff",
+    "sort",
+    # Filesystem
+    "mkdir",
+    "cp",
+    "mv",
+    # Git
+    "git diff",
+    "git status",
+    "git log",
+    # Compilation
+    "hipcc",
+    "g++",
+    "gcc",
+    "clang",
+    "python",
+    "python3",
+    "pip",
+    "pytest",
+    # Execution — allows running compiled binaries and python scripts
+    "./",
+    # Kernel evaluation
+    "wafer evaluate aiter",
+    # Profiling — AMD
+    "wafer amd rocprof-compute",
+    "wafer amd rocprof-sdk",
+    "wafer amd rocprof-systems",
+    "wafer amd isa",
+    # Sub-agents
+    "wafer agent -t ask-docs",
+    # Misc
+    "timeout",
+]
+
+# Bash commands allowed for vLLM kernel optimization agents.
+VLLM_BASH_ALLOWLIST: list[str] = [
+    # vLLM evaluation
+    "wafer evaluate vllm",
+    # vLLM's own test and benchmark commands (run inside vllm dir)
+    "pytest",
+    # Profiling — AMD
+    "wafer amd rocprof-compute",
+    "wafer amd rocprof-sdk",
+    "wafer amd rocprof-systems",
+    # Profiling — NVIDIA
+    "wafer nvidia ncu",
+    "wafer nvidia nsys",
+    # Analysis
+    "wafer compiler-analyze",
+    # Sub-agents
+    "wafer agent -t ask-docs",
+    # General utilities
+    "python",
+    "python3",
+    "pip",
+    "timeout",
+    "ls",
+    "cat",
+    "head",
+    "tail",
+    "wc",
+    "pwd",
+    "which",
+    "cd",
+    "git",
+]
wafer/billing.py CHANGED

@@ -1,6 +1,6 @@
 """Billing CLI - Manage credits and subscription.
 
-This module provides the implementation for the `wafer config billing` subcommand.
+This module provides the implementation for the `wafer billing` subcommand.
 """
 
 import json
@@ -126,7 +126,7 @@ def format_usage_text(usage: dict) -> str:
     lines.extend([
         "",
         "Upgrade to Pro for hardware counters and credit topups:",
-        " wafer config billing portal",
+        " wafer billing portal",
     ])
 
     return "\n".join(lines)
@@ -153,7 +153,7 @@ def get_usage(json_output: bool = False) -> str:
         usage = response.json()
     except httpx.HTTPStatusError as e:
         if e.response.status_code == 401:
-            raise RuntimeError("Not authenticated. Run: wafer auth login") from e
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
         raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
     except httpx.RequestError as e:
         raise RuntimeError(f"Could not reach API: {e}") from e
@@ -188,7 +188,7 @@ def create_topup(amount_cents: int) -> dict:
         return response.json()
     except httpx.HTTPStatusError as e:
         if e.response.status_code == 401:
-            raise RuntimeError("Not authenticated. Run: wafer auth login") from e
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
         if e.response.status_code == 400:
            # Invalid amount
            try:
@@ -200,7 +200,7 @@ def create_topup(amount_cents: int) -> dict:
            # Start tier or other restriction
            raise RuntimeError(
                "Topup not available for your subscription tier.\n"
-               "Upgrade your subscription first: wafer config billing portal"
+               "Upgrade your subscription first: wafer billing portal"
            ) from e
         if e.response.status_code == 503:
            raise RuntimeError("Billing service temporarily unavailable. Please try again later.") from e
@@ -227,7 +227,7 @@ def get_portal_url() -> dict:
         return response.json()
     except httpx.HTTPStatusError as e:
         if e.response.status_code == 401:
-            raise RuntimeError("Not authenticated. Run: wafer auth login") from e
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
         raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
     except httpx.RequestError as e:
         raise RuntimeError(f"Could not reach API: {e}") from e