wafer-cli 0.2.31__py3-none-any.whl → 0.2.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wafer/GUIDE.md +1 -1
- wafer/agent_defaults.py +157 -2
- wafer/billing.py +6 -6
- wafer/cli.py +432 -346
- wafer/corpus.py +6 -72
- wafer/evaluate.py +143 -81
- wafer/global_config.py +0 -13
- wafer/kernel_scope.py +1 -1
- wafer/ncu_analyze.py +1 -1
- wafer/nsys_analyze.py +1 -1
- wafer/skills/wafer-guide/SKILL.md +6 -22
- wafer/ssh_keys.py +6 -6
- wafer/targets_ops.py +2 -29
- wafer/templates/aiter_optimize.py +59 -0
- wafer/templates/optimize_kernel.py +2 -4
- wafer/templates/optimize_kernelbench.py +62 -17
- wafer/templates/optimize_vllm.py +156 -0
- wafer/trace_compare.py +48 -139
- wafer/wevin_cli.py +1 -12
- wafer/workspaces.py +8 -8
- wafer_cli-0.2.33.dist-info/METADATA +260 -0
- {wafer_cli-0.2.31.dist-info → wafer_cli-0.2.33.dist-info}/RECORD +25 -23
- wafer_cli-0.2.31.dist-info/METADATA +0 -107
- {wafer_cli-0.2.31.dist-info → wafer_cli-0.2.33.dist-info}/WHEEL +0 -0
- {wafer_cli-0.2.31.dist-info → wafer_cli-0.2.33.dist-info}/entry_points.txt +0 -0
- {wafer_cli-0.2.31.dist-info → wafer_cli-0.2.33.dist-info}/top_level.txt +0 -0
wafer/corpus.py
CHANGED
@@ -109,34 +109,14 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
     ),
     "hip": CorpusConfig(
         name="hip",
-        description="HIP programming guide
-        source_type="
-
-
-            RepoSource(
-                repo="ROCm/HIP",
-                paths=["docs"],
-            ),
-            # HIP examples - code samples
-            RepoSource(
-                repo="ROCm/HIP-Examples",
-                paths=["HIP-Examples-Applications", "mini-nbody"],
-            ),
-            # clr - HIP/OpenCL runtime (low-level)
-            RepoSource(
-                repo="ROCm/clr",
-                paths=["hipamd/include", "rocclr/device/gpu"],
-            ),
-            # ROCm docs - official documentation
-            RepoSource(
-                repo="ROCm/ROCm",
-                paths=["docs"],
-            ),
-        ],
+        description="HIP programming guide and API reference",
+        source_type="github_repo",
+        repo="ROCm/HIP",
+        repo_paths=["docs"],
     ),
     "amd": CorpusConfig(
         name="amd",
-        description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM
+        description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM)",
         source_type="github_multi_repo",
         repos=[
             # rocWMMA - wave matrix multiply-accumulate (WMMA) intrinsics
@@ -180,17 +160,11 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
                 paths=["docs"],
                 branch="develop_deprecated",
             ),
-            # HipKittens - high-performance AMD kernels
+            # HipKittens - high-performance AMD kernels
             RepoSource(
                 repo="HazyResearch/HipKittens",
                 paths=["docs", "kernels", "include"],
             ),
-            # HipKittens cdna3 branch - MI300X/MI325X (gfx942)
-            RepoSource(
-                repo="HazyResearch/HipKittens",
-                paths=["kernels", "include", "tests"],
-                branch="cdna3",
-            ),
             # vLLM AMD kernels
             RepoSource(
                 repo="vllm-project/vllm",
@@ -206,46 +180,6 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
                 repo="huggingface/hf-rocm-kernels",
                 paths=["csrc", "hf_rocm_kernels", "docs"],
             ),
-            # ROCm/flash-attention - FlashAttention for AMD GPUs
-            RepoSource(
-                repo="ROCm/flash-attention",
-                paths=["csrc", "docs"],
-            ),
-            # ROCm/triton - Triton compiler for AMD GPUs
-            RepoSource(
-                repo="ROCm/triton",
-                paths=["python/tutorials", "third_party/amd"],
-            ),
-            # ROCm/rccl - ROCm Communication Collectives Library (multi-GPU)
-            RepoSource(
-                repo="ROCm/rccl",
-                paths=["docs"],
-            ),
-            # ROCm/rocprofiler-sdk - AMD GPU profiling SDK
-            RepoSource(
-                repo="ROCm/rocprofiler-sdk",
-                paths=["docs", "samples"],
-            ),
-            # ROCm/omniperf - AMD GPU profiling tool
-            RepoSource(
-                repo="ROCm/omniperf",
-                paths=["docs", "src/omniperf_analyze"],
-            ),
-            # ROCm/omnitrace - Application tracing for AMD
-            RepoSource(
-                repo="ROCm/omnitrace",
-                paths=["docs"],
-            ),
-            # AMD GPUOpen Performance Guides
-            RepoSource(
-                repo="GPUOpen-Tools/gpu_performance_api",
-                paths=["docs"],
-            ),
-            # AMD LLVM - AMD GPU compiler backend
-            RepoSource(
-                repo="ROCm/llvm-project",
-                paths=["amd/device-libs/README.md", "llvm/docs/AMDGPUUsage.rst"],
-            ),
         ],
     ),
 }
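
The net effect on the `hip` entry is a collapse from a multi-repo corpus to a single-repo one. A minimal sketch of the resulting shape, assuming `CorpusConfig` and `RepoSource` are simple dataclasses along these lines (their real definitions live elsewhere in wafer/corpus.py and are not shown in this diff):

```python
from dataclasses import dataclass, field

# Hypothetical stand-ins for the real definitions in wafer/corpus.py.
@dataclass(frozen=True)
class RepoSource:
    repo: str                                   # "owner/name" on GitHub
    paths: list[str] = field(default_factory=list)
    branch: str | None = None                   # None = repo default branch

@dataclass(frozen=True)
class CorpusConfig:
    name: str
    description: str
    source_type: str                            # "github_repo" or "github_multi_repo"
    repo: str | None = None                     # single-repo corpora
    repo_paths: list[str] | None = None
    repos: list[RepoSource] = field(default_factory=list)  # multi-repo corpora

# After this diff, "hip" is a single-repo corpus:
hip = CorpusConfig(
    name="hip",
    description="HIP programming guide and API reference",
    source_type="github_repo",
    repo="ROCm/HIP",
    repo_paths=["docs"],
)
```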
wafer/evaluate.py
CHANGED
@@ -78,10 +78,9 @@ def _build_docker_run_command(
     for cap in cap_add:
         parts.extend(["--cap-add", cap])

-    # GPU access - use
-    # with newer NVIDIA drivers (580+) where --gpus alone may not initialize CUDA
+    # GPU access - use single quotes for the device spec to avoid shell escaping issues
     if gpus:
-        parts.extend(["--
+        parts.extend(["--gpus", f"'{gpus}'"])

     # Volume mounts
     if volumes:
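
Docker's `--gpus` flag takes specs such as `all` or `device=0,1`; when the assembled command is later run through a shell, quoting the spec keeps `=` and `,` intact. A minimal sketch of the same pattern, assuming the parts list is eventually joined into one shell string (the surrounding `_build_docker_run_command` is only partially visible in this diff):

```python
# Minimal sketch; image and gpus values are illustrative.
def build_docker_run(image: str, gpus: str | None = None) -> str:
    parts = ["docker", "run", "--rm"]
    if gpus:
        # Single-quote the spec so "device=0,1" survives shell word-splitting.
        parts.extend(["--gpus", f"'{gpus}'"])
    parts.append(image)
    return " ".join(parts)

print(build_docker_run("ubuntu:22.04", gpus="device=0,1"))
# docker run --rm --gpus 'device=0,1' ubuntu:22.04
```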
@@ -380,6 +379,18 @@ def _build_docker_pip_install_cmd(target: BaremetalTarget | VMTarget) -> str:
     return " && ".join(commands)


+def _get_wafer_root() -> Path:
+    """Get wafer monorepo root directory.
+
+    Walks up from this file to find the wafer repo root (contains apps/, packages/).
+    """
+    current = Path(__file__).resolve()
+    for parent in [current] + list(current.parents):
+        if (parent / "apps").is_dir() and (parent / "packages").is_dir():
+            return parent
+    raise RuntimeError(f"Could not find wafer root from {__file__}")
+
+
 async def run_evaluate_docker(
     args: EvaluateArgs,
     target: BaremetalTarget | VMTarget,
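
The same walk-up idiom works for any monorepo with known sentinel directories. A standalone usage sketch (names and paths are illustrative, not wafer's API):

```python
from pathlib import Path

def find_repo_root(start: Path, sentinels: tuple[str, ...] = ("apps", "packages")) -> Path:
    """Walk upward from `start` until a directory contains all sentinel subdirs."""
    for parent in [start] + list(start.parents):
        if all((parent / s).is_dir() for s in sentinels):
            return parent
    raise RuntimeError(f"No repo root found above {start}")

# e.g. locate packages/wafer-core relative to the discovered root
root = find_repo_root(Path(__file__).resolve())
wafer_core = root / "packages" / "wafer-core"
```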
@@ -2022,13 +2033,54 @@ async def run_evaluate_runpod(
             error_message=f"Failed to setup Python environment: {e}",
         )

-    #
-
-
-
-        f"
-
-
+    # Upload wafer-core to remote
+    try:
+        wafer_root = _get_wafer_root()
+        wafer_core_path = wafer_root / "packages" / "wafer-core"
+        print(f"Uploading wafer-core from {wafer_core_path}...")
+
+        wafer_core_remote = f"{REMOTE_WORKSPACE}/wafer-core"
+        await client.exec(f"mkdir -p {wafer_core_remote}")
+        wafer_core_workspace = await client.expand_path(wafer_core_remote)
+
+        upload_result = await client.upload_files(
+            str(wafer_core_path), wafer_core_workspace, recursive=True
+        )
+
+        # Wide event logging for upload result
+        upload_event = {
+            "event": "wafer_core_upload",
+            "target": target.name,
+            "target_type": "runpod",
+            "ssh_host": f"{client.user}@{client.host}:{client.port}",
+            "local_path": str(wafer_core_path),
+            "remote_path": wafer_core_workspace,
+            "success": upload_result.success,
+            "files_copied": upload_result.files_copied,
+            "duration_seconds": upload_result.duration_seconds,
+            "error_message": upload_result.error_message,
+        }
+        if upload_result.debug_info:
+            upload_event["debug_info"] = upload_result.debug_info
+        logger.info(json.dumps(upload_event))
+
+        # Fail fast if upload failed
+        if not upload_result.success:
+            print(f"ERROR: Upload failed: {upload_result.error_message}")
+            if upload_result.debug_info:
+                print(f"Debug info: {json.dumps(upload_result.debug_info, indent=2)}")
+            return EvaluateResult(
+                success=False,
+                all_correct=False,
+                correctness_score=0.0,
+                geomean_speedup=0.0,
+                passed_tests=0,
+                total_tests=0,
+                error_message=f"Failed to upload wafer-core: {upload_result.error_message}",
+            )
+
+        print(f"Uploaded {upload_result.files_copied} files")
+    except Exception as e:
         return EvaluateResult(
             success=False,
             all_correct=False,

@@ -2036,7 +2088,7 @@ async def run_evaluate_runpod(
             geomean_speedup=0.0,
             passed_tests=0,
             total_tests=0,
-            error_message=f"Failed to
+            error_message=f"Failed to upload wafer-core: {e}",
         )

     # Select GPU (RunPod pods typically have GPU 0)
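
The upload block uses "wide event" logging: one structured JSON log line carrying every relevant attribute of an operation, instead of many small messages. A minimal sketch of the pattern (field names follow the diff; the logger setup and values are illustrative):

```python
import json
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("wafer.evaluate")

def log_wide_event(event: str, **fields: object) -> None:
    """Emit one structured log line per operation; easy to grep and parse."""
    logger.info(json.dumps({"event": event, **fields}))

log_wide_event(
    "wafer_core_upload",
    target="my-runpod-target",   # illustrative values
    success=True,
    files_copied=42,
    duration_seconds=3.1,
)
```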
@@ -2177,18 +2229,11 @@ async def run_evaluate_runpod(
             error_message=f"Evaluation timed out after {target.eval_timeout}s",
         )

-    #
+    # Parse output
     stdout = result.stdout
     stderr = result.stderr
-    if stdout:
-        print(stdout)

     if result.exit_code != 0:
-        error_parts = [f"Evaluation failed (exit code {result.exit_code}):"]
-        if stdout:
-            error_parts.append(f"stdout: {stdout}")
-        if stderr:
-            error_parts.append(f"stderr: {stderr}")
         return EvaluateResult(
             success=False,
             all_correct=False,
@@ -2196,27 +2241,20 @@ async def run_evaluate_runpod(
             geomean_speedup=0.0,
             passed_tests=0,
             total_tests=0,
-            error_message="\
+            error_message=f"Evaluation failed:\nstdout: {stdout}\nstderr: {stderr}",
         )

-    #
-
-
-
-
-
-
-
-
-
-
-            total_tests=0,
-            error_message=f"Failed to read results: {cat_result.stderr}",
-        )
-
-    try:
-        results_data = json.loads(cat_result.stdout)
-    except json.JSONDecodeError as e:
+    # Find JSON result in output
+    result_json = None
+    for line in reversed(stdout.strip().split("\n")):
+        if line.startswith("{"):
+            try:
+                result_json = json.loads(line)
+                break
+            except json.JSONDecodeError:
+                continue
+
+    if result_json is None:
         return EvaluateResult(
             success=False,
             all_correct=False,

@@ -2224,12 +2262,10 @@ async def run_evaluate_runpod(
             geomean_speedup=0.0,
             passed_tests=0,
             total_tests=0,
-            error_message=f"
+            error_message=f"No JSON result in output:\n{stdout}",
         )

-
-    backends = results_data.get("backends", [])
-    if not backends:
+    if "error" in result_json:
         return EvaluateResult(
             success=False,
             all_correct=False,

@@ -2237,20 +2273,18 @@ async def run_evaluate_runpod(
             geomean_speedup=0.0,
             passed_tests=0,
             total_tests=0,
-            error_message="
+            error_message=result_json["error"],
         )

-
-
-    passed = sum(1 for t in correctness_tests if t.get("is_correct", False))
-    total = len(correctness_tests)
+    passed = result_json.get("passed", 0)
+    total = result_json.get("total", 0)
     correctness = passed / total if total > 0 else 0.0

     return EvaluateResult(
         success=True,
-        all_correct=
+        all_correct=result_json.get("all_correct", False),
         correctness_score=correctness,
-        geomean_speedup=
+        geomean_speedup=result_json.get("speedup", 0.0),
         passed_tests=passed,
         total_tests=total,
     )
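
The new parsing strategy scans the captured stdout from the bottom up and takes the last line that parses as a JSON object, which tolerates arbitrary log noise printed before the result. A self-contained sketch of the same idea (payload fields follow the diff; the sample output is illustrative):

```python
import json

def last_json_line(stdout: str) -> dict | None:
    """Return the last stdout line that parses as a JSON object, else None."""
    for line in reversed(stdout.strip().split("\n")):
        if line.startswith("{"):
            try:
                return json.loads(line)
            except json.JSONDecodeError:
                continue  # e.g. a log line that merely starts with "{"
    return None

sample = 'compiling...\nwarmup done\n{"passed": 5, "total": 5, "all_correct": true, "speedup": 1.8}'
result = last_json_line(sample)
assert result is not None and result["all_correct"]
```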
@@ -2351,13 +2385,61 @@ async def run_evaluate_digitalocean(
             error_message=f"Failed to setup Python environment: {e}",
         )

-    #
-
-
-
-        f"
-
-
+    # Upload wafer-core to remote
+    try:
+        wafer_root = _get_wafer_root()
+        wafer_core_path = wafer_root / "packages" / "wafer-core"
+        print(f"Uploading wafer-core from {wafer_core_path}...")
+
+        wafer_core_remote = f"{REMOTE_WORKSPACE}/wafer-core"
+        await client.exec(f"mkdir -p {wafer_core_remote}")
+        wafer_core_workspace = await client.expand_path(wafer_core_remote)
+
+        # Use SFTP instead of rsync to avoid SSH subprocess timeout issues
+        # (DigitalOcean may rate-limit new SSH connections)
+        upload_result = await client.upload_files(
+            str(wafer_core_path),
+            wafer_core_workspace,
+            recursive=True,
+            use_sftp=True,
+        )
+
+        # Wide event logging for upload result
+        upload_event = {
+            "event": "wafer_core_upload",
+            "target": target.name,
+            "target_type": "digitalocean",
+            "ssh_host": f"{client.user}@{client.host}:{client.port}",
+            "local_path": str(wafer_core_path),
+            "remote_path": wafer_core_workspace,
+            "success": upload_result.success,
+            "files_copied": upload_result.files_copied,
+            "duration_seconds": upload_result.duration_seconds,
+            "error_message": upload_result.error_message,
+        }
+        if upload_result.debug_info:
+            upload_event["debug_info"] = upload_result.debug_info
+        logger.info(json.dumps(upload_event))
+
+        # Fail fast if upload failed
+        if not upload_result.success:
+            print(f"ERROR: Upload failed: {upload_result.error_message}")
+            if upload_result.debug_info:
+                print(
+                    f"Debug info: {json.dumps(upload_result.debug_info, indent=2)}"
+                )
+            return EvaluateResult(
+                success=False,
+                all_correct=False,
+                correctness_score=0.0,
+                geomean_speedup=0.0,
+                passed_tests=0,
+                total_tests=0,
+                error_message=f"Failed to upload wafer-core: {upload_result.error_message}",
+            )
+
+        print(f"Uploaded {upload_result.files_copied} files")
+    except Exception as e:
         return EvaluateResult(
             success=False,
             all_correct=False,

@@ -2365,7 +2447,7 @@ async def run_evaluate_digitalocean(
             geomean_speedup=0.0,
             passed_tests=0,
             total_tests=0,
-            error_message=f"Failed to
+            error_message=f"Failed to upload wafer-core: {e}",
         )

     # Select GPU (DigitalOcean droplets typically have GPU 0)
@@ -3160,35 +3242,15 @@ def main():
     inputs = [x.cuda() if isinstance(x, torch.Tensor) else x for x in inputs]

     if run_defense and defense_module is not None:
-        # Use
+        # Use full defense suite
         print("[KernelBench] Running defense checks on implementation...")
-
+        run_all_defenses = defense_module.run_all_defenses
         time_with_defenses = defense_module.time_execution_with_defenses

-        #
-
-        _kernel_code = None
-        try:
-            _problem_code = Path(args.reference).read_text()
-            _kernel_code = Path(args.impl).read_text()
-        except Exception:
-            pass
-
-        # Input generator for caching/multi-input checks
-        def _input_generator():
-            _ins = get_inputs()
-            return tuple(x.cuda() if isinstance(x, torch.Tensor) else x for x in _ins)
-
-        # Run all defense checks (original + extended)
-        all_passed, defense_results, _ = run_extended(
+        # Run defense checks on implementation
+        all_passed, defense_results, _ = run_all_defenses(
             lambda *x: new_model(*x),
             *inputs,
-            reference_fn=lambda *x: ref_model(*x),
-            input_generator=_input_generator,
-            test_shapes=[(128, 128), (256, 256), (512, 512)],
-            check_precision_ulp=True,
-            problem_code=_problem_code,
-            kernel_code=_kernel_code,
         )
         results["defense_results"] = {
             name: {"passed": passed, "message": msg}
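
The calling contract visible in the diff is `run_all_defenses(fn, *inputs)` returning a three-tuple: an overall pass flag, a mapping of check name to `(passed, message)`, and an extra value discarded here. A hedged sketch of a compatible harness with a stub defense module (the check names and logic are invented for illustration, not wafer's implementation):

```python
from typing import Any, Callable

def run_all_defenses(
    fn: Callable[..., Any], *inputs: Any
) -> tuple[bool, dict[str, tuple[bool, str]], Any]:
    """Stub with the same return shape as the defense suite used in the diff."""
    checks = {
        "output_finite": (True, "no NaN/Inf in output"),   # hypothetical checks
        "deterministic": (True, "two runs matched"),
    }
    return all(p for p, _ in checks.values()), checks, None

all_passed, defense_results, _ = run_all_defenses(lambda x: x * 2, 21)
results = {
    "defense_results": {
        name: {"passed": passed, "message": msg}
        for name, (passed, msg) in defense_results.items()
    }
}
```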
wafer/global_config.py
CHANGED
@@ -234,20 +234,7 @@ def get_supabase_anon_key() -> str:

     The anon key is public and used for client-side auth operations
     like token refresh.
-
-    If SUPABASE_URL is set via env var, infer the matching anon key
-    from the built-in environments. Otherwise, use the config file's environment.
     """
-    supabase_url = get_supabase_url()
-
-    # If SUPABASE_URL was set via env var, find matching environment
-    if os.environ.get("SUPABASE_URL"):
-        # Check built-in environments to find matching Supabase URL
-        for env_name, env in BUILTIN_ENVIRONMENTS.items():
-            if env.supabase_url == supabase_url:
-                return env.supabase_anon_key
-
-    # Otherwise, use config file's environment
     return load_global_config().get_api_environment().supabase_anon_key

wafer/kernel_scope.py
CHANGED
@@ -95,7 +95,7 @@ def analyze_command(
     if not api_url or not auth_headers:
         raise RuntimeError(
             "API authentication required for .co file analysis. "
-            "Run 'wafer
+            "Run 'wafer login' first."
         )
     result = analyze_code_object(target_path, api_url, auth_headers)
     # ISA files - use kernel_index parameter
wafer/ncu_analyze.py
CHANGED
@@ -520,7 +520,7 @@ def _analyze_remote_api(

     except httpx.HTTPStatusError as e:
         if e.response.status_code == 401:
-            raise RuntimeError("Not authenticated. Run: wafer
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
         raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
     except httpx.RequestError as e:
         raise RuntimeError(f"Could not reach API: {e}") from e
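
This 401-to-actionable-error mapping recurs in nsys_analyze.py and ssh_keys.py below. A minimal self-contained sketch of the pattern with httpx (the URL and function name are illustrative):

```python
import httpx

def fetch_report(url: str) -> dict:
    try:
        response = httpx.get(url)
        response.raise_for_status()
        return response.json()
    except httpx.HTTPStatusError as e:
        # Map auth failures to an actionable message instead of a raw traceback.
        if e.response.status_code == 401:
            raise RuntimeError("Not authenticated. Run: wafer login") from e
        raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
    except httpx.RequestError as e:
        raise RuntimeError(f"Could not reach API: {e}") from e
```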
wafer/nsys_analyze.py
CHANGED
@@ -844,7 +844,7 @@ def _analyze_remote_api(

     except httpx.HTTPStatusError as e:
         if e.response.status_code == 401:
-            raise RuntimeError("Not authenticated. Run: wafer
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
         raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
     except httpx.RequestError as e:
         raise RuntimeError(f"Could not reach API: {e}") from e
wafer/skills/wafer-guide/SKILL.md
CHANGED

@@ -16,7 +16,7 @@ Before using Wafer CLI commands, install the tool:
 uv tool install wafer-cli

 # Authenticate (one-time setup)
-wafer
+wafer login

 ```

@@ -71,31 +71,15 @@ Test correctness and measure speedup against a reference:
 wafer evaluate make-template ./my-kernel
 # Creates: kernel.py, reference.py, test_cases.json

-#
-# Each dict is passed as **kwargs to generate_input() in reference.py
-
-# Run correctness check (GPUMode functional format)
-wafer evaluate gpumode \
+# Run evaluation on a configured target
+wafer evaluate \
   --impl ./my-kernel/kernel.py \
   --reference ./my-kernel/reference.py \
   --test-cases ./my-kernel/test_cases.json \
   --target <target-name>

-#
-wafer evaluate
-  --impl ./my-kernel/kernel.py \
-  --reference ./my-kernel/reference.py \
-  --test-cases ./my-kernel/test_cases.json \
-  --target <target-name> --benchmark
-
-# Run with defensive timing (detects evaluation hacking)
-wafer evaluate gpumode ... --benchmark --defensive
-
-# KernelBench format (ModelNew class)
-wafer evaluate kernelbench \
-  --impl my_kernel.py --reference problem.py \
-  --target <target-name> --stages all
+# With profiling
+wafer evaluate ... --profile
 ```

 ### 4. AI-Assisted Optimization

@@ -142,4 +126,4 @@ wafer config targets init runpod # RunPod cloud GPUs
 wafer config targets init digitalocean # DigitalOcean AMD GPUs
 ```

-Then use: `wafer evaluate
+Then use: `wafer evaluate --target <name> ...`
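
For orientation, `make-template` scaffolds the three files that `wafer evaluate` consumes. A hedged sketch of what a minimal reference might look like, assuming the functional format where each `test_cases.json` entry is passed as **kwargs to `generate_input()` (that convention is stated by a comment removed in this release, and every name below is illustrative rather than the generated template):

```python
# reference.py - illustrative sketch only
import torch

def generate_input(n: int = 1024, seed: int = 0) -> tuple[torch.Tensor, ...]:
    g = torch.Generator().manual_seed(seed)   # seeded for reproducible checks
    return (torch.randn(n, n, generator=g),)

def ref_kernel(x: torch.Tensor) -> torch.Tensor:
    return torch.relu(x)

# test_cases.json would then contain, e.g.:
# [{"n": 256, "seed": 0}, {"n": 1024, "seed": 1}]
```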
wafer/ssh_keys.py
CHANGED
@@ -1,6 +1,6 @@
 """SSH Keys CLI - Manage SSH public keys for workspace access.

-This module provides the implementation for the `wafer
+This module provides the implementation for the `wafer ssh-keys` subcommand.
 Users register their SSH public keys here, which are then installed in all
 workspaces they attach to (BYOK - Bring Your Own Key model).
 """

@@ -94,7 +94,7 @@ def list_ssh_keys(json_output: bool = False) -> str:
         keys = response.json()
     except httpx.HTTPStatusError as e:
         if e.response.status_code == 401:
-            raise RuntimeError("Not authenticated. Run: wafer
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
         raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
     except httpx.RequestError as e:
         raise RuntimeError(f"Could not reach API: {e}") from e

@@ -107,7 +107,7 @@ def list_ssh_keys(json_output: bool = False) -> str:
         "No SSH keys registered.\n"
         "\n"
         "Add your SSH key:\n"
-        "  wafer
+        "  wafer ssh-keys add\n"
         "\n"
         "This will auto-detect your key from ~/.ssh/"
     )

@@ -149,7 +149,7 @@ def add_ssh_key(
         "  ssh-keygen -t ed25519\n"
         "\n"
         "Or specify a path:\n"
-        "  wafer
+        "  wafer ssh-keys add /path/to/key.pub"
     )
     pubkey_path = detected[0]

@@ -202,7 +202,7 @@ def add_ssh_key(
         key_data = response.json()
     except httpx.HTTPStatusError as e:
         if e.response.status_code == 401:
-            raise RuntimeError("Not authenticated. Run: wafer
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
         if e.response.status_code == 400:
             # Parse error detail
             try:

@@ -248,7 +248,7 @@ def remove_ssh_key(key_id: str, json_output: bool = False) -> str:
         response.raise_for_status()
     except httpx.HTTPStatusError as e:
         if e.response.status_code == 401:
-            raise RuntimeError("Not authenticated. Run: wafer
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
         if e.response.status_code == 404:
             raise RuntimeError(f"SSH key not found: {key_id}") from e
         raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
wafer/targets_ops.py
CHANGED
@@ -15,7 +15,6 @@ import logging
 import subprocess
 from collections.abc import Callable
 from dataclasses import dataclass, replace
-from datetime import UTC
 from pathlib import Path
 from typing import TYPE_CHECKING

@@ -31,26 +30,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)


-def _update_binding_cache(resource_id: str, spec_name: str, provider: str) -> None:
-    """Update the new target state cache when provisioning through the legacy path.
-
-    This bridges the old per-provider state files with the new unified cache
-    so that `wafer targets list` can see resources provisioned via the old flow.
-    """
-    from datetime import datetime
-
-    from wafer_core.targets.state_cache import BindingEntry, add_binding
-
-    add_binding(
-        resource_id,
-        BindingEntry(
-            spec_name=spec_name,
-            provider=provider,
-            bound_at=datetime.now(UTC).isoformat(),
-        ),
-    )
-
-
 @dataclass(frozen=True)
 class TargetSSHInfo:
     """SSH connection info for a target."""

@@ -156,8 +135,7 @@ async def _get_runpod_ssh_info(target: RunPodTarget) -> TargetSSHInfo:
     # Check if pod already exists and is running
     existing = get_pod_state(target.name)
     if existing and await check_pod_running(existing.pod_id):
-        # Reuse existing pod
-        _update_binding_cache(existing.pod_id, target.name, "runpod")
+        # Reuse existing pod
         return TargetSSHInfo(
             host=existing.public_ip,
             port=existing.ssh_port,

@@ -173,8 +151,6 @@ async def _get_runpod_ssh_info(target: RunPodTarget) -> TargetSSHInfo:
     target_keep_alive = replace(target, keep_alive=True)

     async with runpod_ssh_context(target_keep_alive) as ssh_info:
-        # Update new state cache with provisioned pod
-        _update_binding_cache(ssh_info.pod_id, target.name, "runpod")
         return TargetSSHInfo(
             host=ssh_info.host,
             port=ssh_info.port,

@@ -196,8 +172,7 @@ async def _get_digitalocean_ssh_info(target: DigitalOceanTarget) -> TargetSSHInfo:
     # Check if droplet already exists and is running
     existing = get_droplet_state(target.name)
     if existing and await check_droplet_running(existing.droplet_id):
-        # Reuse existing droplet
-        _update_binding_cache(existing.droplet_id, target.name, "digitalocean")
+        # Reuse existing droplet
         return TargetSSHInfo(
             host=existing.public_ip,
             port=22,  # DigitalOcean uses standard SSH port

@@ -209,8 +184,6 @@ async def _get_digitalocean_ssh_info(target: DigitalOceanTarget) -> TargetSSHInfo:
     target_keep_alive = replace(target, keep_alive=True)

     async with digitalocean_ssh_context(target_keep_alive) as ssh_info:
-        # Update new state cache with provisioned droplet
-        _update_binding_cache(ssh_info.droplet_id, target.name, "digitalocean")
         return TargetSSHInfo(
             host=ssh_info.host,
             port=ssh_info.port,