wafer-core 0.1.21__py3-none-any.whl → 0.1.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wafer_core/auth.py +38 -6
- wafer_core/environments/coding.py +8 -0
- wafer_core/rollouts/dtypes.py +4 -0
- wafer_core/rollouts/environments/localfs.py +50 -2
- wafer_core/rollouts/evaluation.py +17 -1
- wafer_core/rollouts/prompt.py +14 -4
- wafer_core/rollouts/skills.py +176 -0
- wafer_core/rollouts/templates/base.py +3 -0
- wafer_core/targets/runpod.py +154 -15
- wafer_core/tools/__init__.py +14 -0
- wafer_core/tools/file_tools/grep_tool.py +56 -29
- wafer_core/tools/search_docs_tool.py +196 -0
- wafer_core/tools/skill_tool.py +64 -0
- wafer_core/utils/backend.py +3 -0
- wafer_core/utils/kernel_utils/static_checker.py +175 -3
- wafer_core/utils/kernel_utils/targets/config.py +58 -24
- wafer_core/utils/modal_execution/modal_app.py +14 -27
- {wafer_core-0.1.21.dist-info → wafer_core-0.1.23.dist-info}/METADATA +1 -1
- {wafer_core-0.1.21.dist-info → wafer_core-0.1.23.dist-info}/RECORD +20 -17
- {wafer_core-0.1.21.dist-info → wafer_core-0.1.23.dist-info}/WHEEL +0 -0
|
@@ -155,6 +155,161 @@ def check_torch_computation_ops(code: str) -> tuple[bool, str]:
|
|
|
155
155
|
return (False, "")
|
|
156
156
|
|
|
157
157
|
|
|
158
|
+
# =============================================================================
|
|
159
|
+
# NN.MODULE FORWARD CALL CHECKS (Reward Hacking in forward())
|
|
160
|
+
# =============================================================================
|
|
161
|
+
|
|
162
|
+
# These patterns detect calling PyTorch nn.Module forward methods inside forward()
|
|
163
|
+
# e.g., self.conv(x), self.linear(x), self.bn(x) - these invoke cuBLAS/cuDNN
|
|
164
|
+
#
|
|
165
|
+
# This is different from:
|
|
166
|
+
# - nn.Linear(...) in __init__ = OK (just creates parameter container)
|
|
167
|
+
# - self.linear.weight in forward() = OK (accessing weights for custom kernel)
|
|
168
|
+
# - self.linear(x) in forward() = BAD (invokes PyTorch's matmul via cuBLAS)
|
|
169
|
+
|
|
170
|
+
NN_MODULE_FORWARD_PATTERNS = [
|
|
171
|
+
# Common layer types being called as functions
|
|
172
|
+
r"self\.(conv\d*d?|linear|bn|batch_norm|layer_norm|group_norm|instance_norm)\s*\(",
|
|
173
|
+
# More generic pattern: self.<name>(x) or self.<name>(input)
|
|
174
|
+
# But we need to be careful not to match custom module calls
|
|
175
|
+
]
|
|
176
|
+
|
|
177
|
+
# =============================================================================
|
|
178
|
+
# TORCH.NN.FUNCTIONAL CHECKS (Reward Hacking)
|
|
179
|
+
# =============================================================================
|
|
180
|
+
|
|
181
|
+
# Patterns for torch.nn.functional / F.* calls that bypass custom kernel requirement
|
|
182
|
+
# These call into cuBLAS/cuDNN under the hood
|
|
183
|
+
TORCH_FUNCTIONAL_PATTERNS = [
|
|
184
|
+
# F.linear, F.conv*, F.batch_norm etc. (common alias)
|
|
185
|
+
r"\bF\.(linear|conv[123]d|conv_transpose[123]d|batch_norm|layer_norm|group_norm|instance_norm)\s*\(",
|
|
186
|
+
# Full path torch.nn.functional.*
|
|
187
|
+
r"\btorch\.nn\.functional\.(linear|conv[123]d|conv_transpose[123]d|batch_norm|layer_norm|group_norm|instance_norm)\s*\(",
|
|
188
|
+
]
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def check_torch_functional_calls(code: str) -> tuple[bool, str]:
|
|
192
|
+
"""Check for torch.nn.functional / F.* calls in forward() method (reward hacking).
|
|
193
|
+
|
|
194
|
+
Detects patterns like F.linear(x, weight), F.conv2d(x, weight) which invoke
|
|
195
|
+
PyTorch's built-in operations (backed by cuBLAS/cuDNN) instead of custom kernels.
|
|
196
|
+
|
|
197
|
+
Note: We only check inside forward() to avoid false positives from imports or
|
|
198
|
+
training-mode fallbacks that aren't used during inference.
|
|
199
|
+
"""
|
|
200
|
+
# Only check inside forward() method
|
|
201
|
+
forward_code = _extract_forward_method(code)
|
|
202
|
+
if not forward_code:
|
|
203
|
+
return (False, "")
|
|
204
|
+
|
|
205
|
+
forward_code = _strip_comments(forward_code)
|
|
206
|
+
|
|
207
|
+
for pattern in TORCH_FUNCTIONAL_PATTERNS:
|
|
208
|
+
match = re.search(pattern, forward_code)
|
|
209
|
+
if match:
|
|
210
|
+
return (True, f"Uses torch.nn.functional in forward(): {match.group(0)} (reward hacking - must use custom kernel)")
|
|
211
|
+
|
|
212
|
+
return (False, "")
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _extract_forward_method(code: str) -> str:
|
|
216
|
+
"""Extract only the forward() method content from code."""
|
|
217
|
+
lines = code.split('\n')
|
|
218
|
+
result = []
|
|
219
|
+
in_forward = False
|
|
220
|
+
base_indent = 0
|
|
221
|
+
|
|
222
|
+
for i, line in enumerate(lines):
|
|
223
|
+
# Look for forward method definition
|
|
224
|
+
if re.search(r'\bdef\s+forward\s*\(\s*self', line):
|
|
225
|
+
in_forward = True
|
|
226
|
+
# Get the indentation level of the def line
|
|
227
|
+
base_indent = len(line) - len(line.lstrip())
|
|
228
|
+
result.append(line)
|
|
229
|
+
continue
|
|
230
|
+
|
|
231
|
+
if in_forward:
|
|
232
|
+
# Check if we've exited the forward method (new method/class at same or lower indent)
|
|
233
|
+
stripped = line.strip()
|
|
234
|
+
if stripped and not stripped.startswith('#') and not stripped.startswith('"""') and not stripped.startswith("'''"):
|
|
235
|
+
current_indent = len(line) - len(line.lstrip())
|
|
236
|
+
# If we hit a new def/class at the same or lower indentation, we're done
|
|
237
|
+
if current_indent <= base_indent and (stripped.startswith('def ') or stripped.startswith('class ')):
|
|
238
|
+
break
|
|
239
|
+
result.append(line)
|
|
240
|
+
|
|
241
|
+
return '\n'.join(result)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def check_nn_module_forward_call(code: str) -> tuple[bool, str]:
|
|
245
|
+
"""Check for nn.Module forward calls inside forward() method (reward hacking).
|
|
246
|
+
|
|
247
|
+
Detects patterns like self.conv(x), self.linear(x) which invoke PyTorch's
|
|
248
|
+
built-in layers (backed by cuBLAS/cuDNN) instead of custom kernels.
|
|
249
|
+
"""
|
|
250
|
+
# Only check inside forward() method
|
|
251
|
+
forward_code = _extract_forward_method(code)
|
|
252
|
+
if not forward_code:
|
|
253
|
+
return (False, "")
|
|
254
|
+
|
|
255
|
+
forward_code = _strip_comments(forward_code)
|
|
256
|
+
|
|
257
|
+
for pattern in NN_MODULE_FORWARD_PATTERNS:
|
|
258
|
+
match = re.search(pattern, forward_code)
|
|
259
|
+
if match:
|
|
260
|
+
return (True, f"Calls PyTorch nn.Module in forward(): {match.group(0)} (reward hacking - must use custom kernel)")
|
|
261
|
+
|
|
262
|
+
return (False, "")
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# =============================================================================
|
|
266
|
+
# CUBLAS/CUDNN DIRECT USAGE CHECKS (Reward Hacking)
|
|
267
|
+
# =============================================================================
|
|
268
|
+
|
|
269
|
+
# Direct cuBLAS calls bypass custom kernel requirement
|
|
270
|
+
CUBLAS_PATTERNS = [
|
|
271
|
+
r"\bcublas[A-Z]\w+\s*\(", # cublasSgemm, cublasGemmEx, etc.
|
|
272
|
+
r"\bcublasCreate\b",
|
|
273
|
+
r"\bcublasDestroy\b",
|
|
274
|
+
r"\bcublasSetStream\b",
|
|
275
|
+
r"\bcublasSetMathMode\b",
|
|
276
|
+
r"#include\s*[<\"]cublas", # #include <cublas_v2.h>
|
|
277
|
+
r"CUBLAS_TENSOR_OP_MATH",
|
|
278
|
+
]
|
|
279
|
+
|
|
280
|
+
# Direct cuDNN calls bypass custom kernel requirement
|
|
281
|
+
CUDNN_PATTERNS = [
|
|
282
|
+
r"\bcudnn[A-Z]\w+\s*\(", # cudnnConvolutionForward, etc.
|
|
283
|
+
r"\bcudnnCreate\b",
|
|
284
|
+
r"\bcudnnDestroy\b",
|
|
285
|
+
r"#include\s*[<\"]cudnn", # #include <cudnn.h>
|
|
286
|
+
]
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def check_cublas_usage(code: str) -> tuple[bool, str]:
|
|
290
|
+
"""Check for direct cuBLAS API usage (reward hacking)."""
|
|
291
|
+
code = _strip_comments(code)
|
|
292
|
+
|
|
293
|
+
for pattern in CUBLAS_PATTERNS:
|
|
294
|
+
match = re.search(pattern, code)
|
|
295
|
+
if match:
|
|
296
|
+
return (True, f"Uses cuBLAS directly: {match.group(0)} (reward hacking - must write custom kernel)")
|
|
297
|
+
|
|
298
|
+
return (False, "")
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def check_cudnn_usage(code: str) -> tuple[bool, str]:
|
|
302
|
+
"""Check for direct cuDNN API usage (reward hacking)."""
|
|
303
|
+
code = _strip_comments(code)
|
|
304
|
+
|
|
305
|
+
for pattern in CUDNN_PATTERNS:
|
|
306
|
+
match = re.search(pattern, code)
|
|
307
|
+
if match:
|
|
308
|
+
return (True, f"Uses cuDNN directly: {match.group(0)} (reward hacking - must write custom kernel)")
|
|
309
|
+
|
|
310
|
+
return (False, "")
|
|
311
|
+
|
|
312
|
+
|
|
158
313
|
# =============================================================================
|
|
159
314
|
# TIMING MANIPULATION CHECKS
|
|
160
315
|
# =============================================================================
|
|
@@ -310,8 +465,16 @@ def check_tk_impl(code: str) -> tuple[bool, str]:
|
|
|
310
465
|
def check_cute_impl(code: str) -> tuple[bool, str]:
|
|
311
466
|
"""Check for valid CUTLASS/CuTe kernel implementation."""
|
|
312
467
|
code = _strip_comments(code)
|
|
313
|
-
|
|
314
|
-
|
|
468
|
+
# Accept explicit namespace qualifiers OR using namespace declarations
|
|
469
|
+
valid_patterns = [
|
|
470
|
+
"cute::",
|
|
471
|
+
"cutlass::",
|
|
472
|
+
"from cutlass",
|
|
473
|
+
"using namespace cute",
|
|
474
|
+
"using namespace cutlass",
|
|
475
|
+
]
|
|
476
|
+
if not any(p in code for p in valid_patterns):
|
|
477
|
+
return (True, "Missing cute:: or cutlass:: namespace (or 'using namespace')")
|
|
315
478
|
return (False, "")
|
|
316
479
|
|
|
317
480
|
|
|
@@ -334,6 +497,11 @@ CHECK_FUNCTIONS: dict[str, Callable[[str], tuple[bool, str]]] = {
|
|
|
334
497
|
"timing_event_patch": check_timing_event_patch,
|
|
335
498
|
# Torch ops
|
|
336
499
|
"torch_computation_ops": check_torch_computation_ops,
|
|
500
|
+
# Reward hacking checks
|
|
501
|
+
"cublas_usage": check_cublas_usage,
|
|
502
|
+
"cudnn_usage": check_cudnn_usage,
|
|
503
|
+
"nn_module_forward_call": check_nn_module_forward_call,
|
|
504
|
+
"torch_functional_calls": check_torch_functional_calls,
|
|
337
505
|
# Timing manipulation
|
|
338
506
|
"stream_injection": check_stream_injection,
|
|
339
507
|
"thread_injection": check_thread_injection,
|
|
@@ -363,12 +531,16 @@ STRICT_CHECKS = [
|
|
|
363
531
|
"timing_event_patch",
|
|
364
532
|
"thread_injection",
|
|
365
533
|
"lazy_eval",
|
|
534
|
+
"cublas_usage", # Direct cuBLAS is reward hacking
|
|
535
|
+
"cudnn_usage", # Direct cuDNN is reward hacking
|
|
536
|
+
"nn_module_forward_call", # Calling self.conv(x), self.linear(x) in forward() is reward hacking
|
|
537
|
+
"torch_functional_calls", # Calling F.linear(), F.conv2d() in forward() is reward hacking
|
|
538
|
+
"torch_computation_ops", # torch.mm, torch.matmul, torch.conv* etc. are reward hacking
|
|
366
539
|
]
|
|
367
540
|
|
|
368
541
|
# Checks that emit warnings but don't fail
|
|
369
542
|
WARNING_CHECKS = [
|
|
370
543
|
"pytorch_wrap",
|
|
371
|
-
"torch_computation_ops",
|
|
372
544
|
"stream_injection",
|
|
373
545
|
]
|
|
374
546
|
|
|
@@ -61,7 +61,9 @@ class BaremetalTarget:
|
|
|
61
61
|
ncu_available: bool = True # Baremetal typically has NCU
|
|
62
62
|
|
|
63
63
|
# Docker execution config (Modal-like). If docker_image is set, run in container.
|
|
64
|
-
docker_image: str | None =
|
|
64
|
+
docker_image: str | None = (
|
|
65
|
+
None # Docker image to use (e.g., "nvcr.io/nvidia/cutlass:4.3-devel")
|
|
66
|
+
)
|
|
65
67
|
pip_packages: tuple[str, ...] = () # Packages to install via uv pip install
|
|
66
68
|
torch_package: str | None = None # Torch package spec (e.g., "torch>=2.8.0")
|
|
67
69
|
torch_index_url: str | None = None # Custom index for torch (e.g., PyTorch nightly)
|
|
@@ -69,7 +71,9 @@ class BaremetalTarget:
|
|
|
69
71
|
def __post_init__(self) -> None:
|
|
70
72
|
"""Validate configuration."""
|
|
71
73
|
assert len(self.gpu_ids) > 0, "Must specify at least one GPU ID"
|
|
72
|
-
assert ":" in self.ssh_target,
|
|
74
|
+
assert ":" in self.ssh_target, (
|
|
75
|
+
f"ssh_target must include port (user@host:port), got: {self.ssh_target}"
|
|
76
|
+
)
|
|
73
77
|
# If torch_index_url is set, torch_package must also be set
|
|
74
78
|
if self.torch_index_url:
|
|
75
79
|
assert self.torch_package, "torch_package must be set when torch_index_url is provided"
|
|
@@ -114,7 +118,9 @@ class VMTarget:
|
|
|
114
118
|
ncu_available: bool = False # VMs typically don't have NCU
|
|
115
119
|
|
|
116
120
|
# Docker execution config (Modal-like). If docker_image is set, run in container.
|
|
117
|
-
docker_image: str | None =
|
|
121
|
+
docker_image: str | None = (
|
|
122
|
+
None # Docker image to use (e.g., "nvcr.io/nvidia/pytorch:24.01-py3")
|
|
123
|
+
)
|
|
118
124
|
pip_packages: tuple[str, ...] = () # Packages to install via uv pip install
|
|
119
125
|
torch_package: str | None = None # Torch package spec (e.g., "torch>=2.8.0")
|
|
120
126
|
torch_index_url: str | None = None # Custom index for torch (e.g., PyTorch nightly)
|
|
@@ -122,7 +128,9 @@ class VMTarget:
|
|
|
122
128
|
def __post_init__(self) -> None:
|
|
123
129
|
"""Validate configuration."""
|
|
124
130
|
assert len(self.gpu_ids) > 0, "Must specify at least one GPU ID"
|
|
125
|
-
assert ":" in self.ssh_target,
|
|
131
|
+
assert ":" in self.ssh_target, (
|
|
132
|
+
f"ssh_target must include port (user@host:port), got: {self.ssh_target}"
|
|
133
|
+
)
|
|
126
134
|
# If torch_index_url is set, torch_package must also be set
|
|
127
135
|
if self.torch_index_url:
|
|
128
136
|
assert self.torch_package, "torch_package must be set when torch_index_url is provided"
|
|
@@ -282,7 +290,7 @@ class RunPodTarget:
|
|
|
282
290
|
ssh_key="~/.ssh/id_ed25519",
|
|
283
291
|
gpu_type_id="AMD Instinct MI300X OAM",
|
|
284
292
|
gpu_count=1,
|
|
285
|
-
image="
|
|
293
|
+
image="rocm/pytorch:rocm7.0.2_ubuntu24.04_py3.12_pytorch_release_2.7.1",
|
|
286
294
|
keep_alive=True, # Don't terminate after eval
|
|
287
295
|
)
|
|
288
296
|
|
|
@@ -296,7 +304,21 @@ class RunPodTarget:
|
|
|
296
304
|
gpu_type_id: str = AMD_MI300X_GPU_ID # RunPod GPU type identifier
|
|
297
305
|
gpu_count: int = 1
|
|
298
306
|
container_disk_gb: int = 50
|
|
299
|
-
|
|
307
|
+
# TODO: Consider creating a custom Docker image with HipKittens pre-installed
|
|
308
|
+
# to avoid needing `wafer config targets install <target> hipkittens`.
|
|
309
|
+
# HipKittens repo: https://github.com/HazyResearch/hipkittens
|
|
310
|
+
# CK (Composable Kernel) is already included in ROCm 7.0.
|
|
311
|
+
#
|
|
312
|
+
# WARNING: PyTorch's hipify can corrupt /opt/rocm/include/thrust/ headers.
|
|
313
|
+
# If you see "cuda/__cccl_config not found" errors, run:
|
|
314
|
+
# apt-get install --reinstall -y rocthrust
|
|
315
|
+
# See docker/rocm7-runpod/README.md for details.
|
|
316
|
+
image: str = "rocm/pytorch:rocm7.0.2_ubuntu24.04_py3.12_pytorch_release_2.7.1"
|
|
317
|
+
|
|
318
|
+
# RunPod template ID — required for non-RunPod images that need custom
|
|
319
|
+
# dockerArgs (e.g. to install and start sshd). When set, takes priority
|
|
320
|
+
# over `image` in the deploy mutation.
|
|
321
|
+
template_id: str | None = None
|
|
300
322
|
|
|
301
323
|
# Timeouts
|
|
302
324
|
provision_timeout: int = 900 # 15 min for SSH to be ready
|
|
@@ -325,12 +347,13 @@ class RunPodTarget:
|
|
|
325
347
|
|
|
326
348
|
# Check for API key (env var or ~/.wafer/auth.json)
|
|
327
349
|
api_key = get_api_key("runpod")
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
350
|
+
if not api_key:
|
|
351
|
+
raise ValueError(
|
|
352
|
+
"RunPod API key not found.\n"
|
|
353
|
+
"Set WAFER_RUNPOD_API_KEY environment variable, or run:\n"
|
|
354
|
+
" wafer auth login runpod\n"
|
|
355
|
+
"Get your API key from: https://runpod.io/console/user/settings"
|
|
356
|
+
)
|
|
334
357
|
|
|
335
358
|
|
|
336
359
|
@dataclass(frozen=True)
|
|
@@ -370,7 +393,9 @@ class LocalTarget:
|
|
|
370
393
|
"""Validate configuration."""
|
|
371
394
|
assert self.name, "name cannot be empty"
|
|
372
395
|
assert len(self.gpu_ids) > 0, "Must specify at least one GPU ID"
|
|
373
|
-
assert self.vendor in ("nvidia", "amd"),
|
|
396
|
+
assert self.vendor in ("nvidia", "amd"), (
|
|
397
|
+
f"vendor must be 'nvidia' or 'amd', got: {self.vendor}"
|
|
398
|
+
)
|
|
374
399
|
|
|
375
400
|
|
|
376
401
|
@dataclass(frozen=True)
|
|
@@ -415,7 +440,7 @@ class DigitalOceanTarget:
|
|
|
415
440
|
# DigitalOcean instance configuration
|
|
416
441
|
region: str = "atl1" # Atlanta (AMD GPUs available here)
|
|
417
442
|
size_slug: str = "gpu-mi300x1-192gb-devcloud" # Single MI300X GPU
|
|
418
|
-
image: str = "
|
|
443
|
+
image: str = "amd-pytorchrocm7" # PyTorch (ROCm7) marketplace image
|
|
419
444
|
|
|
420
445
|
# Timeouts
|
|
421
446
|
provision_timeout: int = 600 # 10 min for droplet to be ready
|
|
@@ -443,16 +468,25 @@ class DigitalOceanTarget:
|
|
|
443
468
|
|
|
444
469
|
# Check for API key (env var or ~/.wafer/auth.json)
|
|
445
470
|
api_key = get_api_key("digitalocean")
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
471
|
+
if not api_key:
|
|
472
|
+
raise ValueError(
|
|
473
|
+
"DigitalOcean API key not found.\n"
|
|
474
|
+
"Set WAFER_AMD_DIGITALOCEAN_API_KEY environment variable, or run:\n"
|
|
475
|
+
" wafer auth login digitalocean\n"
|
|
476
|
+
"Get your API key from: https://cloud.digitalocean.com/account/api/tokens"
|
|
477
|
+
)
|
|
452
478
|
|
|
453
479
|
|
|
454
480
|
# Union type for target configs
|
|
455
|
-
TargetConfig =
|
|
481
|
+
TargetConfig = (
|
|
482
|
+
BaremetalTarget
|
|
483
|
+
| VMTarget
|
|
484
|
+
| ModalTarget
|
|
485
|
+
| WorkspaceTarget
|
|
486
|
+
| RunPodTarget
|
|
487
|
+
| DigitalOceanTarget
|
|
488
|
+
| LocalTarget
|
|
489
|
+
)
|
|
456
490
|
|
|
457
491
|
|
|
458
492
|
# Type guard functions for pattern matching
|
|
@@ -517,9 +551,9 @@ def target_to_deployment_config(target: TargetConfig, gpu_id: int) -> Deployment
|
|
|
517
551
|
from wafer_core.utils.kernel_utils.deployment import DeploymentConfig
|
|
518
552
|
|
|
519
553
|
# Type narrowing: Only SSH-based targets supported (not Modal)
|
|
520
|
-
assert not isinstance(
|
|
521
|
-
|
|
522
|
-
)
|
|
554
|
+
assert not isinstance(target, ModalTarget), (
|
|
555
|
+
f"target_to_deployment_config only supports SSH targets, got {type(target).__name__}"
|
|
556
|
+
)
|
|
523
557
|
|
|
524
558
|
return DeploymentConfig(
|
|
525
559
|
ssh_target=target.ssh_target,
|
|
@@ -20,35 +20,17 @@ import modal
|
|
|
20
20
|
|
|
21
21
|
# Build Modal image with all dependencies
|
|
22
22
|
# This image is cached and reused across function invocations
|
|
23
|
-
def build_modal_image(
|
|
24
|
-
gpu_type: str = "B200",
|
|
25
|
-
compute_capability: str = "10.0",
|
|
26
|
-
) -> modal.Image:
|
|
23
|
+
def build_modal_image() -> modal.Image:
|
|
27
24
|
"""Build Modal image with PyTorch, CUTLASS, and evaluation dependencies.
|
|
28
25
|
|
|
29
26
|
Uses explicit local code inclusion to avoid pulling in SSH deployment code.
|
|
30
27
|
|
|
31
|
-
Phase 2 solution from MODAL_HANDOFF.md:
|
|
32
|
-
- Use add_local_dir with ignore parameter to exclude deployment files
|
|
33
|
-
- Only include files needed for kernel evaluation
|
|
34
|
-
|
|
35
|
-
Args:
|
|
36
|
-
gpu_type: GPU type (determines PyTorch index URL)
|
|
37
|
-
compute_capability: CUDA compute capability
|
|
38
|
-
|
|
39
28
|
Returns:
|
|
40
29
|
Modal Image ready for kernel evaluation
|
|
41
30
|
"""
|
|
42
|
-
#
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
# Blackwell requires PyTorch 2.8+ with CUDA 12.8
|
|
46
|
-
torch_index = "https://download.pytorch.org/whl/nightly/cu128"
|
|
47
|
-
torch_version = "torch>=2.8.0"
|
|
48
|
-
else:
|
|
49
|
-
# Older GPUs (H100, A100) use stable PyTorch
|
|
50
|
-
torch_index = "https://download.pytorch.org/whl/cu124"
|
|
51
|
-
torch_version = "torch>=2.4.0"
|
|
31
|
+
# Use CUDA 13.0 for all GPUs (H100, A100, B200, GB200)
|
|
32
|
+
torch_index = "https://download.pytorch.org/whl/cu130"
|
|
33
|
+
torch_version = "torch>=2.6.0"
|
|
52
34
|
|
|
53
35
|
# Build image with dependencies
|
|
54
36
|
image = (
|
|
@@ -74,6 +56,15 @@ def build_modal_image(
|
|
|
74
56
|
"scipy",
|
|
75
57
|
"pytest",
|
|
76
58
|
)
|
|
59
|
+
# Install CUTLASS headers for C++ kernel compilation (v4.3.5)
|
|
60
|
+
.run_commands(
|
|
61
|
+
"git clone --depth 1 --branch v4.3.5 https://github.com/NVIDIA/cutlass.git /usr/local/cutlass",
|
|
62
|
+
# Verify CUTLASS was installed correctly
|
|
63
|
+
"ls -la /usr/local/cutlass/include/cutlass/util/ | head -20",
|
|
64
|
+
"test -f /usr/local/cutlass/include/cutlass/util/packed_stride.hpp && echo 'CUTLASS headers OK' || echo 'CUTLASS headers MISSING'",
|
|
65
|
+
)
|
|
66
|
+
# Set CUTLASS_PATH environment variable
|
|
67
|
+
.env({"CUTLASS_PATH": "/usr/local/cutlass/include"})
|
|
77
68
|
# Create empty __init__.py files for proper Python package structure
|
|
78
69
|
# MUST run before add_local_* commands (Modal restriction)
|
|
79
70
|
.run_commands(
|
|
@@ -111,20 +102,16 @@ def build_modal_image(
|
|
|
111
102
|
# Create app (can be customized per target)
|
|
112
103
|
def create_modal_app(
|
|
113
104
|
app_name: str = "test-kernel-eval", # Match test script default
|
|
114
|
-
gpu_type: str = "B200",
|
|
115
|
-
compute_capability: str = "10.0",
|
|
116
105
|
) -> modal.App:
|
|
117
106
|
"""Create Modal app for kernel evaluation.
|
|
118
107
|
|
|
119
108
|
Args:
|
|
120
109
|
app_name: Modal app name
|
|
121
|
-
gpu_type: GPU type for image building
|
|
122
|
-
compute_capability: CUDA compute capability
|
|
123
110
|
|
|
124
111
|
Returns:
|
|
125
112
|
Modal App instance
|
|
126
113
|
"""
|
|
127
|
-
image = build_modal_image(
|
|
114
|
+
image = build_modal_image()
|
|
128
115
|
return modal.App(name=app_name, image=image)
|
|
129
116
|
|
|
130
117
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
wafer_core/__init__.py,sha256=syB2JjzvL91otODCKsGCgCPE6gVCINhTUv9j98k6tL0,3209
|
|
2
2
|
wafer_core/async_ssh.py,sha256=ocw2Gh5p8ltKeoqG_q32DXOBfu5q-IE7jCnzMbQN9WI,28713
|
|
3
|
-
wafer_core/auth.py,sha256=
|
|
3
|
+
wafer_core/auth.py,sha256=JpUkZ3bROIsgexayak5TLiGqUAR5kqGjekwqQRvIXH0,7235
|
|
4
4
|
wafer_core/gpu.py,sha256=ENa92btjXsx6ldpoyKfRrAmfy-LHG2KpA5k7SWd6Q_s,28627
|
|
5
5
|
wafer_core/gpu_detect.py,sha256=kpD8Q_G6GA9j-WnnnTNA3BBPulkGcWnZiogOmjKDao0,13650
|
|
6
6
|
wafer_core/problem_config.py,sha256=8oqxL9-pvgzi8BtFxgDcqZW4e6DV2OCZOYkcPoyXrc8,10913
|
|
@@ -12,7 +12,7 @@ wafer_core/config/__init__.py,sha256=hKywfjA4YXd4lBeBFEcBoMwFoflPHJTiBnkTq7_JYOQ
|
|
|
12
12
|
wafer_core/config/loader.py,sha256=k7JnILmO13TWUzIv9Lm8fvmj3UfYHZDgaFurjQ-GXpY,6623
|
|
13
13
|
wafer_core/config/schema.py,sha256=2WhFlnG0VYYX4T-70BLeJK8Janvi4KEa8KKGZA7331w,3898
|
|
14
14
|
wafer_core/environments/__init__.py,sha256=SIsResVtm22tr_d-oHPeeSxrkhFdmPOFico3DqtRqK8,238
|
|
15
|
-
wafer_core/environments/coding.py,sha256=
|
|
15
|
+
wafer_core/environments/coding.py,sha256=N-ELZwJu5vKLCVtwO25c6JSty6fmqf85VR2d3WJ4RXw,8559
|
|
16
16
|
wafer_core/environments/gpumode.py,sha256=8Da08nltvN_YloNyYI6-omN2D4n5C7aptKDCtUgT2bQ,17191
|
|
17
17
|
wafer_core/lib/__init__.py,sha256=4-4p3mhwlquejWGglYXU8_nHdA0LoPaa_jGzcm13USA,1325
|
|
18
18
|
wafer_core/lib/kernel_scope/__init__.py,sha256=WW2vu8jUlqOu-MCpgO40lIYacCA9N2u-uuECIs_JO2w,2817
|
|
@@ -336,9 +336,9 @@ wafer_core/rollouts/agents.py,sha256=Uv1kjYogUfdPl18YfkVxVqFTbmWfuJQrxem_iHTUgdw
|
|
|
336
336
|
wafer_core/rollouts/cli.py,sha256=2NqgegKdlmxD0eJzGOMB5o_1Hb5t7O5JpP_32uvF2BE,80117
|
|
337
337
|
wafer_core/rollouts/cli_agents.py,sha256=e4qqqYBzWLsbw8FsNnddGApWp_on9Cvzrfd1amiAyvI,20641
|
|
338
338
|
wafer_core/rollouts/deploy.py,sha256=3t88fM_BMyAPkxIl8pS4r5ogHJvrlqWQDuIaltDZBRc,40924
|
|
339
|
-
wafer_core/rollouts/dtypes.py,sha256=
|
|
339
|
+
wafer_core/rollouts/dtypes.py,sha256=GUezPTzkd8E-nDlqdGE7idUthyZC-7jTrbpa4ye-v8k,61146
|
|
340
340
|
wafer_core/rollouts/eval_helpers.py,sha256=OE7uQZRcbqQhpFqb4zOj8zafc9Gr6xZJpSrMvxXKVUw,1699
|
|
341
|
-
wafer_core/rollouts/evaluation.py,sha256=
|
|
341
|
+
wafer_core/rollouts/evaluation.py,sha256=fk-pGZ5vpocVmw1iBbHtxMK0K6l8pYTLHCpDNvRY1Xo,69142
|
|
342
342
|
wafer_core/rollouts/events.py,sha256=z85J8kq0LXPj5CiUk4RkiTQg--r9xiO7QeeJwkyUOto,7505
|
|
343
343
|
wafer_core/rollouts/export.py,sha256=0CfdBB7Du4E3VekKEUcTwTEFS1bOMGZ9GbD5KU3CecQ,11583
|
|
344
344
|
wafer_core/rollouts/feedback.py,sha256=mu17eQbAinXZWI3hMYLq-LyF4JAdH9SfNRWY-0S8jvQ,6769
|
|
@@ -350,11 +350,12 @@ wafer_core/rollouts/paths.py,sha256=9XtrA9ylhb5LttMFe2DE7X0IHeUMjuGUerII9OscYec,
|
|
|
350
350
|
wafer_core/rollouts/pipeline.py,sha256=vlJTYE3ZX2XScpF9pmtv91K8Q0g8uLmcbI5jn6b5Hzg,15319
|
|
351
351
|
wafer_core/rollouts/progress.py,sha256=szA9cvWT2xUxGVhF9BaAqJMmKDqMAUlxImxcOpcnqbY,29228
|
|
352
352
|
wafer_core/rollouts/progress_display.py,sha256=it-IiI37k9whAuB6T_66GYgsZyidCq5x00URiOcxe2c,15769
|
|
353
|
-
wafer_core/rollouts/prompt.py,sha256=
|
|
353
|
+
wafer_core/rollouts/prompt.py,sha256=EDmGb0rhWwke7tokIcO8dukc3q5c8x0n5Omi5CpAQmA,11022
|
|
354
354
|
wafer_core/rollouts/providers.py,sha256=dcGJh1p30hstVbCDDtJ902lyafkg81DKjcOzb0uuKS0,1400
|
|
355
355
|
wafer_core/rollouts/remote.py,sha256=cAYpRCONlsTeRxzLiegAUfjZWGtqBNwZTHehMhk5ldA,8816
|
|
356
356
|
wafer_core/rollouts/scoring.py,sha256=qeIT8Z7pK51XRDmN2sGdg_hIPRabWqoQIYKsuytlvRo,8838
|
|
357
357
|
wafer_core/rollouts/search.py,sha256=5BEDuw9FVbQhei3nvUXEVwBU5ouwgJE6ONhEqvU5Ldc,14696
|
|
358
|
+
wafer_core/rollouts/skills.py,sha256=ATYoG02Cc6_VrtE415TnseBFJrKOMq27z-5YgBgPpZQ,5081
|
|
358
359
|
wafer_core/rollouts/slice.py,sha256=darOZO53BuSPfvv_KjOSzulGVSWbL4OuoE3k6xXpBFg,20195
|
|
359
360
|
wafer_core/rollouts/store.py,sha256=UDP9idDOEVs_0Pslx0K_Y8E1i-BeoqVSaxdQiaqtz1E,18051
|
|
360
361
|
wafer_core/rollouts/transform_messages.py,sha256=yldzdLgugNYb5Zxju7myFBel1tmrHXx9M399ImqPLGI,20891
|
|
@@ -394,7 +395,7 @@ wafer_core/rollouts/environments/compose.py,sha256=DlJA_GdzByWjVvGeR4MrcQIB4ucV6
|
|
|
394
395
|
wafer_core/rollouts/environments/cuda_grep.py,sha256=o4GPJcnKuB96KwE4UWkxXq5DdaKYMz-QizcZXleOKLs,22007
|
|
395
396
|
wafer_core/rollouts/environments/git_worktree.py,sha256=f4S-OI-m6OEMyrEl3TfD4oYLXkkNgsiHuX6NOHCVfSQ,22397
|
|
396
397
|
wafer_core/rollouts/environments/handoff.py,sha256=pvhcSDdltZ1zJ3Y_SAJlBmX6FhXjZLNYneXwjFqb9lE,16941
|
|
397
|
-
wafer_core/rollouts/environments/localfs.py,sha256=
|
|
398
|
+
wafer_core/rollouts/environments/localfs.py,sha256=xceepFc-eh9RqwTSJd_WiODVCs4Jm-d8rQm_MvutG0A,45766
|
|
398
399
|
wafer_core/rollouts/environments/no_tools.py,sha256=FRsiMDzi8ma15xePTANteUxAOiOw0s90459XMvSq1_k,2970
|
|
399
400
|
wafer_core/rollouts/environments/oracle.py,sha256=OtviDKed6DEIy67TdohqSGmvQT8XuuQLGnxJF9B7yrE,7563
|
|
400
401
|
wafer_core/rollouts/environments/repl.py,sha256=DyvyqEbBBytliqZ2-uAJqm-F7gdROG5-9LDFVyCo6lo,36042
|
|
@@ -501,7 +502,7 @@ wafer_core/rollouts/providers/openai_completions.py,sha256=3vUA74qjrxG-aOjyngtnZ
|
|
|
501
502
|
wafer_core/rollouts/providers/openai_responses.py,sha256=xlyeI9h7aZEbpFY_8_zQ6IIYbMeNgcsEVRi92PBEAWc,32470
|
|
502
503
|
wafer_core/rollouts/providers/sglang.py,sha256=kahYlrFG008D4jRA-c6mylBsTe-qTryKMhsjUbidduU,35525
|
|
503
504
|
wafer_core/rollouts/templates/__init__.py,sha256=8qANHtoWZe9zpAWufkXlo8tQ07_Lw-RhX-lm2i-0ORQ,976
|
|
504
|
-
wafer_core/rollouts/templates/base.py,sha256=
|
|
505
|
+
wafer_core/rollouts/templates/base.py,sha256=aBojLjssj2YaPjgCZMvHYWxtGa83Y4uqt1Cxoy1J3v0,7010
|
|
505
506
|
wafer_core/rollouts/templates/loader.py,sha256=eNRWP2zMTsygpewBhAO3Co0vLb4o4SwJo74Jp1DWS-0,4726
|
|
506
507
|
wafer_core/rollouts/tests/test_slice.py,sha256=jUJFkTUzY6TNrimFH5kkfOavn3J5Gg-hsFWUOzs30lk,9148
|
|
507
508
|
wafer_core/rollouts/tools/__init__.py,sha256=nLEJrlhT1anqInh6u4CeX8CCrjUJpAEMp0jx-G0V6gs,298
|
|
@@ -584,11 +585,13 @@ wafer_core/sessions/dtypes.py,sha256=K6nOjvL6sjCGY7GTtdEygf1IZY_18R9YkHGqFyMd8wY
|
|
|
584
585
|
wafer_core/sessions/hooks.py,sha256=A-txm6ufnRGQCdtP3vwh7oEOdlLN9Tv0XsjORMihuAI,4295
|
|
585
586
|
wafer_core/targets/__init__.py,sha256=sHndC7AAOaHXlrmDXFLB53a5Y8DBjuyqS6nwsO2nj-Y,1728
|
|
586
587
|
wafer_core/targets/digitalocean.py,sha256=cvoYpYjtSyy5t2lQAPi7ERruuuibronah_ivOiduAHQ,16550
|
|
587
|
-
wafer_core/targets/runpod.py,sha256=
|
|
588
|
-
wafer_core/tools/__init__.py,sha256=
|
|
588
|
+
wafer_core/targets/runpod.py,sha256=LrVmNvA6qjzL5nbGSWvtw7CHrK6bDu7_o3vKIek00Tc,20286
|
|
589
|
+
wafer_core/tools/__init__.py,sha256=deGQQlcdSD6zQx8JHizfSXgF5-EntdBOF_ngtob1-VU,3506
|
|
589
590
|
wafer_core/tools/bash_tool.py,sha256=daoKOVGSgL0x9X_3l8Apd6-wFH4VMXMGJwVemw2FIfc,16828
|
|
590
591
|
wafer_core/tools/glob_tool.py,sha256=9X5PdOjQJj7kiVNqqCZC0-1LmnE6wHx3Zc9zfMjtXdc,3533
|
|
591
592
|
wafer_core/tools/grep_tool.py,sha256=cStyDz-J47oDLLZCL83yOvYo8Ijv4qu3D372JKT_ptM,4580
|
|
593
|
+
wafer_core/tools/search_docs_tool.py,sha256=WY4hY83sseX8Fpxvw6DZxiG-F95F2t3-4PyfMD1Lpkg,6809
|
|
594
|
+
wafer_core/tools/skill_tool.py,sha256=JXsT5hBTUH5U4tmzHEywU7eHHt5xCEF79tL2tsuk4-c,2067
|
|
592
595
|
wafer_core/tools/wafer_tool.py,sha256=-dgPTHbWXq3I3wFj0mP7-lj5iZqGRoFvFf9IEEo3plQ,6345
|
|
593
596
|
wafer_core/tools/write_kernel_tool.py,sha256=dJjhr-WBhVNe06hcJQVmBZTbS8mid64KF1MwlE2s2R4,21547
|
|
594
597
|
wafer_core/tools/autotuner/BENCHMARKING.md,sha256=RkJ2wFhbDFXuMbw0mOW4pRqntT0UirptXwIxyrA1_KM,3825
|
|
@@ -613,7 +616,7 @@ wafer_core/tools/capture_tool/metrics.py,sha256=BFZNmdE-kh3LneYdWXTNZmlLuo-DCrP5
|
|
|
613
616
|
wafer_core/tools/file_tools/__init__.py,sha256=2H7Rq5bijNQHGO4W6jjQAShkrcmdcHC0EQ8mBpgrApI,632
|
|
614
617
|
wafer_core/tools/file_tools/edit_tool.py,sha256=Efx83pM1Ljb07cJmAGVhPX4YiPJICK70sZM6uCjRWB0,4109
|
|
615
618
|
wafer_core/tools/file_tools/glob_tool.py,sha256=Av4LfC21fHXbnSsgh_9zDxlY9Qhb48aApaGos4j3B4g,3437
|
|
616
|
-
wafer_core/tools/file_tools/grep_tool.py,sha256=
|
|
619
|
+
wafer_core/tools/file_tools/grep_tool.py,sha256=42eFj2pxBBrs5eg_GhyYJ-j2fNWkmGPvrEqXFmi5E10,5539
|
|
617
620
|
wafer_core/tools/file_tools/read_tool.py,sha256=K0Hd8zwyL4Yva5YO9spXDfTRfXvfjqh9ztVrA8s1bJE,3961
|
|
618
621
|
wafer_core/tools/file_tools/utils.py,sha256=HgaqYan2Pky4hTLX2L9d2Gj9oS325H7rFbJj-jryNtc,2576
|
|
619
622
|
wafer_core/tools/file_tools/write_tool.py,sha256=X4N8y8wB-k9d5PcMRmZMRKIXlG9jHJiRdlEFFRLdZzs,2083
|
|
@@ -640,7 +643,7 @@ wafer_core/tools/tracelens_tools/tracelens_collective_tool.py,sha256=0E3FhfaA1N0
|
|
|
640
643
|
wafer_core/tools/tracelens_tools/tracelens_compare_tool.py,sha256=99dUsB4wuYjxbh4X6Nsf2AtDMs94Uzy04tSemDOmKhg,4458
|
|
641
644
|
wafer_core/tools/tracelens_tools/tracelens_report_tool.py,sha256=unuEx2zXaK42lA3qojS-WzFlBmIFrS75GHSgXUnDXGE,4720
|
|
642
645
|
wafer_core/utils/__init__.py,sha256=oPHgkMkE7wS2lYKLlXrw4Ia5EHnpVcGHFfpWebIlVKs,354
|
|
643
|
-
wafer_core/utils/backend.py,sha256=
|
|
646
|
+
wafer_core/utils/backend.py,sha256=zt5AX00OXSIstprvQ1_WNf_PQYphD2y53kOXWvg20RY,8986
|
|
644
647
|
wafer_core/utils/code_validation.py,sha256=UqS4UVDxO-atdbn6i7JygX6IFPITvT56zZn1t-ZNuM8,4692
|
|
645
648
|
wafer_core/utils/environment_serialization.py,sha256=cVDkapx0JC60CekazgirPEMAeGZhbLdX1WMIkFvId60,5047
|
|
646
649
|
wafer_core/utils/event_streaming.py,sha256=Sg3-hI043Ofc2b29Z3DWrKgu4HkfJoIqhhbfGRJv70Q,2260
|
|
@@ -659,18 +662,18 @@ wafer_core/utils/kernel_utils/evaluate.py,sha256=1kxFNMl9VCXfKfk_BIiuA_zFfvDB1sl
|
|
|
659
662
|
wafer_core/utils/kernel_utils/gpu_validation.py,sha256=LRiDjW_xAK4fXf1Vw1aYHG54B1W0J6b5L0K6PXzM2tI,3759
|
|
660
663
|
wafer_core/utils/kernel_utils/reference_cache.py,sha256=4IQ2gND1StHULRO7geyAElEStbjQxwOeP6X09E5wCB0,11283
|
|
661
664
|
wafer_core/utils/kernel_utils/results.py,sha256=QJGeah_41LSzxyYwGl9VxHPxTVAN2bLtk5bWdWLIpL4,6705
|
|
662
|
-
wafer_core/utils/kernel_utils/static_checker.py,sha256=
|
|
665
|
+
wafer_core/utils/kernel_utils/static_checker.py,sha256=XIQkzAOkGH5xtrOuZM4tNUqVJ0QRkYeJ7_8DosDOtkw,19886
|
|
663
666
|
wafer_core/utils/kernel_utils/task.py,sha256=XcmKxKUWh5It6nX3zGqj77tWgA32uPfQMqNOqyD5T48,2682
|
|
664
667
|
wafer_core/utils/kernel_utils/utils.py,sha256=uDZoJDxh07hJeLNlPdKN2vgB15pqIr1LbXf0YIBHU4E,43056
|
|
665
668
|
wafer_core/utils/kernel_utils/targets/__init__.py,sha256=4NwRLsuJ__S4xKAfda4Ag82C5MQ3Qio-4xA5S-mQGlU,2067
|
|
666
|
-
wafer_core/utils/kernel_utils/targets/config.py,sha256=
|
|
669
|
+
wafer_core/utils/kernel_utils/targets/config.py,sha256=sNXyYTZ9rL9OET4xqbHZ0d4b8ChzST1yUKvNOv8JSQs,19933
|
|
667
670
|
wafer_core/utils/kernel_utils/targets/execution.py,sha256=bZuNXCo0sIdD6hFhetLPrtDC-zMSiIsAx_aml49VVL0,15033
|
|
668
671
|
wafer_core/utils/kernel_utils/targets/selection.py,sha256=5I_RG_7cfhq7uaeR28meC2EeNNKssFsK-Tc3QFG6Ze0,3590
|
|
669
672
|
wafer_core/utils/modal_execution/__init__.py,sha256=jkVqYOLzCT5K73N9Od0UIUsx-99A0m6bpDrxfyXxQZ8,945
|
|
670
|
-
wafer_core/utils/modal_execution/modal_app.py,sha256=
|
|
673
|
+
wafer_core/utils/modal_execution/modal_app.py,sha256=VfS2cX8gHtnlPXemmMcEwDPeQdhiv2tly3CifOyh9f4,11455
|
|
671
674
|
wafer_core/utils/modal_execution/modal_config.py,sha256=7cGX9TGqilQ3qxI3OFGXV5orjtyRU-PEDOJ4vP2oxno,4421
|
|
672
675
|
wafer_core/utils/modal_execution/modal_execution.py,sha256=gChjnV6jqA3A7IRP3DfvV5cSfm_MN0X4f7JZufXgdZE,24594
|
|
673
676
|
wafer_core/utils/modal_execution/test_modal.py,sha256=_jqou_hrLs1Daf1590Pnb0a_lXMMa2rczAPpW9HpoNQ,8153
|
|
674
|
-
wafer_core-0.1.
|
|
675
|
-
wafer_core-0.1.
|
|
676
|
-
wafer_core-0.1.
|
|
677
|
+
wafer_core-0.1.23.dist-info/METADATA,sha256=HnIqBmqEQ6t_dc54Rnyg_Wyy-HKuAr3XTmsEoJkjJLo,1420
|
|
678
|
+
wafer_core-0.1.23.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
679
|
+
wafer_core-0.1.23.dist-info/RECORD,,
|
|
File without changes
|