wafer-core 0.1.25__py3-none-any.whl → 0.1.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wafer_core/lib/trace_compare/PERFORMANCE.md +148 -0
- wafer_core/lib/trace_compare/__init__.py +45 -0
- wafer_core/lib/trace_compare/aligner.py +369 -0
- wafer_core/lib/trace_compare/analyzer.py +729 -0
- wafer_core/lib/trace_compare/api.py +225 -0
- wafer_core/lib/trace_compare/architecture.py +77 -0
- wafer_core/lib/trace_compare/classifier.py +486 -0
- wafer_core/lib/trace_compare/formatter.py +951 -0
- wafer_core/lib/trace_compare/fusion_analyzer.py +356 -0
- wafer_core/lib/trace_compare/kernel_registry.yaml +349 -0
- wafer_core/lib/trace_compare/layer_segmentation.py +114 -0
- wafer_core/lib/trace_compare/loader.py +635 -0
- wafer_core/lib/trace_compare/same_kernel_analyzer.py +119 -0
- wafer_core/lib/trace_compare/warnings.py +99 -0
- wafer_core/problem_config.py +3 -3
- wafer_core/rollouts/agent_presets/rlm_01_01.py +2 -2
- wafer_core/rollouts/dtypes.py +18 -3
- wafer_core/rollouts/providers/anthropic.py +35 -3
- wafer_core/utils/kernel_utils/defense.py +10 -0
- wafer_core/utils/kernel_utils/targets/config.py +10 -0
- {wafer_core-0.1.25.dist-info → wafer_core-0.1.27.dist-info}/METADATA +3 -1
- {wafer_core-0.1.25.dist-info → wafer_core-0.1.27.dist-info}/RECORD +23 -9
- {wafer_core-0.1.25.dist-info → wafer_core-0.1.27.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Same kernel analysis - comparing identical kernel names across platforms.
|
|
2
|
+
|
|
3
|
+
Identifies kernels where AMD and NVIDIA use the same kernel name/pattern
|
|
4
|
+
and compares their performance directly.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from collections import defaultdict
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .aligner import KernelPair, LayerAlignment
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class SameKernelComparison:
    """Per-layer record for one kernel that runs under the same name on AMD and NVIDIA.

    ``analyze_same_kernels`` creates one instance for every aligned kernel
    pair flagged as a same-kernel match; all values are copied verbatim
    from that pair, nothing is recomputed here.
    """

    # Layer index taken from the enclosing layer alignment.
    layer: int
    # Kernel name; populated from the AMD side of the pair.
    kernel_name: str
    # Operation label carried over from the aligned pair.
    operation: str
    # Average duration on each platform (presumably microseconds, per the
    # ``_us`` suffix -- confirm against the aligner).
    amd_avg_us: float
    nvidia_avg_us: float
    # Relative and absolute timing difference as computed by the aligner.
    # NOTE(review): the exact formula is not visible in this module.
    ratio: float
    gap_us: float
    # Number of occurrences of the kernel on each platform, as reported by
    # the aligned pair.
    amd_count: int
    nvidia_count: int
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class SameKernelAnalysis:
    """Result container returned by ``analyze_same_kernels``.

    Both fields default to fresh empty containers, so ``SameKernelAnalysis()``
    is a valid "nothing matched" result.
    """

    # One entry per same-name kernel pair found across all layers.
    kernels: list[SameKernelComparison] = field(default_factory=list)
    # Aggregate statistics. ``analyze_same_kernels`` fills in the keys
    # "total_same_kernels", "avg_ratio", "kernels_where_amd_faster" and
    # "kernels_where_nvidia_faster".
    summary: dict[str, Any] = field(default_factory=dict)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def analyze_same_kernels(
    layer_alignments: list[LayerAlignment],
) -> SameKernelAnalysis:
    """Find and compare kernels with identical names across platforms.

    Walks every kernel pair of every aligned layer and keeps the pairs that
    are flagged ``is_same_kernel`` and have a non-empty kernel name on both
    the AMD and the NVIDIA side.

    Args:
        layer_alignments: List of aligned layers. Each item must expose
            ``layer`` and ``kernel_pairs``.

    Returns:
        SameKernelAnalysis whose ``kernels`` holds one comparison per
        matching pair and whose ``summary`` contains:

        * ``total_same_kernels`` - number of comparisons.
        * ``avg_ratio`` - mean of all *finite* ratios, or ``1.0`` when there
          is no finite ratio to average.
        * ``kernels_where_amd_faster`` - comparisons with ``ratio < 1.0``.
        * ``kernels_where_nvidia_faster`` - comparisons with ``ratio > 1.0``.
    """
    # Function-scope import keeps this block self-contained; the module's
    # import section does not bring ``math`` into scope.
    import math

    same_kernels: list[SameKernelComparison] = []

    for layer_alignment in layer_alignments:
        for pair in layer_alignment.kernel_pairs:
            if not (pair.is_same_kernel and pair.amd_kernel and pair.nvidia_kernel):
                continue
            same_kernels.append(
                SameKernelComparison(
                    layer=layer_alignment.layer,
                    kernel_name=pair.amd_kernel,
                    operation=pair.operation,
                    amd_avg_us=pair.amd_avg_us,
                    nvidia_avg_us=pair.nvidia_avg_us,
                    ratio=pair.ratio,
                    gap_us=pair.gap_us,
                    amd_count=pair.amd_count,
                    nvidia_count=pair.nvidia_count,
                )
            )

    # Average only finite ratios. Comparing against float("inf") alone would
    # let a single nan (or -inf) ratio turn the whole average into nan/-inf.
    finite_ratios = [k.ratio for k in same_kernels if math.isfinite(k.ratio)]
    avg_ratio = sum(finite_ratios) / len(finite_ratios) if finite_ratios else 1.0

    # The win counts deliberately look at every comparison, including
    # infinite ratios; with no comparisons both sums are simply 0.
    amd_faster = sum(1 for k in same_kernels if k.ratio < 1.0)
    nvidia_faster = sum(1 for k in same_kernels if k.ratio > 1.0)

    return SameKernelAnalysis(
        kernels=same_kernels,
        summary={
            "total_same_kernels": len(same_kernels),
            "avg_ratio": avg_ratio,
            "kernels_where_amd_faster": amd_faster,
            "kernels_where_nvidia_faster": nvidia_faster,
        },
    )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def analyze_same_kernels_from_alignment(
    layer_alignments: list[LayerAlignment],
) -> dict[str, Any]:
    """Run the same-kernel analysis and return it as plain dictionaries.

    Thin wrapper around ``analyze_same_kernels`` kept for API compatibility:
    each comparison is flattened into a dict keyed by its field names and the
    summary dict is passed through untouched.

    Args:
        layer_alignments: List of aligned layers

    Returns:
        Dictionary with a "kernels" list (one dict per comparison) and the
        "summary" dict produced by the analysis.
    """
    result = analyze_same_kernels(layer_alignments)

    rows: list[dict[str, Any]] = []
    for entry in result.kernels:
        row = {
            "layer": entry.layer,
            "kernel_name": entry.kernel_name,
            "operation": entry.operation,
            "amd_avg_us": entry.amd_avg_us,
            "nvidia_avg_us": entry.nvidia_avg_us,
            "ratio": entry.ratio,
            "gap_us": entry.gap_us,
            "amd_count": entry.amd_count,
            "nvidia_count": entry.nvidia_count,
        }
        rows.append(row)

    return {"kernels": rows, "summary": result.summary}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Warning detection and reporting for trace analysis.
|
|
2
|
+
|
|
3
|
+
Detects issues with trace data quality and provides actionable suggestions.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Literal
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class TraceWarning:
    """Immutable description of one trace-quality problem or analysis limitation.

    Instances are frozen: attributes cannot be reassigned after construction.
    """

    # Machine-readable identifier, e.g. "NO_PHASE_ANNOTATIONS" or
    # "NO_PYTHON_STACKS".
    code: str
    # How serious the finding is; restricted to these three literals.
    severity: Literal["info", "warning", "error"]
    # Human-readable statement of what is wrong or missing.
    message: str
    # Actionable hint describing how the user can address the finding.
    suggestion: str
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def detect_warnings(
    events: list[dict],
    kernel_names: list[str],
    phases: list[dict] | None = None,
    layers_detected: int = 0,
    total_kernels: int = 0,
) -> list[TraceWarning]:
    """Detect warnings from trace data.

    Runs four independent checks and returns one ``TraceWarning`` for each
    check that fires, in this order: missing phase annotations, missing
    Python stacks, too many unknown kernels, failed layer detection.

    Args:
        events: All trace events. Only the "cat" and "name" keys are read.
        kernel_names: List of all kernel names; scanned for names that
            contain "unknown" (case-insensitive) or equal "Other".
        phases: Optional list of phase events. Accepted for interface
            compatibility; this function does not currently read it.
        layers_detected: Number of layers detected
        total_kernels: Total number of kernels. Used as the denominator of
            the unknown-kernel percentage; that check is skipped when this
            is 0.

    Returns:
        List of warnings (empty when no check fires).
    """
    # Named ``found`` rather than ``warnings`` so the local does not shadow
    # the module / standard-library name.
    found: list[TraceWarning] = []

    # Check for phase annotations.
    # The isinstance guard matters: dict.get's default only applies to a
    # *missing* key, so an event carrying "name": None (or any non-string)
    # would otherwise raise AttributeError on .startswith().
    has_phase_annotations = any(
        ev.get("cat") == "user_annotation"
        and isinstance(ev.get("name"), str)
        and ev["name"].startswith("execute_context")
        for ev in events
    )

    if not has_phase_annotations:
        found.append(
            TraceWarning(
                code="NO_PHASE_ANNOTATIONS",
                severity="warning",
                message="No vLLM phase annotations found. Phase analysis (prefill/decode) will be unavailable.",
                suggestion="Ensure you're using vLLM v1.0+ with profiling enabled. Re-profile with torch.profiler.profile() to capture phase markers.",
            )
        )

    # Check for Python stack traces
    has_python_stacks = any(ev.get("cat") == "python_function" for ev in events)

    if not has_python_stacks:
        found.append(
            TraceWarning(
                code="NO_PYTHON_STACKS",
                severity="info",
                message="No Python stack traces available. CPU→kernel mapping will be limited.",
                suggestion="Re-profile with with_stack=True: torch.profiler.profile(with_stack=True) for better CPU operator identification.",
            )
        )

    # Check for high percentage of unknown kernels
    if total_kernels > 0:
        unknown_count = sum(
            1 for name in kernel_names if "unknown" in name.lower() or name == "Other"
        )
        unknown_percentage = (unknown_count / total_kernels) * 100

        # Warn once more than a fifth of the kernels are unclassified.
        if unknown_percentage > 20:
            found.append(
                TraceWarning(
                    code="HIGH_UNKNOWN_KERNELS",
                    severity="warning",
                    message=f"{unknown_percentage:.1f}% of kernels are classified as 'Unknown'. Kernel registry may be outdated.",
                    suggestion="Update kernel pattern registry or report unrecognized kernel patterns for support.",
                )
            )

    # Check for layer detection failure. Only reported when more than 100
    # kernels were seen, so small traces do not trigger it.
    if layers_detected == 0 and total_kernels > 100:
        found.append(
            TraceWarning(
                code="LAYER_DETECTION_FAILED",
                severity="warning",
                message="No transformer layers detected. Layer-wise analysis unavailable.",
                suggestion="This may indicate a non-transformer model (e.g., SSM/Mamba) or insufficient correlation data. Check model architecture.",
            )
        )

    return found
|
wafer_core/problem_config.py
CHANGED
|
@@ -84,7 +84,7 @@ class ProblemConfig:
|
|
|
84
84
|
benchmarks: list[dict[str, Any]]
|
|
85
85
|
|
|
86
86
|
# Optional with defaults
|
|
87
|
-
model: str = "claude-
|
|
87
|
+
model: str = "claude-opus-4-5-20251101"
|
|
88
88
|
temperature: float = 0.2
|
|
89
89
|
max_tokens: int = 8192
|
|
90
90
|
max_turns: int = 10
|
|
@@ -219,7 +219,7 @@ def _parse_config(data: dict[str, Any], base_dir: Path) -> tuple[ProblemConfig |
|
|
|
219
219
|
reference_code=reference_code,
|
|
220
220
|
tests=tests,
|
|
221
221
|
benchmarks=benchmarks,
|
|
222
|
-
model=data.get("model", "claude-
|
|
222
|
+
model=data.get("model", "claude-opus-4-5-20251101"),
|
|
223
223
|
temperature=data.get("temperature", 0.2),
|
|
224
224
|
max_tokens=data.get("max_tokens", 8192),
|
|
225
225
|
max_turns=data.get("max_turns", 10),
|
|
@@ -269,7 +269,7 @@ def create_problem_config_from_cli(
|
|
|
269
269
|
reference_code=reference_code,
|
|
270
270
|
tests=tests,
|
|
271
271
|
benchmarks=benchmarks or tests, # Use tests as benchmarks if not specified
|
|
272
|
-
model=kwargs.get("model", "claude-
|
|
272
|
+
model=kwargs.get("model", "claude-opus-4-5-20251101"),
|
|
273
273
|
temperature=kwargs.get("temperature", 0.2),
|
|
274
274
|
max_tokens=kwargs.get("max_tokens", 8192),
|
|
275
275
|
max_turns=kwargs.get("max_turns", 10),
|
|
@@ -119,7 +119,7 @@ FINAL(42)
|
|
|
119
119
|
|
|
120
120
|
config = AgentPresetConfig(
|
|
121
121
|
name="rlm",
|
|
122
|
-
model="anthropic/claude-
|
|
122
|
+
model="anthropic/claude-opus-4-5-20251101",
|
|
123
123
|
env="repl", # Uses REPLEnvironment
|
|
124
124
|
thinking=True,
|
|
125
125
|
system_prompt=RLM_TOOL_SYSTEM_PROMPT,
|
|
@@ -128,7 +128,7 @@ config = AgentPresetConfig(
|
|
|
128
128
|
# Variant for message-parsing mode
|
|
129
129
|
config_block_mode = AgentPresetConfig(
|
|
130
130
|
name="rlm_blocks",
|
|
131
|
-
model="anthropic/claude-
|
|
131
|
+
model="anthropic/claude-opus-4-5-20251101",
|
|
132
132
|
env="repl_blocks", # Uses MessageParsingREPLEnvironment
|
|
133
133
|
thinking=True,
|
|
134
134
|
system_prompt=RLM_BLOCK_SYSTEM_PROMPT,
|
wafer_core/rollouts/dtypes.py
CHANGED
|
@@ -1238,6 +1238,12 @@ class Endpoint(JsonSerializable):
|
|
|
1238
1238
|
api_base: str = ""
|
|
1239
1239
|
api_key: str = ""
|
|
1240
1240
|
oauth_token: str = "" # OAuth bearer token (takes precedence over api_key for Anthropic)
|
|
1241
|
+
# TODO: Callbacks on a frozen dataclass are a code smell. This exists because wafer-core
|
|
1242
|
+
# can't depend on wafer-cli (where the Supabase refresh logic lives). A cleaner approach
|
|
1243
|
+
# would be a TokenProvider protocol that Endpoint delegates to, keeping the dataclass pure.
|
|
1244
|
+
api_key_refresh: Callable[[], Awaitable[str | None]] | None = field(
|
|
1245
|
+
default=None, repr=False, compare=False
|
|
1246
|
+
)
|
|
1241
1247
|
is_claude_code_api_key: bool = (
|
|
1242
1248
|
False # API key created via Claude Code OAuth (requires special headers)
|
|
1243
1249
|
)
|
|
@@ -1300,6 +1306,7 @@ class Endpoint(JsonSerializable):
|
|
|
1300
1306
|
exclude_secrets: If True (default), omits api_key and oauth_token.
|
|
1301
1307
|
"""
|
|
1302
1308
|
d = asdict(self)
|
|
1309
|
+
d.pop("api_key_refresh", None) # Callable, not serializable
|
|
1303
1310
|
if exclude_secrets:
|
|
1304
1311
|
d.pop("api_key", None)
|
|
1305
1312
|
d.pop("oauth_token", None)
|
|
@@ -1307,7 +1314,11 @@ class Endpoint(JsonSerializable):
|
|
|
1307
1314
|
|
|
1308
1315
|
@classmethod
|
|
1309
1316
|
def from_dict(
|
|
1310
|
-
cls,
|
|
1317
|
+
cls,
|
|
1318
|
+
data: dict[str, Any],
|
|
1319
|
+
api_key: str = "",
|
|
1320
|
+
oauth_token: str = "",
|
|
1321
|
+
api_key_refresh: "Callable[[], Awaitable[str | None]] | None" = None,
|
|
1311
1322
|
) -> "Endpoint":
|
|
1312
1323
|
"""Deserialize from dict, injecting secrets at runtime.
|
|
1313
1324
|
|
|
@@ -1315,12 +1326,16 @@ class Endpoint(JsonSerializable):
|
|
|
1315
1326
|
data: Dict from to_dict()
|
|
1316
1327
|
api_key: API key to inject (not stored in session)
|
|
1317
1328
|
oauth_token: OAuth token to inject (not stored in session)
|
|
1329
|
+
api_key_refresh: Callback to refresh api_key mid-session (not stored)
|
|
1318
1330
|
"""
|
|
1319
|
-
# Remove secrets if present (they shouldn't be, but be safe)
|
|
1331
|
+
# Remove secrets/callables if present (they shouldn't be, but be safe)
|
|
1320
1332
|
data = data.copy()
|
|
1321
1333
|
data.pop("api_key", None)
|
|
1322
1334
|
data.pop("oauth_token", None)
|
|
1323
|
-
|
|
1335
|
+
data.pop("api_key_refresh", None)
|
|
1336
|
+
return cls(
|
|
1337
|
+
**data, api_key=api_key, oauth_token=oauth_token, api_key_refresh=api_key_refresh
|
|
1338
|
+
)
|
|
1324
1339
|
|
|
1325
1340
|
|
|
1326
1341
|
@dataclass(frozen=True)
|
|
@@ -725,9 +725,16 @@ async def rollout_anthropic(
|
|
|
725
725
|
oauth_token = fresh_token
|
|
726
726
|
# If refresh failed, continue with existing token - it might still work
|
|
727
727
|
|
|
728
|
+
# Get fresh wafer proxy token if refresh callback is available
|
|
729
|
+
api_key = actor.endpoint.api_key
|
|
730
|
+
if actor.endpoint.api_key_refresh:
|
|
731
|
+
fresh_key = await actor.endpoint.api_key_refresh()
|
|
732
|
+
if fresh_key:
|
|
733
|
+
api_key = fresh_key
|
|
734
|
+
|
|
728
735
|
client = _create_anthropic_client(
|
|
729
736
|
oauth_token=oauth_token,
|
|
730
|
-
api_key=
|
|
737
|
+
api_key=api_key,
|
|
731
738
|
api_base=actor.endpoint.api_base,
|
|
732
739
|
max_retries=actor.endpoint.max_retries,
|
|
733
740
|
timeout=actor.endpoint.timeout,
|
|
@@ -973,7 +980,7 @@ async def rollout_anthropic(
|
|
|
973
980
|
f"Model not found: {e}\nCheck your model ID is correct."
|
|
974
981
|
) from e
|
|
975
982
|
|
|
976
|
-
#
|
|
983
|
+
# Try to refresh token and retry once on auth errors
|
|
977
984
|
if isinstance(e, anthropic.AuthenticationError):
|
|
978
985
|
if oauth_token and attempt == 0:
|
|
979
986
|
# Emit retry event for OAuth refresh
|
|
@@ -993,12 +1000,37 @@ async def rollout_anthropic(
|
|
|
993
1000
|
await client.close()
|
|
994
1001
|
client = _create_anthropic_client(
|
|
995
1002
|
oauth_token=oauth_token,
|
|
996
|
-
api_key=
|
|
1003
|
+
api_key=api_key,
|
|
997
1004
|
api_base=actor.endpoint.api_base,
|
|
998
1005
|
max_retries=actor.endpoint.max_retries,
|
|
999
1006
|
timeout=actor.endpoint.timeout,
|
|
1000
1007
|
)
|
|
1001
1008
|
continue
|
|
1009
|
+
|
|
1010
|
+
# Wafer proxy token refresh (Supabase JWTs expire after ~1hr)
|
|
1011
|
+
if actor.endpoint.api_key_refresh and attempt == 0:
|
|
1012
|
+
await on_chunk(
|
|
1013
|
+
RetryStart(
|
|
1014
|
+
attempt=1,
|
|
1015
|
+
max_attempts=2,
|
|
1016
|
+
delay_seconds=0,
|
|
1017
|
+
error_message="Wafer proxy token expired, refreshing",
|
|
1018
|
+
provider="anthropic",
|
|
1019
|
+
)
|
|
1020
|
+
)
|
|
1021
|
+
fresh_key = await actor.endpoint.api_key_refresh()
|
|
1022
|
+
if fresh_key and fresh_key != api_key:
|
|
1023
|
+
api_key = fresh_key
|
|
1024
|
+
await client.close()
|
|
1025
|
+
client = _create_anthropic_client(
|
|
1026
|
+
oauth_token=oauth_token,
|
|
1027
|
+
api_key=api_key,
|
|
1028
|
+
api_base=actor.endpoint.api_base,
|
|
1029
|
+
max_retries=actor.endpoint.max_retries,
|
|
1030
|
+
timeout=actor.endpoint.timeout,
|
|
1031
|
+
)
|
|
1032
|
+
continue
|
|
1033
|
+
|
|
1002
1034
|
raise FatalEvalError(
|
|
1003
1035
|
f"Authentication failed: {e}\nCheck your API key or OAuth token."
|
|
1004
1036
|
) from e
|
|
@@ -12,6 +12,16 @@ Attack types defended against:
|
|
|
12
12
|
5. Monkey-patching - Replacing CUDA timing functions with fake implementations
|
|
13
13
|
|
|
14
14
|
Reference: "Hacks and Defenses in Automatic GPU Kernel Generation" by Jiwei Li (Dec 2025)
|
|
15
|
+
|
|
16
|
+
TODO: Memory guard buffers (from CUDA-L2's zero_one_correctness_check.py) — wrap
|
|
17
|
+
input/output tensors with guard regions and check for out-of-bounds writes after
|
|
18
|
+
kernel execution. Catches shared memory overflow and buffer overrun at the memory
|
|
19
|
+
boundary, rather than inferring from output non-determinism.
|
|
20
|
+
|
|
21
|
+
TODO: Exact correctness for GEMM kernels (from CUDA-L2) — use {0,1} input matrices
|
|
22
|
+
where FP16 results ≤2048 are exactly representable, enabling zero-tolerance
|
|
23
|
+
validation (torch.equal instead of torch.allclose). Eliminates the "bounded garbage
|
|
24
|
+
passes tolerance check" failure mode for matmul kernels entirely.
|
|
15
25
|
"""
|
|
16
26
|
|
|
17
27
|
import random
|
|
@@ -21,6 +21,12 @@ if TYPE_CHECKING:
|
|
|
21
21
|
from wafer_core.utils.kernel_utils.deployment import DeploymentConfig
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
# TODO: Split BaremetalTarget into BaremetalTarget (persistent servers like Vultr,
|
|
25
|
+
# never auto-removed) and SSHTarget (ephemeral SSH endpoints from providers like
|
|
26
|
+
# RunPod/DO, safe to auto-clean when unreachable). Currently the pool bridge creates
|
|
27
|
+
# ephemeral pod endpoints as type="baremetal", losing provenance. SSHTarget should
|
|
28
|
+
# subclass BaremetalTarget so existing isinstance() checks still work. The `provider`
|
|
29
|
+
# field is a stopgap until this split happens.
|
|
24
30
|
@dataclass(frozen=True)
|
|
25
31
|
class BaremetalTarget:
|
|
26
32
|
"""Configuration for baremetal GPU server.
|
|
@@ -59,6 +65,9 @@ class BaremetalTarget:
|
|
|
59
65
|
gpu_type: str = "B200"
|
|
60
66
|
compute_capability: str = "10.0"
|
|
61
67
|
ncu_available: bool = True # Baremetal typically has NCU
|
|
68
|
+
provider: str | None = (
|
|
69
|
+
None # Source provider ("runpod", "digitalocean") — enables auto-cleanup when instance is gone
|
|
70
|
+
)
|
|
62
71
|
|
|
63
72
|
# Docker execution config (Modal-like). If docker_image is set, run in container.
|
|
64
73
|
docker_image: str | None = (
|
|
@@ -314,6 +323,7 @@ class RunPodTarget:
|
|
|
314
323
|
# apt-get install --reinstall -y rocthrust
|
|
315
324
|
# See docker/rocm7-runpod/README.md for details.
|
|
316
325
|
image: str = "rocm/pytorch:rocm7.0.2_ubuntu24.04_py3.12_pytorch_release_2.7.1"
|
|
326
|
+
template_id: str | None = None # RunPod template ID for custom pod configuration
|
|
317
327
|
|
|
318
328
|
# RunPod template ID — required for non-RunPod images that need custom
|
|
319
329
|
# dockerArgs (e.g. to install and start sshd). When set, takes priority
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: wafer-core
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.27
|
|
4
4
|
Summary: Core utilities and environments for Wafer GPU kernel optimization
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
Requires-Dist: aiohttp>=3.9.0
|
|
@@ -15,12 +15,14 @@ Requires-Dist: dash-svg>=0.0.11
|
|
|
15
15
|
Requires-Dist: dash>=3.0.0
|
|
16
16
|
Requires-Dist: dnspython>=2.8.0
|
|
17
17
|
Requires-Dist: httpx>=0.25.0
|
|
18
|
+
Requires-Dist: ijson>=3.2.0
|
|
18
19
|
Requires-Dist: kaleido==0.2.1
|
|
19
20
|
Requires-Dist: markdownify>=0.11.0
|
|
20
21
|
Requires-Dist: matplotlib>=3.0.0
|
|
21
22
|
Requires-Dist: modal>=0.64.0
|
|
22
23
|
Requires-Dist: numpy>=1.17.5
|
|
23
24
|
Requires-Dist: openai>=1.0.0
|
|
25
|
+
Requires-Dist: orjson>=3.9.0
|
|
24
26
|
Requires-Dist: pandas~=3.0.0
|
|
25
27
|
Requires-Dist: paramiko>=3.0.0
|
|
26
28
|
Requires-Dist: paramiko>=3.4.0
|
|
@@ -3,7 +3,7 @@ wafer_core/async_ssh.py,sha256=ocw2Gh5p8ltKeoqG_q32DXOBfu5q-IE7jCnzMbQN9WI,28713
|
|
|
3
3
|
wafer_core/auth.py,sha256=JpUkZ3bROIsgexayak5TLiGqUAR5kqGjekwqQRvIXH0,7235
|
|
4
4
|
wafer_core/gpu.py,sha256=ENa92btjXsx6ldpoyKfRrAmfy-LHG2KpA5k7SWd6Q_s,28627
|
|
5
5
|
wafer_core/gpu_detect.py,sha256=kpD8Q_G6GA9j-WnnnTNA3BBPulkGcWnZiogOmjKDao0,13650
|
|
6
|
-
wafer_core/problem_config.py,sha256=
|
|
6
|
+
wafer_core/problem_config.py,sha256=IM4ZRul4306dF7yo8wwyxXYORUZ7nz5wnphG59HN6fo,10907
|
|
7
7
|
wafer_core/remote_env.py,sha256=0ACTL-A_qn2B43qgQakqGaern-pspvwBGB9iebz199k,15354
|
|
8
8
|
wafer_core/remote_jobs.py,sha256=7HdBDCigSxfp32BreWoljzG5xjK6fp25rwC_6D7D04s,8306
|
|
9
9
|
wafer_core/retry.py,sha256=OIvSElJZbSm4-SFBpOFuYtoX2DWGiANomCmb3qxsirM,14821
|
|
@@ -318,6 +318,20 @@ wafer_core/lib/rocprofiler/systems/run/analyzer.py,sha256=Qg3M8-kCKdV82ehn6Ta20N
|
|
|
318
318
|
wafer_core/lib/rocprofiler/systems/run/profiler.py,sha256=aiQLsDnfQHSeCM5zLnO4VlbTmREYnAtiuT50Eq6uWfg,8387
|
|
319
319
|
wafer_core/lib/rocprofiler/systems/sample/__init__.py,sha256=31rNmLPQ7OVhvlOEEOwPKgk8_qrCidj6AmzDXexQJ_o,288
|
|
320
320
|
wafer_core/lib/rocprofiler/systems/sample/profiler.py,sha256=CYZPTzNXd48LoCfmY6h_5RSYEdWYccuv3-t4YncHJLE,7384
|
|
321
|
+
wafer_core/lib/trace_compare/PERFORMANCE.md,sha256=jkJh7ApZi8H7NKTcz8v0LNtwSFtIUqY88e3QbL749ww,3823
|
|
322
|
+
wafer_core/lib/trace_compare/__init__.py,sha256=CyUPbPQDYhVLCFFA7S_jNSilG3OgqYjmHSKfR5X11go,1377
|
|
323
|
+
wafer_core/lib/trace_compare/aligner.py,sha256=6HplOHCUIb0cMXA-Lu-91T-hKVTMK4bk8Ei-v7HE1G4,13471
|
|
324
|
+
wafer_core/lib/trace_compare/analyzer.py,sha256=m-waAiU5S72M9J4kUwIy9fPWUecg_oOUczri8Na6xUY,29360
|
|
325
|
+
wafer_core/lib/trace_compare/api.py,sha256=JSRTcd7eZK1Z8l18TFEiA5A8ENJS1TMz7oIiw1KBbAs,8796
|
|
326
|
+
wafer_core/lib/trace_compare/architecture.py,sha256=8bqlAJQeJLBHblyXvFV-w55PIKiVQDPjDQZ8Jx4tuGg,2110
|
|
327
|
+
wafer_core/lib/trace_compare/classifier.py,sha256=CDGzY9TY-I5wRuEGsu4mTCdljqVTOnLWyFLyNgmkGXI,16864
|
|
328
|
+
wafer_core/lib/trace_compare/formatter.py,sha256=GNrCZ45ueBN05CEXjOtTuKvTI8z-g-ZZFil-ni3sWVY,37962
|
|
329
|
+
wafer_core/lib/trace_compare/fusion_analyzer.py,sha256=bD_CJ3JoVg_N6vxJJULd6G8l_-O5qnLuXKDEDItcQtg,15489
|
|
330
|
+
wafer_core/lib/trace_compare/kernel_registry.yaml,sha256=0-knXwsF3pR1x1JdIz-aWaH-5xDgTylh53E47Kf6nHo,9808
|
|
331
|
+
wafer_core/lib/trace_compare/layer_segmentation.py,sha256=kI_Y1e9nrKZfdwfcrGo4h7gpMxqXI_xkgXk46zuFen4,4642
|
|
332
|
+
wafer_core/lib/trace_compare/loader.py,sha256=zBHI0r7CX_wJ2mz0_-s0lm9KGSdaVaq7OKyxUL6KIlw,23997
|
|
333
|
+
wafer_core/lib/trace_compare/same_kernel_analyzer.py,sha256=sp81NJGVJeYdAfRQRgMbB5HcGTOneF1Rau3rbLPfpv4,3489
|
|
334
|
+
wafer_core/lib/trace_compare/warnings.py,sha256=B1HxFt-v1mDqLT2aD5bSm1Yn88bfPYnM-wui0WBF3xM,3548
|
|
321
335
|
wafer_core/lib/tracelens/__init__.py,sha256=AkHdmOnKlBO4RpsAqVVGe7MOfv6E6uhEaC_iKrYeMPI,2002
|
|
322
336
|
wafer_core/lib/tracelens/comparator.py,sha256=71YEPfjBi7_24u1oQuPerNtFsN0sDQ5CT_uBi0XLllw,3460
|
|
323
337
|
wafer_core/lib/tracelens/finder.py,sha256=HpbN8TuRNbbBytPYOmkBkfsFVBReQqVgsvFX-mBrln4,2459
|
|
@@ -336,7 +350,7 @@ wafer_core/rollouts/agents.py,sha256=Uv1kjYogUfdPl18YfkVxVqFTbmWfuJQrxem_iHTUgdw
|
|
|
336
350
|
wafer_core/rollouts/cli.py,sha256=2NqgegKdlmxD0eJzGOMB5o_1Hb5t7O5JpP_32uvF2BE,80117
|
|
337
351
|
wafer_core/rollouts/cli_agents.py,sha256=e4qqqYBzWLsbw8FsNnddGApWp_on9Cvzrfd1amiAyvI,20641
|
|
338
352
|
wafer_core/rollouts/deploy.py,sha256=3t88fM_BMyAPkxIl8pS4r5ogHJvrlqWQDuIaltDZBRc,40924
|
|
339
|
-
wafer_core/rollouts/dtypes.py,sha256=
|
|
353
|
+
wafer_core/rollouts/dtypes.py,sha256=oRWjpbUOTf4uyXvnO9QThcSzD1fBrDQnAfRhGbxdgrg,61916
|
|
340
354
|
wafer_core/rollouts/eval_helpers.py,sha256=OE7uQZRcbqQhpFqb4zOj8zafc9Gr6xZJpSrMvxXKVUw,1699
|
|
341
355
|
wafer_core/rollouts/evaluation.py,sha256=fk-pGZ5vpocVmw1iBbHtxMK0K6l8pYTLHCpDNvRY1Xo,69142
|
|
342
356
|
wafer_core/rollouts/events.py,sha256=z85J8kq0LXPj5CiUk4RkiTQg--r9xiO7QeeJwkyUOto,7505
|
|
@@ -371,7 +385,7 @@ wafer_core/rollouts/agent_presets/gpt_5_1_codex_04_04.py,sha256=42NIBBYAnVoy5mbu
|
|
|
371
385
|
wafer_core/rollouts/agent_presets/gpt_5_2_03_03.py,sha256=lEsHRUhhr8UbP5wSVKMOVDVOOtH_bQMRRgZ0dRGZMVc,1166
|
|
372
386
|
wafer_core/rollouts/agent_presets/loader.py,sha256=WSkTbL7QhgMamZR5sXxep1n4cuy8LC3a4MN2phYTm-4,3666
|
|
373
387
|
wafer_core/rollouts/agent_presets/opus_4_01_01.py,sha256=rurZMI-Df7O-Q-uVJj2zfY_DSjdNbMKBDZlRg9MLADc,3568
|
|
374
|
-
wafer_core/rollouts/agent_presets/rlm_01_01.py,sha256=
|
|
388
|
+
wafer_core/rollouts/agent_presets/rlm_01_01.py,sha256=jsjwDgACQxxJj4GYOUCcQvYjcICAaKV3eccQu9oyEcw,4781
|
|
375
389
|
wafer_core/rollouts/agent_presets/sonnet_4_02_02.py,sha256=ZdHNxioki3wsfD6ficgB2r7HkgQDH_trCR-baGFgoHk,1269
|
|
376
390
|
wafer_core/rollouts/agent_presets/sonnet_4_subagent_03_02.py,sha256=nxyjs4HWAPOAYLmPknSQr3viBXhboKC7wQ76LWB-jA0,2165
|
|
377
391
|
wafer_core/rollouts/config/README.md,sha256=i0r0a3sKLkc1Eq3EqqR2Gahsgo-c8O3OZ0cCh7rp8Uw,9899
|
|
@@ -495,7 +509,7 @@ wafer_core/rollouts/prompt_optimization/adapters/system_prompt.py,sha256=CWFox1N
|
|
|
495
509
|
wafer_core/rollouts/prompt_optimization/adapters/system_user_prompt.py,sha256=8JsSirihgZ5gacyYhn31GagyIxG0xQ7f7i4PnEupWz8,12090
|
|
496
510
|
wafer_core/rollouts/prompt_optimization/adapters/terminal_bench.py,sha256=Etswuqf5dBIZQ2x2p26AXz4LT33YxT2qEeHqKXTJy18,12273
|
|
497
511
|
wafer_core/rollouts/providers/__init__.py,sha256=Xu8PPDHOmF97ylMJXfE9JX2FJCanNVh7LXkHMmg0vWs,3121
|
|
498
|
-
wafer_core/rollouts/providers/anthropic.py,sha256=
|
|
512
|
+
wafer_core/rollouts/providers/anthropic.py,sha256=9x1GIL6JE8gutxVrLNiyAkymknIEKtl-98TnIUpFxoI,45223
|
|
499
513
|
wafer_core/rollouts/providers/base.py,sha256=2ADu6pDz6yEcazo4j6-O12rs19bPewAfycjK_N03ZkY,14544
|
|
500
514
|
wafer_core/rollouts/providers/google.py,sha256=IbqdXOpzSuMdI7eOZqRtzni85ysKby13PGe482Fq13w,22073
|
|
501
515
|
wafer_core/rollouts/providers/openai_completions.py,sha256=3vUA74qjrxG-aOjyngtnZp0MzIhnzW5kudwxmOGxXfs,28820
|
|
@@ -655,7 +669,7 @@ wafer_core/utils/remote_execution.py,sha256=z7nLiOgmDiM_VmElLnT2LF-aKNeeKFYjXigT
|
|
|
655
669
|
wafer_core/utils/submission_selection.py,sha256=LucdMTAbkqZA-GitSb3ZJ2pAeJ36wKqt5cTeS8xuAQ4,5655
|
|
656
670
|
wafer_core/utils/kernel_utils/__init__.py,sha256=NsfKpbfpIsfupWIpIjWLGCjGAVqaONiwiWil5zXbrRc,2015
|
|
657
671
|
wafer_core/utils/kernel_utils/backends.py,sha256=t3wY73Y-pVc_wALNu_bPsaFkqJ2dp2pf38KQ5ofP_go,1143
|
|
658
|
-
wafer_core/utils/kernel_utils/defense.py,sha256=
|
|
672
|
+
wafer_core/utils/kernel_utils/defense.py,sha256=8tHVTZlJfFcB_FWjNZfeGHwReSjG191OmFXtWXa07OM,20124
|
|
659
673
|
wafer_core/utils/kernel_utils/deployment.py,sha256=-tMb3qWmAoXHWCmmT7SQBH7KBKyyLP0e5Dk6lOrTPW8,55957
|
|
660
674
|
wafer_core/utils/kernel_utils/evaluate.py,sha256=1kxFNMl9VCXfKfk_BIiuA_zFfvDB1sl_feS2OEIJA1k,72346
|
|
661
675
|
wafer_core/utils/kernel_utils/gpu_validation.py,sha256=LRiDjW_xAK4fXf1Vw1aYHG54B1W0J6b5L0K6PXzM2tI,3759
|
|
@@ -665,7 +679,7 @@ wafer_core/utils/kernel_utils/static_checker.py,sha256=XIQkzAOkGH5xtrOuZM4tNUqVJ
|
|
|
665
679
|
wafer_core/utils/kernel_utils/task.py,sha256=XcmKxKUWh5It6nX3zGqj77tWgA32uPfQMqNOqyD5T48,2682
|
|
666
680
|
wafer_core/utils/kernel_utils/utils.py,sha256=uDZoJDxh07hJeLNlPdKN2vgB15pqIr1LbXf0YIBHU4E,43056
|
|
667
681
|
wafer_core/utils/kernel_utils/targets/__init__.py,sha256=4NwRLsuJ__S4xKAfda4Ag82C5MQ3Qio-4xA5S-mQGlU,2067
|
|
668
|
-
wafer_core/utils/kernel_utils/targets/config.py,sha256=
|
|
682
|
+
wafer_core/utils/kernel_utils/targets/config.py,sha256=V587DYkisEFoWwkmLQUW6I0mXkMEwA52sM7ZINslkK8,20625
|
|
669
683
|
wafer_core/utils/kernel_utils/targets/execution.py,sha256=bZuNXCo0sIdD6hFhetLPrtDC-zMSiIsAx_aml49VVL0,15033
|
|
670
684
|
wafer_core/utils/kernel_utils/targets/selection.py,sha256=5I_RG_7cfhq7uaeR28meC2EeNNKssFsK-Tc3QFG6Ze0,3590
|
|
671
685
|
wafer_core/utils/modal_execution/__init__.py,sha256=jkVqYOLzCT5K73N9Od0UIUsx-99A0m6bpDrxfyXxQZ8,945
|
|
@@ -673,6 +687,6 @@ wafer_core/utils/modal_execution/modal_app.py,sha256=VfS2cX8gHtnlPXemmMcEwDPeQdh
|
|
|
673
687
|
wafer_core/utils/modal_execution/modal_config.py,sha256=7cGX9TGqilQ3qxI3OFGXV5orjtyRU-PEDOJ4vP2oxno,4421
|
|
674
688
|
wafer_core/utils/modal_execution/modal_execution.py,sha256=gChjnV6jqA3A7IRP3DfvV5cSfm_MN0X4f7JZufXgdZE,24594
|
|
675
689
|
wafer_core/utils/modal_execution/test_modal.py,sha256=_jqou_hrLs1Daf1590Pnb0a_lXMMa2rczAPpW9HpoNQ,8153
|
|
676
|
-
wafer_core-0.1.
|
|
677
|
-
wafer_core-0.1.
|
|
678
|
-
wafer_core-0.1.
|
|
690
|
+
wafer_core-0.1.27.dist-info/METADATA,sha256=NYiI9hCaVd9RCCAfd8Ys0UwTMju6wiyaT7nsk4gsB8A,1477
|
|
691
|
+
wafer_core-0.1.27.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
692
|
+
wafer_core-0.1.27.dist-info/RECORD,,
|
|
File without changes
|