wafer-cli 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wafer/cli.py +196 -37
- wafer/corpus.py +65 -5
- wafer/specs_cli.py +157 -0
- wafer/targets_cli.py +472 -0
- wafer/targets_ops.py +29 -2
- wafer/trace_compare.py +139 -48
- {wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/METADATA +1 -1
- {wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/RECORD +11 -9
- {wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/WHEEL +0 -0
- {wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/entry_points.txt +0 -0
- {wafer_cli-0.2.25.dist-info → wafer_cli-0.2.27.dist-info}/top_level.txt +0 -0
wafer/trace_compare.py
CHANGED
|
@@ -6,19 +6,22 @@ All core logic is in wafer_core.lib.trace_compare.
|
|
|
6
6
|
|
|
7
7
|
import sys
|
|
8
8
|
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
9
10
|
|
|
10
11
|
import typer
|
|
11
12
|
|
|
13
|
+
import json
|
|
14
|
+
import sys
|
|
15
|
+
|
|
12
16
|
from wafer_core.lib.trace_compare import (
|
|
13
|
-
|
|
14
|
-
analyze_traces,
|
|
17
|
+
analyze_trace_pair,
|
|
15
18
|
format_csv,
|
|
16
|
-
format_fusion_csv,
|
|
17
|
-
format_fusion_json,
|
|
18
|
-
format_fusion_text,
|
|
19
19
|
format_json,
|
|
20
20
|
format_text,
|
|
21
|
+
ArchitectureType,
|
|
22
|
+
detect_architecture,
|
|
21
23
|
)
|
|
24
|
+
from wafer_core.lib.trace_compare.loader import StreamingMetadata
|
|
22
25
|
|
|
23
26
|
|
|
24
27
|
def compare_traces(
|
|
@@ -30,6 +33,7 @@ def compare_traces(
|
|
|
30
33
|
show_layers: bool = False,
|
|
31
34
|
show_all: bool = False,
|
|
32
35
|
show_stack_traces: bool = False,
|
|
36
|
+
recommendations: bool = False,
|
|
33
37
|
) -> None:
|
|
34
38
|
"""Compare two GPU traces and generate performance report.
|
|
35
39
|
|
|
@@ -52,21 +56,60 @@ def compare_traces(
|
|
|
52
56
|
typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
|
|
53
57
|
raise typer.Exit(1)
|
|
54
58
|
|
|
55
|
-
#
|
|
56
|
-
|
|
59
|
+
# Progress callback for JSON format (emits NDJSON to stdout)
|
|
60
|
+
def progress_callback(stage: str, fraction: float) -> None:
|
|
61
|
+
if output_format == 'json':
|
|
62
|
+
progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
|
|
63
|
+
print(progress_msg, file=sys.stdout, flush=True)
|
|
64
|
+
elif output_format != 'json':
|
|
65
|
+
percent = int(fraction * 100)
|
|
66
|
+
typer.echo(f"📊 {stage}: {percent}%", err=True)
|
|
67
|
+
|
|
68
|
+
# Metadata callback for JSON format (emits NDJSON with early GPU info)
|
|
69
|
+
def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
|
|
70
|
+
if output_format == 'json':
|
|
71
|
+
metadata_msg = json.dumps({
|
|
72
|
+
"type": "metadata",
|
|
73
|
+
"trace1": {
|
|
74
|
+
"platform": meta1.platform,
|
|
75
|
+
"gpu": meta1.gpu_name,
|
|
76
|
+
"file_size_mb": round(meta1.file_size_mb, 1),
|
|
77
|
+
},
|
|
78
|
+
"trace2": {
|
|
79
|
+
"platform": meta2.platform,
|
|
80
|
+
"gpu": meta2.gpu_name,
|
|
81
|
+
"file_size_mb": round(meta2.file_size_mb, 1),
|
|
82
|
+
},
|
|
83
|
+
})
|
|
84
|
+
print(metadata_msg, file=sys.stdout, flush=True)
|
|
85
|
+
else:
|
|
86
|
+
typer.echo(f"📊 Trace 1: {meta1.platform} - {meta1.gpu_name} ({meta1.file_size_mb:.1f}MB)", err=True)
|
|
87
|
+
typer.echo(f"📊 Trace 2: {meta2.platform} - {meta2.gpu_name} ({meta2.file_size_mb:.1f}MB)", err=True)
|
|
88
|
+
|
|
89
|
+
# Analyze traces using unified API
|
|
57
90
|
if output_format != 'json':
|
|
58
91
|
typer.echo("📊 Loading traces...")
|
|
59
92
|
|
|
60
|
-
# Determine how many stack traces to collect
|
|
61
|
-
max_stacks = 0 if (show_stack_traces and show_all) else (3 if show_stack_traces else 3)
|
|
62
|
-
|
|
63
93
|
try:
|
|
64
|
-
|
|
94
|
+
result_obj = analyze_trace_pair(
|
|
65
95
|
trace1,
|
|
66
96
|
trace2,
|
|
67
|
-
|
|
68
|
-
|
|
97
|
+
phase=phase,
|
|
98
|
+
include_stacks=True,
|
|
99
|
+
on_progress=progress_callback,
|
|
100
|
+
on_metadata=metadata_callback,
|
|
69
101
|
)
|
|
102
|
+
|
|
103
|
+
results = {
|
|
104
|
+
"metadata": result_obj.metadata,
|
|
105
|
+
"operations": result_obj.operations,
|
|
106
|
+
"layers": result_obj.layers,
|
|
107
|
+
"warnings": [{"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion} for w in result_obj.warnings],
|
|
108
|
+
"architecture": result_obj.architecture.value,
|
|
109
|
+
"layer_alignments": result_obj.layer_alignments,
|
|
110
|
+
"fusion_analysis": result_obj.fusion_analysis,
|
|
111
|
+
"same_kernel_analysis": result_obj.same_kernel_analysis,
|
|
112
|
+
}
|
|
70
113
|
except ValueError as e:
|
|
71
114
|
typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
|
|
72
115
|
raise typer.Exit(1)
|
|
@@ -74,17 +117,26 @@ def compare_traces(
|
|
|
74
117
|
typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
|
|
75
118
|
raise typer.Exit(1)
|
|
76
119
|
|
|
77
|
-
# Show loading confirmation
|
|
78
120
|
if output_format != 'json':
|
|
79
121
|
meta = results["metadata"]
|
|
80
|
-
# Determine which trace is AMD and which is NVIDIA
|
|
81
122
|
if meta['trace1_platform'] == 'AMD':
|
|
82
123
|
amd_gpu, nvidia_gpu = meta['trace1_gpu'], meta['trace2_gpu']
|
|
83
124
|
else:
|
|
84
125
|
amd_gpu, nvidia_gpu = meta['trace2_gpu'], meta['trace1_gpu']
|
|
85
126
|
typer.echo(f"✅ Loaded: AMD ({amd_gpu}) vs NVIDIA ({nvidia_gpu})")
|
|
127
|
+
|
|
128
|
+
# Display warnings
|
|
129
|
+
warnings = results.get("warnings", [])
|
|
130
|
+
if warnings:
|
|
131
|
+
typer.echo()
|
|
132
|
+
for warning in warnings:
|
|
133
|
+
icon = "❌" if warning["severity"] == "error" else "⚠️" if warning["severity"] == "warning" else "ℹ️"
|
|
134
|
+
typer.secho(f"{icon} {warning['message']}", fg=typer.colors.YELLOW if warning["severity"] == "warning" else typer.colors.BLUE)
|
|
135
|
+
if warning.get("suggestion"):
|
|
136
|
+
typer.secho(f" Suggestion: {warning['suggestion']}", fg=typer.colors.BLUE)
|
|
86
137
|
typer.echo()
|
|
87
138
|
|
|
139
|
+
|
|
88
140
|
# Generate output based on format
|
|
89
141
|
if output_format == "text":
|
|
90
142
|
output_str = format_text(results, show_layers=show_layers, show_all=show_all, show_stack_traces=show_stack_traces)
|
|
@@ -108,21 +160,23 @@ def compare_traces(
|
|
|
108
160
|
typer.echo(output_str)
|
|
109
161
|
|
|
110
162
|
|
|
111
|
-
def compare_fusion(
|
|
163
|
+
def compare_align(
|
|
112
164
|
trace1: Path,
|
|
113
165
|
trace2: Path,
|
|
114
166
|
output: Path | None = None,
|
|
115
|
-
|
|
116
|
-
|
|
167
|
+
output_format: str = "json",
|
|
168
|
+
phase: str = "all",
|
|
169
|
+
layer: int | None = None,
|
|
117
170
|
) -> None:
|
|
118
|
-
"""
|
|
171
|
+
"""Align kernels at layer level for exact kernel-to-kernel comparison.
|
|
119
172
|
|
|
120
173
|
Args:
|
|
121
174
|
trace1: Path to first trace file (AMD or NVIDIA)
|
|
122
175
|
trace2: Path to second trace file (AMD or NVIDIA)
|
|
123
176
|
output: Optional output file path (default: stdout)
|
|
124
|
-
|
|
125
|
-
|
|
177
|
+
output_format: Output format ('json' only for now)
|
|
178
|
+
phase: Filter by phase ('all', 'prefill', or 'decode')
|
|
179
|
+
layer: Focus on specific layer number (optional)
|
|
126
180
|
"""
|
|
127
181
|
# Validate files exist
|
|
128
182
|
if not trace1.exists():
|
|
@@ -133,49 +187,86 @@ def compare_fusion(
|
|
|
133
187
|
typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
|
|
134
188
|
raise typer.Exit(1)
|
|
135
189
|
|
|
136
|
-
#
|
|
137
|
-
|
|
138
|
-
|
|
190
|
+
# Progress callback for JSON format (emits NDJSON to stdout)
|
|
191
|
+
def progress_callback(stage: str, fraction: float) -> None:
|
|
192
|
+
if output_format == 'json':
|
|
193
|
+
progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
|
|
194
|
+
print(progress_msg, file=sys.stdout, flush=True)
|
|
195
|
+
else:
|
|
196
|
+
percent = int(fraction * 100)
|
|
197
|
+
typer.echo(f"📊 {stage}: {percent}%", err=True)
|
|
198
|
+
|
|
199
|
+
# Metadata callback for JSON format
|
|
200
|
+
def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
|
|
201
|
+
if output_format == 'json':
|
|
202
|
+
metadata_msg = json.dumps({
|
|
203
|
+
"type": "metadata",
|
|
204
|
+
"trace1": {
|
|
205
|
+
"platform": meta1.platform,
|
|
206
|
+
"gpu": meta1.gpu_name,
|
|
207
|
+
"file_size_mb": round(meta1.file_size_mb, 1),
|
|
208
|
+
},
|
|
209
|
+
"trace2": {
|
|
210
|
+
"platform": meta2.platform,
|
|
211
|
+
"gpu": meta2.gpu_name,
|
|
212
|
+
"file_size_mb": round(meta2.file_size_mb, 1),
|
|
213
|
+
},
|
|
214
|
+
})
|
|
215
|
+
print(metadata_msg, file=sys.stdout, flush=True)
|
|
216
|
+
else:
|
|
217
|
+
typer.echo(f"📊 Trace 1: {meta1.platform} - {meta1.gpu_name} ({meta1.file_size_mb:.1f}MB)", err=True)
|
|
218
|
+
typer.echo(f"📊 Trace 2: {meta2.platform} - {meta2.gpu_name} ({meta2.file_size_mb:.1f}MB)", err=True)
|
|
219
|
+
|
|
220
|
+
# Analyze traces using unified API
|
|
221
|
+
if output_format != 'json':
|
|
139
222
|
typer.echo("📊 Loading traces...")
|
|
223
|
+
|
|
140
224
|
try:
|
|
141
|
-
|
|
225
|
+
result_obj = analyze_trace_pair(
|
|
142
226
|
trace1,
|
|
143
227
|
trace2,
|
|
144
|
-
|
|
228
|
+
phase=phase,
|
|
229
|
+
include_stacks=True,
|
|
230
|
+
on_progress=progress_callback,
|
|
231
|
+
on_metadata=metadata_callback,
|
|
145
232
|
)
|
|
233
|
+
|
|
234
|
+
results = {
|
|
235
|
+
"metadata": result_obj.metadata,
|
|
236
|
+
"layer_alignments": result_obj.layer_alignments or [],
|
|
237
|
+
"fusion_analysis": result_obj.fusion_analysis or {},
|
|
238
|
+
"same_kernel_analysis": result_obj.same_kernel_analysis or {},
|
|
239
|
+
"operations": result_obj.operations,
|
|
240
|
+
"layers": result_obj.layers,
|
|
241
|
+
"warnings": [{"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion} for w in result_obj.warnings],
|
|
242
|
+
"architecture": result_obj.architecture.value,
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
if layer is not None:
|
|
246
|
+
results["layer_alignments"] = [
|
|
247
|
+
la for la in results["layer_alignments"] if la.get("layer") == layer
|
|
248
|
+
]
|
|
249
|
+
except ValueError as e:
|
|
250
|
+
typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
|
|
251
|
+
raise typer.Exit(1)
|
|
146
252
|
except Exception as e:
|
|
147
|
-
typer.secho(
|
|
148
|
-
f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True
|
|
149
|
-
)
|
|
253
|
+
typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
|
|
150
254
|
import traceback
|
|
151
|
-
|
|
152
255
|
traceback.print_exc()
|
|
153
256
|
raise typer.Exit(1)
|
|
154
257
|
|
|
155
|
-
|
|
156
|
-
if format_type != 'json':
|
|
258
|
+
if output_format != 'json':
|
|
157
259
|
meta = results["metadata"]
|
|
158
|
-
|
|
159
|
-
typer.echo(f"✅
|
|
160
|
-
typer.echo(
|
|
161
|
-
f"Found {meta['trace1_correlation_groups']} trace1 groups and "
|
|
162
|
-
f"{meta['trace2_correlation_groups']} trace2 groups with ≥{min_group_size} kernels"
|
|
163
|
-
)
|
|
164
|
-
typer.echo(f"✅ Matched {meta['matched_groups']} correlation groups")
|
|
260
|
+
typer.echo(f"✅ Loaded: {meta.get('amd_gpu', 'Unknown')} vs {meta.get('nvidia_gpu', 'Unknown')}")
|
|
261
|
+
typer.echo(f"✅ Found {len(results['layer_alignments'])} layers")
|
|
165
262
|
typer.echo()
|
|
166
263
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
output_str = format_fusion_text(results)
|
|
170
|
-
elif format_type == "csv":
|
|
171
|
-
output_str = format_fusion_csv(results)
|
|
172
|
-
elif format_type == "json":
|
|
173
|
-
output_str = format_fusion_json(results)
|
|
264
|
+
if output_format == "json":
|
|
265
|
+
output_str = format_json(results)
|
|
174
266
|
else:
|
|
175
|
-
typer.secho(f"❌
|
|
267
|
+
typer.secho(f"❌ Format {output_format} not yet supported for align command. Use 'json'.", fg=typer.colors.RED, err=True)
|
|
176
268
|
raise typer.Exit(1)
|
|
177
269
|
|
|
178
|
-
# Write output
|
|
179
270
|
if output:
|
|
180
271
|
output.write_text(output_str)
|
|
181
272
|
typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)
|
|
@@ -6,10 +6,10 @@ wafer/api_client.py,sha256=i_Az2b2llC3DSW8yOL-BKqa7LSKuxOr8hSN40s-oQXY,6313
|
|
|
6
6
|
wafer/auth.py,sha256=dwss_se5P-FFc9IN38q4kh_dBrA6k-CguDBkivgcdj0,14003
|
|
7
7
|
wafer/autotuner.py,sha256=41WYP41pTDvMijv2h42vm89bcHtDMJXObDlWmn6xpFU,44416
|
|
8
8
|
wafer/billing.py,sha256=hEEwtrtIsbPQ3lLJNcyTLMsapUbcuvcVW_e9_0SxzVo,7199
|
|
9
|
-
wafer/cli.py,sha256=
|
|
9
|
+
wafer/cli.py,sha256=zuVZhPdML5AOBtLUqLwAwjl8XMNe9EwQkffZxtBGLx4,282748
|
|
10
10
|
wafer/cli_instructions.py,sha256=bziUKDNDAXABVMvKPLEMXm-hFSD2TcFSh-FKRYa949k,4693
|
|
11
11
|
wafer/config.py,sha256=h5Eo9_yfWqWGoPNdVQikI9GoZVUeysunSYiixf1mKcw,3411
|
|
12
|
-
wafer/corpus.py,sha256=
|
|
12
|
+
wafer/corpus.py,sha256=CY9T7wXENNDJxnrtI-XsQmXeptrFfKG4x-lngrc9_3s,24748
|
|
13
13
|
wafer/evaluate.py,sha256=HMFQD-uwC6Wky1t_0JxYZaoHWgLaTBkjxOxgpZVnGrc,190519
|
|
14
14
|
wafer/global_config.py,sha256=fhaR_RU3ufMksDmOohH1OLeQ0JT0SDW1hEip_zaP75k,11345
|
|
15
15
|
wafer/gpu_run.py,sha256=TwqXy72T7f2I7e6n5WWod3xgxCPnDhU0BgLsB4CUoQY,9716
|
|
@@ -23,11 +23,13 @@ wafer/problems.py,sha256=ce2sy10A1nnNUG3VGsseTS8jL7LZsku4dE8zVf9JHQ4,11296
|
|
|
23
23
|
wafer/rocprof_compute.py,sha256=n_yOGZaFbOXna_ghhmYWXeyUoSabgH4KkjlYq38DlHo,19888
|
|
24
24
|
wafer/rocprof_sdk.py,sha256=0Q7Ye6dUfa1anFZbqKc21rItgqva8V8VIZoSB7wqbmA,10085
|
|
25
25
|
wafer/rocprof_systems.py,sha256=4IWbMcbYk1x_8iS7P3FC_u5sgH6EXADCtR2lV9id80M,18629
|
|
26
|
+
wafer/specs_cli.py,sha256=frMEKwMflxVNpFlAuxprmr33ZZ1Oeh2lB0KWZ4oZWzw,4360
|
|
26
27
|
wafer/ssh_keys.py,sha256=MxiHlSm6wuDUFzkOQtx5K7OIbx_a6bXxE-m8OpwLx98,8130
|
|
27
28
|
wafer/target_lock.py,sha256=SDKhNzv2N7gsphGflcNni9FE5YYuAMuEthngAJEo4Gs,7809
|
|
28
29
|
wafer/targets.py,sha256=9r-iRWoKSH5cQl1LcamaX-T7cNVOg99ngIm_hlRk-qU,26922
|
|
29
|
-
wafer/
|
|
30
|
-
wafer/
|
|
30
|
+
wafer/targets_cli.py,sha256=Oe3e02rSXeNrMbe_Qv9DNfQ8dEOKodtU7BbQQWxlNwA,16348
|
|
31
|
+
wafer/targets_ops.py,sha256=wLPyq55H_wz0wEAEg8KFLYs9LIIyiVIphcsXD2NLa-E,22623
|
|
32
|
+
wafer/trace_compare.py,sha256=COuxxKY874DteOSLUvJuJFREPMBSybq9dtANi3ATsg4,10803
|
|
31
33
|
wafer/tracelens.py,sha256=g9ZIeFyNojZn4uTd3skPqIrRiL7aMJOz_-GOd3aiyy4,7998
|
|
32
34
|
wafer/wevin_cli.py,sha256=eo1ETsXIsCftXSG5AxEYYZipNGcXayKyIevs5F6MjXg,26140
|
|
33
35
|
wafer/workspaces.py,sha256=J-TXGwHXSZlzRWCew63KNvk6HLJ-zTSELRgzjryTkMk,35710
|
|
@@ -38,8 +40,8 @@ wafer/templates/optimize_kernel.py,sha256=4-MaKm_C9BQHQEllrNLLYkcdhJpcj6D-8zbJ4F
|
|
|
38
40
|
wafer/templates/optimize_kernelbench.py,sha256=T3co9Y9eSLWDrZG66gwQVFMdnGVoyUQos-TxnMMBLL8,3747
|
|
39
41
|
wafer/templates/trace_analyze.py,sha256=B7CiRlsokERzBjLL-k49kGjpU2zlJZqzTE05xbRS1WI,2878
|
|
40
42
|
wafer/tests/test_eval_cli_parity.py,sha256=SGmaj2NGBZ7GdDF53bXsECvQbV21iHZw8YeL_MJOLk0,7206
|
|
41
|
-
wafer_cli-0.2.
|
|
42
|
-
wafer_cli-0.2.
|
|
43
|
-
wafer_cli-0.2.
|
|
44
|
-
wafer_cli-0.2.
|
|
45
|
-
wafer_cli-0.2.
|
|
43
|
+
wafer_cli-0.2.27.dist-info/METADATA,sha256=Atrnv-RhoThH0JF_gczLjsQluDb3ycEsvRwn16RDf8k,2799
|
|
44
|
+
wafer_cli-0.2.27.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
45
|
+
wafer_cli-0.2.27.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
|
|
46
|
+
wafer_cli-0.2.27.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
|
|
47
|
+
wafer_cli-0.2.27.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|