wafer-cli 0.2.25__py3-none-any.whl → 0.2.26__py3-none-any.whl
- wafer/cli.py +63 -4
- wafer/corpus.py +65 -5
- wafer/trace_compare.py +139 -48
- {wafer_cli-0.2.25.dist-info → wafer_cli-0.2.26.dist-info}/METADATA +1 -1
- {wafer_cli-0.2.25.dist-info → wafer_cli-0.2.26.dist-info}/RECORD +8 -8
- {wafer_cli-0.2.25.dist-info → wafer_cli-0.2.26.dist-info}/WHEEL +0 -0
- {wafer_cli-0.2.25.dist-info → wafer_cli-0.2.26.dist-info}/entry_points.txt +0 -0
- {wafer_cli-0.2.25.dist-info → wafer_cli-0.2.26.dist-info}/top_level.txt +0 -0
wafer/cli.py
CHANGED
@@ -7787,6 +7787,9 @@ def compare_analyze(
     stack_traces: bool = typer.Option(
         False, "--stack-traces", help="Show Python stack traces for operations"
     ),
+    recommendations: bool = typer.Option(
+        False, "--recommendations", help="Generate prioritized recommendations for kernel team"
+    ),
     json: bool = typer.Option(
         False, "--json", hidden=True, help="Ignored (for compatibility with cliExecutor)"
     ),
@@ -7839,6 +7842,7 @@ def compare_analyze(
         show_layers=layers,
         show_all=all,
         show_stack_traces=stack_traces,
+        recommendations=recommendations,
     )
     _mark_command_success()
 
@@ -7883,14 +7887,69 @@ def compare_fusion_cmd(
         # CSV output to file
         wafer compare fusion amd_trace.json nvidia_trace.json --format csv -o fusion.csv
     """
-    from .trace_compare import
+    from .trace_compare import compare_align
+
+    compare_align(
+        trace1=trace1,
+        trace2=trace2,
+        output=output,
+        output_format=format,
+        phase="all",
+    )
+    _mark_command_success()
 
-
+
+@compare_app.command("align")
+def compare_align_cmd(
+    trace1: Path = typer.Argument(..., help="First trace file (AMD or NVIDIA)", exists=True),
+    trace2: Path = typer.Argument(..., help="Second trace file (AMD or NVIDIA)", exists=True),
+    format: str = typer.Option(
+        "json",
+        "--format",
+        "-f",
+        help="Output format: json",
+    ),
+    output: Path | None = typer.Option(
+        None, "--output", "-o", help="Output file (default: stdout)"
+    ),
+    phase: str = typer.Option(
+        "all",
+        "--phase",
+        help="Filter by phase: all, prefill, decode",
+    ),
+    layer: int | None = typer.Option(
+        None,
+        "--layer",
+        help="Focus on specific layer number",
+    ),
+) -> None:
+    """Align kernels at layer level for exact kernel-to-kernel comparison.
+
+    Provides kernel-to-kernel mapping across AMD and NVIDIA platforms,
+    showing which kernels correspond to each other at each layer position.
+
+    Examples:
+        # Basic alignment (stdout JSON)
+        wafer compare align amd_trace.json nvidia_trace.json
+
+        # Save to file
+        wafer compare align amd_trace.json nvidia_trace.json -o alignment.json
+
+        # Focus on decode phase only
+        wafer compare align amd_trace.json nvidia_trace.json --phase decode
+
+        # Focus on specific layer
+        wafer compare align amd_trace.json nvidia_trace.json --layer 5
+    """
+    from .trace_compare import compare_align
+
+    compare_align(
         trace1=trace1,
         trace2=trace2,
         output=output,
-
-
+        output_format=format,
+        phase=phase,
+        layer=layer,
     )
     _mark_command_success()
 
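The new `align` subcommand shown above simply forwards its CLI options to `compare_align`. The snippet below is an illustrative sketch only, not part of the package: it assumes the wheel is installed and two trace files exist on disk (file names are placeholders), and the keyword arguments mirror the `compare_align` signature visible in this diff.

```python
# Hypothetical usage sketch: drive the new alignment helper directly
# instead of going through the Typer CLI. Trace file names are placeholders.
from pathlib import Path

from wafer.trace_compare import compare_align

compare_align(
    trace1=Path("amd_trace.json"),
    trace2=Path("nvidia_trace.json"),
    output=Path("alignment.json"),  # None would print to stdout
    output_format="json",           # only 'json' is supported for now
    phase="decode",                 # 'all', 'prefill', or 'decode'
    layer=5,                        # None keeps all layers
)
```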
wafer/corpus.py
CHANGED
@@ -109,14 +109,34 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
     ),
     "hip": CorpusConfig(
         name="hip",
-        description="HIP programming guide
-        source_type="
-
-
+        description="HIP programming guide, API reference, and examples",
+        source_type="github_multi_repo",
+        repos=[
+            # HIP - main documentation and API
+            RepoSource(
+                repo="ROCm/HIP",
+                paths=["docs"],
+            ),
+            # HIP examples - code samples
+            RepoSource(
+                repo="ROCm/HIP-Examples",
+                paths=["HIP-Examples-Applications", "mini-nbody"],
+            ),
+            # clr - HIP/OpenCL runtime (low-level)
+            RepoSource(
+                repo="ROCm/clr",
+                paths=["hipamd/include", "rocclr/device/gpu"],
+            ),
+            # ROCm docs - official documentation
+            RepoSource(
+                repo="ROCm/ROCm",
+                paths=["docs"],
+            ),
+        ],
     ),
     "amd": CorpusConfig(
         name="amd",
-        description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM)",
+        description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM, FlashAttention)",
         source_type="github_multi_repo",
         repos=[
             # rocWMMA - wave matrix multiply-accumulate (WMMA) intrinsics
@@ -186,6 +206,46 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
                 repo="huggingface/hf-rocm-kernels",
                 paths=["csrc", "hf_rocm_kernels", "docs"],
             ),
+            # ROCm/flash-attention - FlashAttention for AMD GPUs
+            RepoSource(
+                repo="ROCm/flash-attention",
+                paths=["csrc", "docs"],
+            ),
+            # ROCm/triton - Triton compiler for AMD GPUs
+            RepoSource(
+                repo="ROCm/triton",
+                paths=["python/tutorials", "third_party/amd"],
+            ),
+            # ROCm/rccl - ROCm Communication Collectives Library (multi-GPU)
+            RepoSource(
+                repo="ROCm/rccl",
+                paths=["docs"],
+            ),
+            # ROCm/rocprofiler-sdk - AMD GPU profiling SDK
+            RepoSource(
+                repo="ROCm/rocprofiler-sdk",
+                paths=["docs", "samples"],
+            ),
+            # ROCm/omniperf - AMD GPU profiling tool
+            RepoSource(
+                repo="ROCm/omniperf",
+                paths=["docs", "src/omniperf_analyze"],
+            ),
+            # ROCm/omnitrace - Application tracing for AMD
+            RepoSource(
+                repo="ROCm/omnitrace",
+                paths=["docs"],
+            ),
+            # AMD GPUOpen Performance Guides
+            RepoSource(
+                repo="GPUOpen-Tools/gpu_performance_api",
+                paths=["docs"],
+            ),
+            # AMD LLVM - AMD GPU compiler backend
+            RepoSource(
+                repo="ROCm/llvm-project",
+                paths=["amd/device-libs/README.md", "llvm/docs/AMDGPUUsage.rst"],
+            ),
         ],
     ),
 }
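The corpus change above only extends the `CORPORA` registry with more GitHub sources. As a rough illustration of how that registry could be inspected, the sketch below uses only the fields visible in this diff (description, source_type, repos, repo, paths); the iteration itself is an assumption, not code from the package, and other corpus entries may not define `repos` at all.

```python
# Illustrative sketch (not part of the wheel): enumerate the GitHub sources
# behind each corpus, using only fields shown in this diff.
from wafer.corpus import CORPORA

for name, config in CORPORA.items():
    print(f"{name}: {config.description} [{config.source_type}]")
    for source in getattr(config, "repos", None) or []:
        print(f"  https://github.com/{source.repo} -> {', '.join(source.paths)}")
```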
wafer/trace_compare.py
CHANGED
@@ -6,19 +6,22 @@ All core logic is in wafer_core.lib.trace_compare.
 
 import sys
 from pathlib import Path
+from typing import Any
 
 import typer
 
+import json
+import sys
+
 from wafer_core.lib.trace_compare import (
-
-    analyze_traces,
+    analyze_trace_pair,
     format_csv,
-    format_fusion_csv,
-    format_fusion_json,
-    format_fusion_text,
     format_json,
     format_text,
+    ArchitectureType,
+    detect_architecture,
 )
+from wafer_core.lib.trace_compare.loader import StreamingMetadata
 
 
 def compare_traces(
@@ -30,6 +33,7 @@ def compare_traces(
     show_layers: bool = False,
     show_all: bool = False,
     show_stack_traces: bool = False,
+    recommendations: bool = False,
 ) -> None:
     """Compare two GPU traces and generate performance report.
 
@@ -52,21 +56,60 @@ def compare_traces(
         typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
         raise typer.Exit(1)
 
-    #
-
+    # Progress callback for JSON format (emits NDJSON to stdout)
+    def progress_callback(stage: str, fraction: float) -> None:
+        if output_format == 'json':
+            progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
+            print(progress_msg, file=sys.stdout, flush=True)
+        elif output_format != 'json':
+            percent = int(fraction * 100)
+            typer.echo(f"📊 {stage}: {percent}%", err=True)
+
+    # Metadata callback for JSON format (emits NDJSON with early GPU info)
+    def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
+        if output_format == 'json':
+            metadata_msg = json.dumps({
+                "type": "metadata",
+                "trace1": {
+                    "platform": meta1.platform,
+                    "gpu": meta1.gpu_name,
+                    "file_size_mb": round(meta1.file_size_mb, 1),
+                },
+                "trace2": {
+                    "platform": meta2.platform,
+                    "gpu": meta2.gpu_name,
+                    "file_size_mb": round(meta2.file_size_mb, 1),
+                },
+            })
+            print(metadata_msg, file=sys.stdout, flush=True)
+        else:
+            typer.echo(f"📊 Trace 1: {meta1.platform} - {meta1.gpu_name} ({meta1.file_size_mb:.1f}MB)", err=True)
+            typer.echo(f"📊 Trace 2: {meta2.platform} - {meta2.gpu_name} ({meta2.file_size_mb:.1f}MB)", err=True)
+
+    # Analyze traces using unified API
     if output_format != 'json':
         typer.echo("📊 Loading traces...")
 
-    # Determine how many stack traces to collect
-    max_stacks = 0 if (show_stack_traces and show_all) else (3 if show_stack_traces else 3)
-
     try:
-
+        result_obj = analyze_trace_pair(
             trace1,
             trace2,
-
-
+            phase=phase,
+            include_stacks=True,
+            on_progress=progress_callback,
+            on_metadata=metadata_callback,
         )
+
+        results = {
+            "metadata": result_obj.metadata,
+            "operations": result_obj.operations,
+            "layers": result_obj.layers,
+            "warnings": [{"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion} for w in result_obj.warnings],
+            "architecture": result_obj.architecture.value,
+            "layer_alignments": result_obj.layer_alignments,
+            "fusion_analysis": result_obj.fusion_analysis,
+            "same_kernel_analysis": result_obj.same_kernel_analysis,
+        }
     except ValueError as e:
         typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
         raise typer.Exit(1)
@@ -74,17 +117,26 @@ def compare_traces(
         typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
         raise typer.Exit(1)
 
-    # Show loading confirmation
     if output_format != 'json':
         meta = results["metadata"]
-        # Determine which trace is AMD and which is NVIDIA
         if meta['trace1_platform'] == 'AMD':
             amd_gpu, nvidia_gpu = meta['trace1_gpu'], meta['trace2_gpu']
         else:
             amd_gpu, nvidia_gpu = meta['trace2_gpu'], meta['trace1_gpu']
         typer.echo(f"✅ Loaded: AMD ({amd_gpu}) vs NVIDIA ({nvidia_gpu})")
+
+        # Display warnings
+        warnings = results.get("warnings", [])
+        if warnings:
+            typer.echo()
+            for warning in warnings:
+                icon = "❌" if warning["severity"] == "error" else "⚠️" if warning["severity"] == "warning" else "ℹ️"
+                typer.secho(f"{icon} {warning['message']}", fg=typer.colors.YELLOW if warning["severity"] == "warning" else typer.colors.BLUE)
+                if warning.get("suggestion"):
+                    typer.secho(f" Suggestion: {warning['suggestion']}", fg=typer.colors.BLUE)
         typer.echo()
 
+
     # Generate output based on format
     if output_format == "text":
         output_str = format_text(results, show_layers=show_layers, show_all=show_all, show_stack_traces=show_stack_traces)
@@ -108,21 +160,23 @@ def compare_traces(
         typer.echo(output_str)
 
 
-def
+def compare_align(
     trace1: Path,
     trace2: Path,
     output: Path | None = None,
-
-
+    output_format: str = "json",
+    phase: str = "all",
+    layer: int | None = None,
 ) -> None:
-    """
+    """Align kernels at layer level for exact kernel-to-kernel comparison.
 
     Args:
         trace1: Path to first trace file (AMD or NVIDIA)
         trace2: Path to second trace file (AMD or NVIDIA)
         output: Optional output file path (default: stdout)
-
-
+        output_format: Output format ('json' only for now)
+        phase: Filter by phase ('all', 'prefill', or 'decode')
+        layer: Focus on specific layer number (optional)
     """
     # Validate files exist
     if not trace1.exists():
@@ -133,49 +187,86 @@ def compare_fusion(
         typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
         raise typer.Exit(1)
 
-    #
-
-
+    # Progress callback for JSON format (emits NDJSON to stdout)
+    def progress_callback(stage: str, fraction: float) -> None:
+        if output_format == 'json':
+            progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
+            print(progress_msg, file=sys.stdout, flush=True)
+        else:
+            percent = int(fraction * 100)
+            typer.echo(f"📊 {stage}: {percent}%", err=True)
+
+    # Metadata callback for JSON format
+    def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
+        if output_format == 'json':
+            metadata_msg = json.dumps({
+                "type": "metadata",
+                "trace1": {
+                    "platform": meta1.platform,
+                    "gpu": meta1.gpu_name,
+                    "file_size_mb": round(meta1.file_size_mb, 1),
+                },
+                "trace2": {
+                    "platform": meta2.platform,
+                    "gpu": meta2.gpu_name,
+                    "file_size_mb": round(meta2.file_size_mb, 1),
+                },
+            })
+            print(metadata_msg, file=sys.stdout, flush=True)
+        else:
+            typer.echo(f"📊 Trace 1: {meta1.platform} - {meta1.gpu_name} ({meta1.file_size_mb:.1f}MB)", err=True)
+            typer.echo(f"📊 Trace 2: {meta2.platform} - {meta2.gpu_name} ({meta2.file_size_mb:.1f}MB)", err=True)
+
+    # Analyze traces using unified API
+    if output_format != 'json':
         typer.echo("📊 Loading traces...")
+
     try:
-
+        result_obj = analyze_trace_pair(
             trace1,
             trace2,
-
+            phase=phase,
+            include_stacks=True,
+            on_progress=progress_callback,
+            on_metadata=metadata_callback,
         )
+
+        results = {
+            "metadata": result_obj.metadata,
+            "layer_alignments": result_obj.layer_alignments or [],
+            "fusion_analysis": result_obj.fusion_analysis or {},
+            "same_kernel_analysis": result_obj.same_kernel_analysis or {},
+            "operations": result_obj.operations,
+            "layers": result_obj.layers,
+            "warnings": [{"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion} for w in result_obj.warnings],
+            "architecture": result_obj.architecture.value,
+        }
+
+        if layer is not None:
+            results["layer_alignments"] = [
+                la for la in results["layer_alignments"] if la.get("layer") == layer
+            ]
+    except ValueError as e:
+        typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
+        raise typer.Exit(1)
     except Exception as e:
-        typer.secho(
-            f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True
-        )
+        typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
         import traceback
-
         traceback.print_exc()
         raise typer.Exit(1)
 
-
-    if format_type != 'json':
+    if output_format != 'json':
         meta = results["metadata"]
-
-        typer.echo(f"✅
-        typer.echo(
-            f"Found {meta['trace1_correlation_groups']} trace1 groups and "
-            f"{meta['trace2_correlation_groups']} trace2 groups with ≥{min_group_size} kernels"
-        )
-        typer.echo(f"✅ Matched {meta['matched_groups']} correlation groups")
+        typer.echo(f"✅ Loaded: {meta.get('amd_gpu', 'Unknown')} vs {meta.get('nvidia_gpu', 'Unknown')}")
+        typer.echo(f"✅ Found {len(results['layer_alignments'])} layers")
         typer.echo()
 
-
-
-        output_str = format_fusion_text(results)
-    elif format_type == "csv":
-        output_str = format_fusion_csv(results)
-    elif format_type == "json":
-        output_str = format_fusion_json(results)
+    if output_format == "json":
+        output_str = format_json(results)
     else:
-        typer.secho(f"❌
+        typer.secho(f"❌ Format {output_format} not yet supported for align command. Use 'json'.", fg=typer.colors.RED, err=True)
         raise typer.Exit(1)
 
-    # Write output
     if output:
         output.write_text(output_str)
         typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)
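In JSON mode, the code above interleaves one-line progress and metadata messages on stdout before the final report. A possible consumer is sketched below; it is not part of the package. It assumes the `wafer compare align` command and trace file names shown in the docstring examples, and it simply skips any output line that is not a single-line JSON object with a `type` field (such as the final report).

```python
# Hedged consumer sketch: read the NDJSON progress/metadata stream that the
# align command emits on stdout when the output format is JSON.
import json
import subprocess

proc = subprocess.Popen(
    ["wafer", "compare", "align", "amd_trace.json", "nvidia_trace.json"],
    stdout=subprocess.PIPE,
    text=True,
)
assert proc.stdout is not None
for line in proc.stdout:
    try:
        msg = json.loads(line)
    except json.JSONDecodeError:
        continue  # final report or other non-NDJSON output
    if isinstance(msg, dict) and msg.get("type") == "progress":
        print(f"{msg['stage']}: {msg['fraction']:.0%}")
    elif isinstance(msg, dict) and msg.get("type") == "metadata":
        t1, t2 = msg["trace1"], msg["trace2"]
        print(f"{t1['platform']} {t1['gpu']} vs {t2['platform']} {t2['gpu']}")
proc.wait()
```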
{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.26.dist-info}/RECORD
CHANGED

@@ -6,10 +6,10 @@ wafer/api_client.py,sha256=i_Az2b2llC3DSW8yOL-BKqa7LSKuxOr8hSN40s-oQXY,6313
 wafer/auth.py,sha256=dwss_se5P-FFc9IN38q4kh_dBrA6k-CguDBkivgcdj0,14003
 wafer/autotuner.py,sha256=41WYP41pTDvMijv2h42vm89bcHtDMJXObDlWmn6xpFU,44416
 wafer/billing.py,sha256=hEEwtrtIsbPQ3lLJNcyTLMsapUbcuvcVW_e9_0SxzVo,7199
-wafer/cli.py,sha256=
+wafer/cli.py,sha256=s3m6SJzK1vRJxaQCrd_I4rcxrt3skty0GBdFHzIBc6U,279424
 wafer/cli_instructions.py,sha256=bziUKDNDAXABVMvKPLEMXm-hFSD2TcFSh-FKRYa949k,4693
 wafer/config.py,sha256=h5Eo9_yfWqWGoPNdVQikI9GoZVUeysunSYiixf1mKcw,3411
-wafer/corpus.py,sha256=
+wafer/corpus.py,sha256=CY9T7wXENNDJxnrtI-XsQmXeptrFfKG4x-lngrc9_3s,24748
 wafer/evaluate.py,sha256=HMFQD-uwC6Wky1t_0JxYZaoHWgLaTBkjxOxgpZVnGrc,190519
 wafer/global_config.py,sha256=fhaR_RU3ufMksDmOohH1OLeQ0JT0SDW1hEip_zaP75k,11345
 wafer/gpu_run.py,sha256=TwqXy72T7f2I7e6n5WWod3xgxCPnDhU0BgLsB4CUoQY,9716
@@ -27,7 +27,7 @@ wafer/ssh_keys.py,sha256=MxiHlSm6wuDUFzkOQtx5K7OIbx_a6bXxE-m8OpwLx98,8130
 wafer/target_lock.py,sha256=SDKhNzv2N7gsphGflcNni9FE5YYuAMuEthngAJEo4Gs,7809
 wafer/targets.py,sha256=9r-iRWoKSH5cQl1LcamaX-T7cNVOg99ngIm_hlRk-qU,26922
 wafer/targets_ops.py,sha256=jN1oIBx0mutxRNE9xpIc7SaBxPkVmOyus2eqn0kEKNI,21475
-wafer/trace_compare.py,sha256=
+wafer/trace_compare.py,sha256=COuxxKY874DteOSLUvJuJFREPMBSybq9dtANi3ATsg4,10803
 wafer/tracelens.py,sha256=g9ZIeFyNojZn4uTd3skPqIrRiL7aMJOz_-GOd3aiyy4,7998
 wafer/wevin_cli.py,sha256=eo1ETsXIsCftXSG5AxEYYZipNGcXayKyIevs5F6MjXg,26140
 wafer/workspaces.py,sha256=J-TXGwHXSZlzRWCew63KNvk6HLJ-zTSELRgzjryTkMk,35710
@@ -38,8 +38,8 @@ wafer/templates/optimize_kernel.py,sha256=4-MaKm_C9BQHQEllrNLLYkcdhJpcj6D-8zbJ4F
 wafer/templates/optimize_kernelbench.py,sha256=T3co9Y9eSLWDrZG66gwQVFMdnGVoyUQos-TxnMMBLL8,3747
 wafer/templates/trace_analyze.py,sha256=B7CiRlsokERzBjLL-k49kGjpU2zlJZqzTE05xbRS1WI,2878
 wafer/tests/test_eval_cli_parity.py,sha256=SGmaj2NGBZ7GdDF53bXsECvQbV21iHZw8YeL_MJOLk0,7206
-wafer_cli-0.2.
-wafer_cli-0.2.
-wafer_cli-0.2.
-wafer_cli-0.2.
-wafer_cli-0.2.
+wafer_cli-0.2.26.dist-info/METADATA,sha256=IM8Eatar1KYIBo1hHEBjvpX6J272f0PWfV4mwhV1jIY,2799
+wafer_cli-0.2.26.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+wafer_cli-0.2.26.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
+wafer_cli-0.2.26.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
+wafer_cli-0.2.26.dist-info/RECORD,,

{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.26.dist-info}/WHEEL
File without changes

{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.26.dist-info}/entry_points.txt
File without changes

{wafer_cli-0.2.25.dist-info → wafer_cli-0.2.26.dist-info}/top_level.txt
File without changes