wafer-cli 0.2.25__tar.gz → 0.2.26__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/PKG-INFO +1 -1
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/pyproject.toml +1 -1
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/cli.py +63 -4
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/corpus.py +65 -5
- wafer_cli-0.2.26/wafer/trace_compare.py +274 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/PKG-INFO +1 -1
- wafer_cli-0.2.25/wafer/trace_compare.py +0 -183
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/README.md +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/setup.cfg +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_analytics.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_auth.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_billing.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_cli_coverage.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_cli_parity_integration.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_config_integration.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_file_operations_integration.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_kernel_scope_cli.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_nsys_analyze.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_nsys_profile.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_output.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_rocprof_compute_integration.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_skill_commands.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_ssh_integration.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_targets_ops.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_wevin_cli.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_workflow_integration.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/GUIDE.md +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/__init__.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/agent_defaults.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/analytics.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/api_client.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/auth.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/autotuner.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/billing.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/cli_instructions.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/config.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/evaluate.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/global_config.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/gpu_run.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/inference.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/kernel_scope.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/ncu_analyze.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/nsys_analyze.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/nsys_profile.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/output.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/problems.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/rocprof_compute.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/rocprof_sdk.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/rocprof_systems.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/skills/wafer-guide/SKILL.md +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/ssh_keys.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/target_lock.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/targets.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/targets_ops.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/templates/__init__.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/templates/ask_docs.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/templates/optimize_kernel.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/templates/optimize_kernelbench.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/templates/trace_analyze.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/tests/test_eval_cli_parity.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/tracelens.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/wevin_cli.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/workspaces.py +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/SOURCES.txt +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/dependency_links.txt +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/entry_points.txt +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/requires.txt +0 -0
- {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/top_level.txt +0 -0
|
@@ -7787,6 +7787,9 @@ def compare_analyze(
|
|
|
7787
7787
|
stack_traces: bool = typer.Option(
|
|
7788
7788
|
False, "--stack-traces", help="Show Python stack traces for operations"
|
|
7789
7789
|
),
|
|
7790
|
+
recommendations: bool = typer.Option(
|
|
7791
|
+
False, "--recommendations", help="Generate prioritized recommendations for kernel team"
|
|
7792
|
+
),
|
|
7790
7793
|
json: bool = typer.Option(
|
|
7791
7794
|
False, "--json", hidden=True, help="Ignored (for compatibility with cliExecutor)"
|
|
7792
7795
|
),
|
|
@@ -7839,6 +7842,7 @@ def compare_analyze(
|
|
|
7839
7842
|
show_layers=layers,
|
|
7840
7843
|
show_all=all,
|
|
7841
7844
|
show_stack_traces=stack_traces,
|
|
7845
|
+
recommendations=recommendations,
|
|
7842
7846
|
)
|
|
7843
7847
|
_mark_command_success()
|
|
7844
7848
|
|
|
@@ -7883,14 +7887,69 @@ def compare_fusion_cmd(
|
|
|
7883
7887
|
# CSV output to file
|
|
7884
7888
|
wafer compare fusion amd_trace.json nvidia_trace.json --format csv -o fusion.csv
|
|
7885
7889
|
"""
|
|
7886
|
-
from .trace_compare import
|
|
7890
|
+
from .trace_compare import compare_align
|
|
7891
|
+
|
|
7892
|
+
compare_align(
|
|
7893
|
+
trace1=trace1,
|
|
7894
|
+
trace2=trace2,
|
|
7895
|
+
output=output,
|
|
7896
|
+
output_format=format,
|
|
7897
|
+
phase="all",
|
|
7898
|
+
)
|
|
7899
|
+
_mark_command_success()
|
|
7887
7900
|
|
|
7888
|
-
|
|
7901
|
+
|
|
7902
|
+
@compare_app.command("align")
def compare_align_cmd(
    trace1: Path = typer.Argument(
        ...,
        help="First trace file (AMD or NVIDIA)",
        exists=True,
    ),
    trace2: Path = typer.Argument(
        ...,
        help="Second trace file (AMD or NVIDIA)",
        exists=True,
    ),
    format: str = typer.Option("json", "--format", "-f", help="Output format: json"),
    output: Path | None = typer.Option(
        None,
        "--output",
        "-o",
        help="Output file (default: stdout)",
    ),
    phase: str = typer.Option("all", "--phase", help="Filter by phase: all, prefill, decode"),
    layer: int | None = typer.Option(None, "--layer", help="Focus on specific layer number"),
) -> None:
    """Align kernels at layer level for exact kernel-to-kernel comparison.

    Provides kernel-to-kernel mapping across AMD and NVIDIA platforms,
    showing which kernels correspond to each other at each layer position.

    Examples:
        # Basic alignment (stdout JSON)
        wafer compare align amd_trace.json nvidia_trace.json

        # Save to file
        wafer compare align amd_trace.json nvidia_trace.json -o alignment.json

        # Focus on decode phase only
        wafer compare align amd_trace.json nvidia_trace.json --phase decode

        # Focus on specific layer
        wafer compare align amd_trace.json nvidia_trace.json --layer 5
    """
    # The implementation lives in the sibling trace_compare module and is
    # imported at call time, inside the command body.
    from .trace_compare import compare_align as _run_align

    # Forward every CLI option unchanged; `format` maps to `output_format`.
    _run_align(
        trace1=trace1,
        trace2=trace2,
        output=output,
        output_format=format,
        phase=phase,
        layer=layer,
    )

    # Only reached when the alignment did not exit early.
    _mark_command_success()
|
|
7896
7955
|
|
|
@@ -109,14 +109,34 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
|
|
|
109
109
|
),
|
|
110
110
|
"hip": CorpusConfig(
|
|
111
111
|
name="hip",
|
|
112
|
-
description="HIP programming guide
|
|
113
|
-
source_type="
|
|
114
|
-
|
|
115
|
-
|
|
112
|
+
description="HIP programming guide, API reference, and examples",
|
|
113
|
+
source_type="github_multi_repo",
|
|
114
|
+
repos=[
|
|
115
|
+
# HIP - main documentation and API
|
|
116
|
+
RepoSource(
|
|
117
|
+
repo="ROCm/HIP",
|
|
118
|
+
paths=["docs"],
|
|
119
|
+
),
|
|
120
|
+
# HIP examples - code samples
|
|
121
|
+
RepoSource(
|
|
122
|
+
repo="ROCm/HIP-Examples",
|
|
123
|
+
paths=["HIP-Examples-Applications", "mini-nbody"],
|
|
124
|
+
),
|
|
125
|
+
# clr - HIP/OpenCL runtime (low-level)
|
|
126
|
+
RepoSource(
|
|
127
|
+
repo="ROCm/clr",
|
|
128
|
+
paths=["hipamd/include", "rocclr/device/gpu"],
|
|
129
|
+
),
|
|
130
|
+
# ROCm docs - official documentation
|
|
131
|
+
RepoSource(
|
|
132
|
+
repo="ROCm/ROCm",
|
|
133
|
+
paths=["docs"],
|
|
134
|
+
),
|
|
135
|
+
],
|
|
116
136
|
),
|
|
117
137
|
"amd": CorpusConfig(
|
|
118
138
|
name="amd",
|
|
119
|
-
description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM)",
|
|
139
|
+
description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM, FlashAttention)",
|
|
120
140
|
source_type="github_multi_repo",
|
|
121
141
|
repos=[
|
|
122
142
|
# rocWMMA - wave matrix multiply-accumulate (WMMA) intrinsics
|
|
@@ -186,6 +206,46 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
|
|
|
186
206
|
repo="huggingface/hf-rocm-kernels",
|
|
187
207
|
paths=["csrc", "hf_rocm_kernels", "docs"],
|
|
188
208
|
),
|
|
209
|
+
# ROCm/flash-attention - FlashAttention for AMD GPUs
|
|
210
|
+
RepoSource(
|
|
211
|
+
repo="ROCm/flash-attention",
|
|
212
|
+
paths=["csrc", "docs"],
|
|
213
|
+
),
|
|
214
|
+
# ROCm/triton - Triton compiler for AMD GPUs
|
|
215
|
+
RepoSource(
|
|
216
|
+
repo="ROCm/triton",
|
|
217
|
+
paths=["python/tutorials", "third_party/amd"],
|
|
218
|
+
),
|
|
219
|
+
# ROCm/rccl - ROCm Communication Collectives Library (multi-GPU)
|
|
220
|
+
RepoSource(
|
|
221
|
+
repo="ROCm/rccl",
|
|
222
|
+
paths=["docs"],
|
|
223
|
+
),
|
|
224
|
+
# ROCm/rocprofiler-sdk - AMD GPU profiling SDK
|
|
225
|
+
RepoSource(
|
|
226
|
+
repo="ROCm/rocprofiler-sdk",
|
|
227
|
+
paths=["docs", "samples"],
|
|
228
|
+
),
|
|
229
|
+
# ROCm/omniperf - AMD GPU profiling tool
|
|
230
|
+
RepoSource(
|
|
231
|
+
repo="ROCm/omniperf",
|
|
232
|
+
paths=["docs", "src/omniperf_analyze"],
|
|
233
|
+
),
|
|
234
|
+
# ROCm/omnitrace - Application tracing for AMD
|
|
235
|
+
RepoSource(
|
|
236
|
+
repo="ROCm/omnitrace",
|
|
237
|
+
paths=["docs"],
|
|
238
|
+
),
|
|
239
|
+
# AMD GPUOpen Performance Guides
|
|
240
|
+
RepoSource(
|
|
241
|
+
repo="GPUOpen-Tools/gpu_performance_api",
|
|
242
|
+
paths=["docs"],
|
|
243
|
+
),
|
|
244
|
+
# AMD LLVM - AMD GPU compiler backend
|
|
245
|
+
RepoSource(
|
|
246
|
+
repo="ROCm/llvm-project",
|
|
247
|
+
paths=["amd/device-libs/README.md", "llvm/docs/AMDGPUUsage.rst"],
|
|
248
|
+
),
|
|
189
249
|
],
|
|
190
250
|
),
|
|
191
251
|
}
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
"""CLI wrapper for trace comparison commands.
|
|
2
|
+
|
|
3
|
+
This module provides the CLI interface for the `wafer compare` commands.
|
|
4
|
+
All core logic is in wafer_core.lib.trace_compare.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import typer
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
from wafer_core.lib.trace_compare import (
|
|
17
|
+
analyze_trace_pair,
|
|
18
|
+
format_csv,
|
|
19
|
+
format_json,
|
|
20
|
+
format_text,
|
|
21
|
+
ArchitectureType,
|
|
22
|
+
detect_architecture,
|
|
23
|
+
)
|
|
24
|
+
from wafer_core.lib.trace_compare.loader import StreamingMetadata
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def compare_traces(
    trace1: Path,
    trace2: Path,
    output: Path | None = None,
    output_format: str = "text",
    phase: str = "all",
    show_layers: bool = False,
    show_all: bool = False,
    show_stack_traces: bool = False,
    recommendations: bool = False,
) -> None:
    """Compare two GPU traces and generate a performance report.

    Args:
        trace1: Path to first trace file (AMD or NVIDIA)
        trace2: Path to second trace file (AMD or NVIDIA)
        output: Optional output file path (default: stdout)
        output_format: Output format ('text', 'text-layers', 'csv', 'csv-layers', or 'json')
        phase: Filter by phase ('all', 'prefill', or 'decode')
        show_layers: Show layer-wise performance breakdown (text format only)
        show_all: Show all items without truncation (applies to layers, operations, kernels)
        show_stack_traces: Show Python stack traces for operations (only forwarded
            to the text formatter; stacks are always collected during analysis)
        recommendations: Accepted for CLI compatibility. NOTE(review): this
            function does not currently read the flag, so it has no effect here.

    Raises:
        typer.Exit: With exit code 1 when a trace file is missing, the output
            format is unknown, or the analysis fails.
    """
    supported_formats = ("text", "text-layers", "csv", "csv-layers", "json")
    json_mode = output_format == "json"

    # Validate files exist (trace1 is reported first, as before).
    for trace_path in (trace1, trace2):
        if not trace_path.exists():
            typer.secho(f"❌ File not found: {trace_path}", fg=typer.colors.RED, err=True)
            raise typer.Exit(1)

    # Reject unknown formats *before* the analysis runs, so a typo in --format
    # fails immediately instead of after both traces have been processed.
    if output_format not in supported_formats:
        typer.secho(f"❌ Unknown format: {output_format}", fg=typer.colors.RED, err=True)
        raise typer.Exit(1)

    def progress_callback(stage: str, fraction: float) -> None:
        """Report progress: NDJSON on stdout for JSON mode, a percent line on stderr otherwise."""
        if json_mode:
            progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
            print(progress_msg, file=sys.stdout, flush=True)
        else:
            percent = int(fraction * 100)
            typer.echo(f"📊 {stage}: {percent}%", err=True)

    def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
        """Report early per-trace platform/GPU/size info in the same two channels."""
        if json_mode:
            metadata_msg = json.dumps({
                "type": "metadata",
                "trace1": {
                    "platform": meta1.platform,
                    "gpu": meta1.gpu_name,
                    "file_size_mb": round(meta1.file_size_mb, 1),
                },
                "trace2": {
                    "platform": meta2.platform,
                    "gpu": meta2.gpu_name,
                    "file_size_mb": round(meta2.file_size_mb, 1),
                },
            })
            print(metadata_msg, file=sys.stdout, flush=True)
        else:
            typer.echo(f"📊 Trace 1: {meta1.platform} - {meta1.gpu_name} ({meta1.file_size_mb:.1f}MB)", err=True)
            typer.echo(f"📊 Trace 2: {meta2.platform} - {meta2.gpu_name} ({meta2.file_size_mb:.1f}MB)", err=True)

    # Analyze traces using unified API
    if not json_mode:
        # NOTE(review): this status line goes to stdout, so it is mixed into
        # text/csv reports written to stdout; kept as-is for compatibility.
        typer.echo("📊 Loading traces...")

    try:
        result_obj = analyze_trace_pair(
            trace1,
            trace2,
            phase=phase,
            include_stacks=True,
            on_progress=progress_callback,
            on_metadata=metadata_callback,
        )

        # Flatten the result object into the plain dict the formatters consume.
        results = {
            "metadata": result_obj.metadata,
            "operations": result_obj.operations,
            "layers": result_obj.layers,
            "warnings": [
                {"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion}
                for w in result_obj.warnings
            ],
            "architecture": result_obj.architecture.value,
            "layer_alignments": result_obj.layer_alignments,
            "fusion_analysis": result_obj.fusion_analysis,
            "same_kernel_analysis": result_obj.same_kernel_analysis,
        }
    except ValueError as e:
        typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
        raise typer.Exit(1) from e
    except Exception as e:
        typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
        raise typer.Exit(1) from e

    if not json_mode:
        meta = results["metadata"]
        # Either trace may be the AMD one; order the pair for the summary line.
        if meta['trace1_platform'] == 'AMD':
            amd_gpu, nvidia_gpu = meta['trace1_gpu'], meta['trace2_gpu']
        else:
            amd_gpu, nvidia_gpu = meta['trace2_gpu'], meta['trace1_gpu']
        typer.echo(f"✅ Loaded: AMD ({amd_gpu}) vs NVIDIA ({nvidia_gpu})")

        # Display warnings. Errors are shown in red (they previously shared the
        # blue info colour), warnings in yellow, anything else in blue.
        severity_style = {
            "error": ("❌", typer.colors.RED),
            "warning": ("⚠️", typer.colors.YELLOW),
        }
        warnings = results["warnings"]
        if warnings:
            typer.echo()
            for warning in warnings:
                icon, color = severity_style.get(warning["severity"], ("ℹ️", typer.colors.BLUE))
                typer.secho(f"{icon} {warning['message']}", fg=color)
                if warning.get("suggestion"):
                    typer.secho(f"   Suggestion: {warning['suggestion']}", fg=typer.colors.BLUE)
            typer.echo()

    # Generate output based on format (already validated above).
    if output_format in ("text", "text-layers"):
        output_str = format_text(
            results,
            show_layers=show_layers or output_format == "text-layers",
            show_all=show_all,
            show_stack_traces=show_stack_traces,
        )
    elif output_format in ("csv", "csv-layers"):
        report_type = "layers" if output_format == "csv-layers" else "operations"
        output_str = format_csv(results, report_type=report_type)
    else:  # "json"
        output_str = format_json(results)

    # Write output
    if output:
        output.write_text(output_str)
        typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)
    else:
        typer.echo(output_str)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def compare_align(
    trace1: Path,
    trace2: Path,
    output: Path | None = None,
    output_format: str = "json",
    phase: str = "all",
    layer: int | None = None,
) -> None:
    """Align kernels at layer level for exact kernel-to-kernel comparison.

    Progress and early metadata are emitted as NDJSON lines on stdout while
    the analysis runs; the final JSON document is then written to ``output``
    or echoed to stdout.

    Args:
        trace1: Path to first trace file (AMD or NVIDIA)
        trace2: Path to second trace file (AMD or NVIDIA)
        output: Optional output file path (default: stdout)
        output_format: Output format ('json' only for now)
        phase: Filter by phase ('all', 'prefill', or 'decode')
        layer: Focus on specific layer number (optional); when given, only
            layer alignments whose "layer" entry equals it are kept

    Raises:
        typer.Exit: With exit code 1 when a trace file is missing, the output
            format is not 'json', or the analysis fails.
    """
    # Validate files exist (trace1 is reported first, as before).
    for trace_path in (trace1, trace2):
        if not trace_path.exists():
            typer.secho(f"❌ File not found: {trace_path}", fg=typer.colors.RED, err=True)
            raise typer.Exit(1)

    # Only JSON is supported. Reject anything else up front rather than after
    # the full analysis has already been run, as happened previously.
    if output_format != "json":
        typer.secho(
            f"❌ Format {output_format} not yet supported for align command. Use 'json'.",
            fg=typer.colors.RED,
            err=True,
        )
        raise typer.Exit(1)

    def _describe(meta: StreamingMetadata) -> dict:
        """Build the per-trace entry of the NDJSON metadata message."""
        return {
            "platform": meta.platform,
            "gpu": meta.gpu_name,
            "file_size_mb": round(meta.file_size_mb, 1),
        }

    def progress_callback(stage: str, fraction: float) -> None:
        """Emit one NDJSON progress line on stdout."""
        progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
        print(progress_msg, file=sys.stdout, flush=True)

    def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
        """Emit one NDJSON metadata line on stdout with early GPU info."""
        metadata_msg = json.dumps({
            "type": "metadata",
            "trace1": _describe(meta1),
            "trace2": _describe(meta2),
        })
        print(metadata_msg, file=sys.stdout, flush=True)

    # Analyze traces using unified API
    try:
        result_obj = analyze_trace_pair(
            trace1,
            trace2,
            phase=phase,
            include_stacks=True,
            on_progress=progress_callback,
            on_metadata=metadata_callback,
        )

        # Alignment-related sections fall back to empty containers when the
        # analysis returned nothing for them.
        results = {
            "metadata": result_obj.metadata,
            "layer_alignments": result_obj.layer_alignments or [],
            "fusion_analysis": result_obj.fusion_analysis or {},
            "same_kernel_analysis": result_obj.same_kernel_analysis or {},
            "operations": result_obj.operations,
            "layers": result_obj.layers,
            "warnings": [
                {"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion}
                for w in result_obj.warnings
            ],
            "architecture": result_obj.architecture.value,
        }

        if layer is not None:
            # Kept inside the try so a malformed alignment entry is reported
            # through the same error path as an analysis failure.
            results["layer_alignments"] = [
                la for la in results["layer_alignments"] if la.get("layer") == layer
            ]
    except ValueError as e:
        typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
        raise typer.Exit(1) from e
    except Exception as e:
        typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
        import traceback

        # Unexpected failure: print the full traceback to aid debugging.
        traceback.print_exc()
        raise typer.Exit(1) from e

    output_str = format_json(results)

    if output:
        output.write_text(output_str)
        typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)
    else:
        typer.echo(output_str)
|
|
@@ -1,183 +0,0 @@
|
|
|
1
|
-
"""CLI wrapper for trace comparison commands.
|
|
2
|
-
|
|
3
|
-
This module provides the CLI interface for the `wafer compare` commands.
|
|
4
|
-
All core logic is in wafer_core.lib.trace_compare.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import sys
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
|
|
10
|
-
import typer
|
|
11
|
-
|
|
12
|
-
from wafer_core.lib.trace_compare import (
|
|
13
|
-
analyze_fusion_differences,
|
|
14
|
-
analyze_traces,
|
|
15
|
-
format_csv,
|
|
16
|
-
format_fusion_csv,
|
|
17
|
-
format_fusion_json,
|
|
18
|
-
format_fusion_text,
|
|
19
|
-
format_json,
|
|
20
|
-
format_text,
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def compare_traces(
|
|
25
|
-
trace1: Path,
|
|
26
|
-
trace2: Path,
|
|
27
|
-
output: Path | None = None,
|
|
28
|
-
output_format: str = "text",
|
|
29
|
-
phase: str = "all",
|
|
30
|
-
show_layers: bool = False,
|
|
31
|
-
show_all: bool = False,
|
|
32
|
-
show_stack_traces: bool = False,
|
|
33
|
-
) -> None:
|
|
34
|
-
"""Compare two GPU traces and generate performance report.
|
|
35
|
-
|
|
36
|
-
Args:
|
|
37
|
-
trace1: Path to first trace file (AMD or NVIDIA)
|
|
38
|
-
trace2: Path to second trace file (AMD or NVIDIA)
|
|
39
|
-
output: Optional output file path (default: stdout)
|
|
40
|
-
output_format: Output format ('text', 'text-layers', 'csv', 'csv-layers', or 'json')
|
|
41
|
-
phase: Filter by phase ('all', 'prefill', or 'decode')
|
|
42
|
-
show_layers: Show layer-wise performance breakdown (text format only)
|
|
43
|
-
show_all: Show all items without truncation (applies to layers, operations, kernels)
|
|
44
|
-
show_stack_traces: Show Python stack traces for operations
|
|
45
|
-
"""
|
|
46
|
-
# Validate files exist
|
|
47
|
-
if not trace1.exists():
|
|
48
|
-
typer.secho(f"❌ File not found: {trace1}", fg=typer.colors.RED, err=True)
|
|
49
|
-
raise typer.Exit(1)
|
|
50
|
-
|
|
51
|
-
if not trace2.exists():
|
|
52
|
-
typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
|
|
53
|
-
raise typer.Exit(1)
|
|
54
|
-
|
|
55
|
-
# Analyze traces
|
|
56
|
-
# Only show progress messages for non-JSON formats (JSON needs clean stdout)
|
|
57
|
-
if output_format != 'json':
|
|
58
|
-
typer.echo("📊 Loading traces...")
|
|
59
|
-
|
|
60
|
-
# Determine how many stack traces to collect
|
|
61
|
-
max_stacks = 0 if (show_stack_traces and show_all) else (3 if show_stack_traces else 3)
|
|
62
|
-
|
|
63
|
-
try:
|
|
64
|
-
results = analyze_traces(
|
|
65
|
-
trace1,
|
|
66
|
-
trace2,
|
|
67
|
-
phase_filter=phase,
|
|
68
|
-
max_stacks=max_stacks,
|
|
69
|
-
)
|
|
70
|
-
except ValueError as e:
|
|
71
|
-
typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
|
|
72
|
-
raise typer.Exit(1)
|
|
73
|
-
except Exception as e:
|
|
74
|
-
typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
|
|
75
|
-
raise typer.Exit(1)
|
|
76
|
-
|
|
77
|
-
# Show loading confirmation
|
|
78
|
-
if output_format != 'json':
|
|
79
|
-
meta = results["metadata"]
|
|
80
|
-
# Determine which trace is AMD and which is NVIDIA
|
|
81
|
-
if meta['trace1_platform'] == 'AMD':
|
|
82
|
-
amd_gpu, nvidia_gpu = meta['trace1_gpu'], meta['trace2_gpu']
|
|
83
|
-
else:
|
|
84
|
-
amd_gpu, nvidia_gpu = meta['trace2_gpu'], meta['trace1_gpu']
|
|
85
|
-
typer.echo(f"✅ Loaded: AMD ({amd_gpu}) vs NVIDIA ({nvidia_gpu})")
|
|
86
|
-
typer.echo()
|
|
87
|
-
|
|
88
|
-
# Generate output based on format
|
|
89
|
-
if output_format == "text":
|
|
90
|
-
output_str = format_text(results, show_layers=show_layers, show_all=show_all, show_stack_traces=show_stack_traces)
|
|
91
|
-
elif output_format == "text-layers":
|
|
92
|
-
output_str = format_text(results, show_layers=True, show_all=show_all, show_stack_traces=show_stack_traces)
|
|
93
|
-
elif output_format == "csv":
|
|
94
|
-
output_str = format_csv(results, report_type="operations")
|
|
95
|
-
elif output_format == "csv-layers":
|
|
96
|
-
output_str = format_csv(results, report_type="layers")
|
|
97
|
-
elif output_format == "json":
|
|
98
|
-
output_str = format_json(results)
|
|
99
|
-
else:
|
|
100
|
-
typer.secho(f"❌ Unknown format: {output_format}", fg=typer.colors.RED, err=True)
|
|
101
|
-
raise typer.Exit(1)
|
|
102
|
-
|
|
103
|
-
# Write output
|
|
104
|
-
if output:
|
|
105
|
-
output.write_text(output_str)
|
|
106
|
-
typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)
|
|
107
|
-
else:
|
|
108
|
-
typer.echo(output_str)
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
def compare_fusion(
|
|
112
|
-
trace1: Path,
|
|
113
|
-
trace2: Path,
|
|
114
|
-
output: Path | None = None,
|
|
115
|
-
format_type: str = "text",
|
|
116
|
-
min_group_size: int = 50,
|
|
117
|
-
) -> None:
|
|
118
|
-
"""Analyze kernel fusion differences between AMD and NVIDIA traces.
|
|
119
|
-
|
|
120
|
-
Args:
|
|
121
|
-
trace1: Path to first trace file (AMD or NVIDIA)
|
|
122
|
-
trace2: Path to second trace file (AMD or NVIDIA)
|
|
123
|
-
output: Optional output file path (default: stdout)
|
|
124
|
-
format_type: Output format ('text', 'csv', or 'json')
|
|
125
|
-
min_group_size: Minimum correlation group size to analyze
|
|
126
|
-
"""
|
|
127
|
-
# Validate files exist
|
|
128
|
-
if not trace1.exists():
|
|
129
|
-
typer.secho(f"❌ File not found: {trace1}", fg=typer.colors.RED, err=True)
|
|
130
|
-
raise typer.Exit(1)
|
|
131
|
-
|
|
132
|
-
if not trace2.exists():
|
|
133
|
-
typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
|
|
134
|
-
raise typer.Exit(1)
|
|
135
|
-
|
|
136
|
-
# Analyze fusion
|
|
137
|
-
# Only show progress messages for non-JSON formats (JSON needs clean stdout)
|
|
138
|
-
if format_type != 'json':
|
|
139
|
-
typer.echo("📊 Loading traces...")
|
|
140
|
-
try:
|
|
141
|
-
results = analyze_fusion_differences(
|
|
142
|
-
trace1,
|
|
143
|
-
trace2,
|
|
144
|
-
min_group_size=min_group_size,
|
|
145
|
-
)
|
|
146
|
-
except Exception as e:
|
|
147
|
-
typer.secho(
|
|
148
|
-
f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True
|
|
149
|
-
)
|
|
150
|
-
import traceback
|
|
151
|
-
|
|
152
|
-
traceback.print_exc()
|
|
153
|
-
raise typer.Exit(1)
|
|
154
|
-
|
|
155
|
-
# Show loading confirmation
|
|
156
|
-
if format_type != 'json':
|
|
157
|
-
meta = results["metadata"]
|
|
158
|
-
# Note: fusion analyzer always uses trace1=AMD, trace2=NVIDIA
|
|
159
|
-
typer.echo(f"✅ Loaded: {meta['trace1_gpu']} vs {meta['trace2_gpu']}")
|
|
160
|
-
typer.echo(
|
|
161
|
-
f"Found {meta['trace1_correlation_groups']} trace1 groups and "
|
|
162
|
-
f"{meta['trace2_correlation_groups']} trace2 groups with ≥{min_group_size} kernels"
|
|
163
|
-
)
|
|
164
|
-
typer.echo(f"✅ Matched {meta['matched_groups']} correlation groups")
|
|
165
|
-
typer.echo()
|
|
166
|
-
|
|
167
|
-
# Generate output
|
|
168
|
-
if format_type == "text":
|
|
169
|
-
output_str = format_fusion_text(results)
|
|
170
|
-
elif format_type == "csv":
|
|
171
|
-
output_str = format_fusion_csv(results)
|
|
172
|
-
elif format_type == "json":
|
|
173
|
-
output_str = format_fusion_json(results)
|
|
174
|
-
else:
|
|
175
|
-
typer.secho(f"❌ Unknown format: {format_type}", fg=typer.colors.RED, err=True)
|
|
176
|
-
raise typer.Exit(1)
|
|
177
|
-
|
|
178
|
-
# Write output
|
|
179
|
-
if output:
|
|
180
|
-
output.write_text(output_str)
|
|
181
|
-
typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)
|
|
182
|
-
else:
|
|
183
|
-
typer.echo(output_str)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|