wafer-cli 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wafer/trace_compare.py CHANGED
@@ -6,19 +6,22 @@ All core logic is in wafer_core.lib.trace_compare.
6
6
 
7
7
  import sys
8
8
  from pathlib import Path
9
+ from typing import Any
9
10
 
10
11
  import typer
11
12
 
13
+ import json
14
+ import sys
15
+
12
16
  from wafer_core.lib.trace_compare import (
13
- analyze_fusion_differences,
14
- analyze_traces,
17
+ analyze_trace_pair,
15
18
  format_csv,
16
- format_fusion_csv,
17
- format_fusion_json,
18
- format_fusion_text,
19
19
  format_json,
20
20
  format_text,
21
+ ArchitectureType,
22
+ detect_architecture,
21
23
  )
24
+ from wafer_core.lib.trace_compare.loader import StreamingMetadata
22
25
 
23
26
 
24
27
  def compare_traces(
@@ -30,6 +33,7 @@ def compare_traces(
30
33
  show_layers: bool = False,
31
34
  show_all: bool = False,
32
35
  show_stack_traces: bool = False,
36
+ recommendations: bool = False,
33
37
  ) -> None:
34
38
  """Compare two GPU traces and generate performance report.
35
39
 
@@ -52,21 +56,60 @@ def compare_traces(
52
56
  typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
53
57
  raise typer.Exit(1)
54
58
 
55
- # Analyze traces
56
- # Only show progress messages for non-JSON formats (JSON needs clean stdout)
59
+ # Progress callback for JSON format (emits NDJSON to stdout)
60
+ def progress_callback(stage: str, fraction: float) -> None:
61
+ if output_format == 'json':
62
+ progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
63
+ print(progress_msg, file=sys.stdout, flush=True)
64
+ elif output_format != 'json':
65
+ percent = int(fraction * 100)
66
+ typer.echo(f"📊 {stage}: {percent}%", err=True)
67
+
68
+ # Metadata callback for JSON format (emits NDJSON with early GPU info)
69
+ def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
70
+ if output_format == 'json':
71
+ metadata_msg = json.dumps({
72
+ "type": "metadata",
73
+ "trace1": {
74
+ "platform": meta1.platform,
75
+ "gpu": meta1.gpu_name,
76
+ "file_size_mb": round(meta1.file_size_mb, 1),
77
+ },
78
+ "trace2": {
79
+ "platform": meta2.platform,
80
+ "gpu": meta2.gpu_name,
81
+ "file_size_mb": round(meta2.file_size_mb, 1),
82
+ },
83
+ })
84
+ print(metadata_msg, file=sys.stdout, flush=True)
85
+ else:
86
+ typer.echo(f"📊 Trace 1: {meta1.platform} - {meta1.gpu_name} ({meta1.file_size_mb:.1f}MB)", err=True)
87
+ typer.echo(f"📊 Trace 2: {meta2.platform} - {meta2.gpu_name} ({meta2.file_size_mb:.1f}MB)", err=True)
88
+
89
+ # Analyze traces using unified API
57
90
  if output_format != 'json':
58
91
  typer.echo("📊 Loading traces...")
59
92
 
60
- # Determine how many stack traces to collect
61
- max_stacks = 0 if (show_stack_traces and show_all) else (3 if show_stack_traces else 3)
62
-
63
93
  try:
64
- results = analyze_traces(
94
+ result_obj = analyze_trace_pair(
65
95
  trace1,
66
96
  trace2,
67
- phase_filter=phase,
68
- max_stacks=max_stacks,
97
+ phase=phase,
98
+ include_stacks=True,
99
+ on_progress=progress_callback,
100
+ on_metadata=metadata_callback,
69
101
  )
102
+
103
+ results = {
104
+ "metadata": result_obj.metadata,
105
+ "operations": result_obj.operations,
106
+ "layers": result_obj.layers,
107
+ "warnings": [{"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion} for w in result_obj.warnings],
108
+ "architecture": result_obj.architecture.value,
109
+ "layer_alignments": result_obj.layer_alignments,
110
+ "fusion_analysis": result_obj.fusion_analysis,
111
+ "same_kernel_analysis": result_obj.same_kernel_analysis,
112
+ }
70
113
  except ValueError as e:
71
114
  typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
72
115
  raise typer.Exit(1)
@@ -74,17 +117,26 @@ def compare_traces(
74
117
  typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
75
118
  raise typer.Exit(1)
76
119
 
77
- # Show loading confirmation
78
120
  if output_format != 'json':
79
121
  meta = results["metadata"]
80
- # Determine which trace is AMD and which is NVIDIA
81
122
  if meta['trace1_platform'] == 'AMD':
82
123
  amd_gpu, nvidia_gpu = meta['trace1_gpu'], meta['trace2_gpu']
83
124
  else:
84
125
  amd_gpu, nvidia_gpu = meta['trace2_gpu'], meta['trace1_gpu']
85
126
  typer.echo(f"✅ Loaded: AMD ({amd_gpu}) vs NVIDIA ({nvidia_gpu})")
127
+
128
+ # Display warnings
129
+ warnings = results.get("warnings", [])
130
+ if warnings:
131
+ typer.echo()
132
+ for warning in warnings:
133
+ icon = "❌" if warning["severity"] == "error" else "⚠️" if warning["severity"] == "warning" else "ℹ️"
134
+ typer.secho(f"{icon} {warning['message']}", fg=typer.colors.YELLOW if warning["severity"] == "warning" else typer.colors.BLUE)
135
+ if warning.get("suggestion"):
136
+ typer.secho(f" Suggestion: {warning['suggestion']}", fg=typer.colors.BLUE)
86
137
  typer.echo()
87
138
 
139
+
88
140
  # Generate output based on format
89
141
  if output_format == "text":
90
142
  output_str = format_text(results, show_layers=show_layers, show_all=show_all, show_stack_traces=show_stack_traces)
@@ -108,21 +160,23 @@ def compare_traces(
108
160
  typer.echo(output_str)
109
161
 
110
162
 
111
- def compare_fusion(
163
+ def compare_align(
112
164
  trace1: Path,
113
165
  trace2: Path,
114
166
  output: Path | None = None,
115
- format_type: str = "text",
116
- min_group_size: int = 50,
167
+ output_format: str = "json",
168
+ phase: str = "all",
169
+ layer: int | None = None,
117
170
  ) -> None:
118
- """Analyze kernel fusion differences between AMD and NVIDIA traces.
171
+ """Align kernels at layer level for exact kernel-to-kernel comparison.
119
172
 
120
173
  Args:
121
174
  trace1: Path to first trace file (AMD or NVIDIA)
122
175
  trace2: Path to second trace file (AMD or NVIDIA)
123
176
  output: Optional output file path (default: stdout)
124
- format_type: Output format ('text', 'csv', or 'json')
125
- min_group_size: Minimum correlation group size to analyze
177
+ output_format: Output format ('json' only for now)
178
+ phase: Filter by phase ('all', 'prefill', or 'decode')
179
+ layer: Focus on specific layer number (optional)
126
180
  """
127
181
  # Validate files exist
128
182
  if not trace1.exists():
@@ -133,49 +187,86 @@ def compare_fusion(
133
187
  typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
134
188
  raise typer.Exit(1)
135
189
 
136
- # Analyze fusion
137
- # Only show progress messages for non-JSON formats (JSON needs clean stdout)
138
- if format_type != 'json':
190
+ # Progress callback for JSON format (emits NDJSON to stdout)
191
+ def progress_callback(stage: str, fraction: float) -> None:
192
+ if output_format == 'json':
193
+ progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
194
+ print(progress_msg, file=sys.stdout, flush=True)
195
+ else:
196
+ percent = int(fraction * 100)
197
+ typer.echo(f"📊 {stage}: {percent}%", err=True)
198
+
199
+ # Metadata callback for JSON format
200
+ def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
201
+ if output_format == 'json':
202
+ metadata_msg = json.dumps({
203
+ "type": "metadata",
204
+ "trace1": {
205
+ "platform": meta1.platform,
206
+ "gpu": meta1.gpu_name,
207
+ "file_size_mb": round(meta1.file_size_mb, 1),
208
+ },
209
+ "trace2": {
210
+ "platform": meta2.platform,
211
+ "gpu": meta2.gpu_name,
212
+ "file_size_mb": round(meta2.file_size_mb, 1),
213
+ },
214
+ })
215
+ print(metadata_msg, file=sys.stdout, flush=True)
216
+ else:
217
+ typer.echo(f"📊 Trace 1: {meta1.platform} - {meta1.gpu_name} ({meta1.file_size_mb:.1f}MB)", err=True)
218
+ typer.echo(f"📊 Trace 2: {meta2.platform} - {meta2.gpu_name} ({meta2.file_size_mb:.1f}MB)", err=True)
219
+
220
+ # Analyze traces using unified API
221
+ if output_format != 'json':
139
222
  typer.echo("📊 Loading traces...")
223
+
140
224
  try:
141
- results = analyze_fusion_differences(
225
+ result_obj = analyze_trace_pair(
142
226
  trace1,
143
227
  trace2,
144
- min_group_size=min_group_size,
228
+ phase=phase,
229
+ include_stacks=True,
230
+ on_progress=progress_callback,
231
+ on_metadata=metadata_callback,
145
232
  )
233
+
234
+ results = {
235
+ "metadata": result_obj.metadata,
236
+ "layer_alignments": result_obj.layer_alignments or [],
237
+ "fusion_analysis": result_obj.fusion_analysis or {},
238
+ "same_kernel_analysis": result_obj.same_kernel_analysis or {},
239
+ "operations": result_obj.operations,
240
+ "layers": result_obj.layers,
241
+ "warnings": [{"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion} for w in result_obj.warnings],
242
+ "architecture": result_obj.architecture.value,
243
+ }
244
+
245
+ if layer is not None:
246
+ results["layer_alignments"] = [
247
+ la for la in results["layer_alignments"] if la.get("layer") == layer
248
+ ]
249
+ except ValueError as e:
250
+ typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
251
+ raise typer.Exit(1)
146
252
  except Exception as e:
147
- typer.secho(
148
- f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True
149
- )
253
+ typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
150
254
  import traceback
151
-
152
255
  traceback.print_exc()
153
256
  raise typer.Exit(1)
154
257
 
155
- # Show loading confirmation
156
- if format_type != 'json':
258
+ if output_format != 'json':
157
259
  meta = results["metadata"]
158
- # Note: fusion analyzer always uses trace1=AMD, trace2=NVIDIA
159
- typer.echo(f"✅ Loaded: {meta['trace1_gpu']} vs {meta['trace2_gpu']}")
160
- typer.echo(
161
- f"Found {meta['trace1_correlation_groups']} trace1 groups and "
162
- f"{meta['trace2_correlation_groups']} trace2 groups with ≥{min_group_size} kernels"
163
- )
164
- typer.echo(f"✅ Matched {meta['matched_groups']} correlation groups")
260
+ typer.echo(f"✅ Loaded: {meta.get('amd_gpu', 'Unknown')} vs {meta.get('nvidia_gpu', 'Unknown')}")
261
+ typer.echo(f"✅ Found {len(results['layer_alignments'])} layers")
165
262
  typer.echo()
166
263
 
167
- # Generate output
168
- if format_type == "text":
169
- output_str = format_fusion_text(results)
170
- elif format_type == "csv":
171
- output_str = format_fusion_csv(results)
172
- elif format_type == "json":
173
- output_str = format_fusion_json(results)
264
+ if output_format == "json":
265
+ output_str = format_json(results)
174
266
  else:
175
- typer.secho(f"❌ Unknown format: {format_type}", fg=typer.colors.RED, err=True)
267
+ typer.secho(f"❌ Format {output_format} not yet supported for align command. Use 'json'.", fg=typer.colors.RED, err=True)
176
268
  raise typer.Exit(1)
177
269
 
178
- # Write output
179
270
  if output:
180
271
  output.write_text(output_str)
181
272
  typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wafer-cli
3
- Version: 0.2.25
3
+ Version: 0.2.27
4
4
  Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
5
5
  Requires-Python: >=3.11
6
6
  Description-Content-Type: text/markdown
@@ -6,10 +6,10 @@ wafer/api_client.py,sha256=i_Az2b2llC3DSW8yOL-BKqa7LSKuxOr8hSN40s-oQXY,6313
6
6
  wafer/auth.py,sha256=dwss_se5P-FFc9IN38q4kh_dBrA6k-CguDBkivgcdj0,14003
7
7
  wafer/autotuner.py,sha256=41WYP41pTDvMijv2h42vm89bcHtDMJXObDlWmn6xpFU,44416
8
8
  wafer/billing.py,sha256=hEEwtrtIsbPQ3lLJNcyTLMsapUbcuvcVW_e9_0SxzVo,7199
9
- wafer/cli.py,sha256=vboIOEGLWrNUejSWfO0bcQ0IJOAR6Inva7r7PeYb6jI,277592
9
+ wafer/cli.py,sha256=zuVZhPdML5AOBtLUqLwAwjl8XMNe9EwQkffZxtBGLx4,282748
10
10
  wafer/cli_instructions.py,sha256=bziUKDNDAXABVMvKPLEMXm-hFSD2TcFSh-FKRYa949k,4693
11
11
  wafer/config.py,sha256=h5Eo9_yfWqWGoPNdVQikI9GoZVUeysunSYiixf1mKcw,3411
12
- wafer/corpus.py,sha256=B7xHNP_ssGbkL0DpXeXisycm_SxrLv5s4oss735GRWI,22567
12
+ wafer/corpus.py,sha256=CY9T7wXENNDJxnrtI-XsQmXeptrFfKG4x-lngrc9_3s,24748
13
13
  wafer/evaluate.py,sha256=HMFQD-uwC6Wky1t_0JxYZaoHWgLaTBkjxOxgpZVnGrc,190519
14
14
  wafer/global_config.py,sha256=fhaR_RU3ufMksDmOohH1OLeQ0JT0SDW1hEip_zaP75k,11345
15
15
  wafer/gpu_run.py,sha256=TwqXy72T7f2I7e6n5WWod3xgxCPnDhU0BgLsB4CUoQY,9716
@@ -23,11 +23,13 @@ wafer/problems.py,sha256=ce2sy10A1nnNUG3VGsseTS8jL7LZsku4dE8zVf9JHQ4,11296
23
23
  wafer/rocprof_compute.py,sha256=n_yOGZaFbOXna_ghhmYWXeyUoSabgH4KkjlYq38DlHo,19888
24
24
  wafer/rocprof_sdk.py,sha256=0Q7Ye6dUfa1anFZbqKc21rItgqva8V8VIZoSB7wqbmA,10085
25
25
  wafer/rocprof_systems.py,sha256=4IWbMcbYk1x_8iS7P3FC_u5sgH6EXADCtR2lV9id80M,18629
26
+ wafer/specs_cli.py,sha256=frMEKwMflxVNpFlAuxprmr33ZZ1Oeh2lB0KWZ4oZWzw,4360
26
27
  wafer/ssh_keys.py,sha256=MxiHlSm6wuDUFzkOQtx5K7OIbx_a6bXxE-m8OpwLx98,8130
27
28
  wafer/target_lock.py,sha256=SDKhNzv2N7gsphGflcNni9FE5YYuAMuEthngAJEo4Gs,7809
28
29
  wafer/targets.py,sha256=9r-iRWoKSH5cQl1LcamaX-T7cNVOg99ngIm_hlRk-qU,26922
29
- wafer/targets_ops.py,sha256=jN1oIBx0mutxRNE9xpIc7SaBxPkVmOyus2eqn0kEKNI,21475
30
- wafer/trace_compare.py,sha256=IBVSGI8u5A10haDzL4eQ0R24fM1G_dd1F3-4iEkG1EQ,6349
30
+ wafer/targets_cli.py,sha256=Oe3e02rSXeNrMbe_Qv9DNfQ8dEOKodtU7BbQQWxlNwA,16348
31
+ wafer/targets_ops.py,sha256=wLPyq55H_wz0wEAEg8KFLYs9LIIyiVIphcsXD2NLa-E,22623
32
+ wafer/trace_compare.py,sha256=COuxxKY874DteOSLUvJuJFREPMBSybq9dtANi3ATsg4,10803
31
33
  wafer/tracelens.py,sha256=g9ZIeFyNojZn4uTd3skPqIrRiL7aMJOz_-GOd3aiyy4,7998
32
34
  wafer/wevin_cli.py,sha256=eo1ETsXIsCftXSG5AxEYYZipNGcXayKyIevs5F6MjXg,26140
33
35
  wafer/workspaces.py,sha256=J-TXGwHXSZlzRWCew63KNvk6HLJ-zTSELRgzjryTkMk,35710
@@ -38,8 +40,8 @@ wafer/templates/optimize_kernel.py,sha256=4-MaKm_C9BQHQEllrNLLYkcdhJpcj6D-8zbJ4F
38
40
  wafer/templates/optimize_kernelbench.py,sha256=T3co9Y9eSLWDrZG66gwQVFMdnGVoyUQos-TxnMMBLL8,3747
39
41
  wafer/templates/trace_analyze.py,sha256=B7CiRlsokERzBjLL-k49kGjpU2zlJZqzTE05xbRS1WI,2878
40
42
  wafer/tests/test_eval_cli_parity.py,sha256=SGmaj2NGBZ7GdDF53bXsECvQbV21iHZw8YeL_MJOLk0,7206
41
- wafer_cli-0.2.25.dist-info/METADATA,sha256=sJEMfe-FTEqIj_Ij9gYek8BMB3_MD1vbMvJ9VhK8Qow,2799
42
- wafer_cli-0.2.25.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
43
- wafer_cli-0.2.25.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
44
- wafer_cli-0.2.25.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
45
- wafer_cli-0.2.25.dist-info/RECORD,,
43
+ wafer_cli-0.2.27.dist-info/METADATA,sha256=Atrnv-RhoThH0JF_gczLjsQluDb3ycEsvRwn16RDf8k,2799
44
+ wafer_cli-0.2.27.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
45
+ wafer_cli-0.2.27.dist-info/entry_points.txt,sha256=WqB7hB__WhtPY8y1cO2sZiUz7fCq6Ik-usAigpeFvWE,41
46
+ wafer_cli-0.2.27.dist-info/top_level.txt,sha256=2MK1IVMWfpLL8BZCQ3E9aG6L6L666gSA_teYlwan4fs,6
47
+ wafer_cli-0.2.27.dist-info/RECORD,,