wafer-cli 0.2.25.tar.gz → 0.2.26.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/PKG-INFO +1 -1
  2. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/pyproject.toml +1 -1
  3. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/cli.py +63 -4
  4. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/corpus.py +65 -5
  5. wafer_cli-0.2.26/wafer/trace_compare.py +274 -0
  6. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/PKG-INFO +1 -1
  7. wafer_cli-0.2.25/wafer/trace_compare.py +0 -183
  8. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/README.md +0 -0
  9. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/setup.cfg +0 -0
  10. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_analytics.py +0 -0
  11. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_auth.py +0 -0
  12. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_billing.py +0 -0
  13. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_cli_coverage.py +0 -0
  14. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_cli_parity_integration.py +0 -0
  15. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_config_integration.py +0 -0
  16. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_file_operations_integration.py +0 -0
  17. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_kernel_scope_cli.py +0 -0
  18. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_nsys_analyze.py +0 -0
  19. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_nsys_profile.py +0 -0
  20. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_output.py +0 -0
  21. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_rocprof_compute_integration.py +0 -0
  22. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_skill_commands.py +0 -0
  23. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_ssh_integration.py +0 -0
  24. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_targets_ops.py +0 -0
  25. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_wevin_cli.py +0 -0
  26. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/tests/test_workflow_integration.py +0 -0
  27. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/GUIDE.md +0 -0
  28. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/__init__.py +0 -0
  29. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/agent_defaults.py +0 -0
  30. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/analytics.py +0 -0
  31. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/api_client.py +0 -0
  32. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/auth.py +0 -0
  33. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/autotuner.py +0 -0
  34. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/billing.py +0 -0
  35. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/cli_instructions.py +0 -0
  36. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/config.py +0 -0
  37. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/evaluate.py +0 -0
  38. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/global_config.py +0 -0
  39. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/gpu_run.py +0 -0
  40. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/inference.py +0 -0
  41. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/kernel_scope.py +0 -0
  42. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/ncu_analyze.py +0 -0
  43. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/nsys_analyze.py +0 -0
  44. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/nsys_profile.py +0 -0
  45. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/output.py +0 -0
  46. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/problems.py +0 -0
  47. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/rocprof_compute.py +0 -0
  48. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/rocprof_sdk.py +0 -0
  49. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/rocprof_systems.py +0 -0
  50. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/skills/wafer-guide/SKILL.md +0 -0
  51. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/ssh_keys.py +0 -0
  52. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/target_lock.py +0 -0
  53. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/targets.py +0 -0
  54. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/targets_ops.py +0 -0
  55. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/templates/__init__.py +0 -0
  56. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/templates/ask_docs.py +0 -0
  57. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/templates/optimize_kernel.py +0 -0
  58. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/templates/optimize_kernelbench.py +0 -0
  59. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/templates/trace_analyze.py +0 -0
  60. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/tests/test_eval_cli_parity.py +0 -0
  61. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/tracelens.py +0 -0
  62. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/wevin_cli.py +0 -0
  63. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/workspaces.py +0 -0
  64. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/SOURCES.txt +0 -0
  65. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/dependency_links.txt +0 -0
  66. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/entry_points.txt +0 -0
  67. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/requires.txt +0 -0
  68. {wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/top_level.txt +0 -0
{wafer_cli-0.2.25 → wafer_cli-0.2.26}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: wafer-cli
- Version: 0.2.25
+ Version: 0.2.26
  Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
  Requires-Python: >=3.11
  Description-Content-Type: text/markdown
{wafer_cli-0.2.25 → wafer_cli-0.2.26}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "wafer-cli"
- version = "0.2.25"
+ version = "0.2.26"
  description = "CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels"
  readme = "README.md"
  requires-python = ">=3.11"
{wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/cli.py
@@ -7787,6 +7787,9 @@ def compare_analyze(
      stack_traces: bool = typer.Option(
          False, "--stack-traces", help="Show Python stack traces for operations"
      ),
+     recommendations: bool = typer.Option(
+         False, "--recommendations", help="Generate prioritized recommendations for kernel team"
+     ),
      json: bool = typer.Option(
          False, "--json", hidden=True, help="Ignored (for compatibility with cliExecutor)"
      ),
@@ -7839,6 +7842,7 @@ def compare_analyze(
          show_layers=layers,
          show_all=all,
          show_stack_traces=stack_traces,
+         recommendations=recommendations,
      )
      _mark_command_success()
 
@@ -7883,14 +7887,69 @@ def compare_fusion_cmd(
          # CSV output to file
          wafer compare fusion amd_trace.json nvidia_trace.json --format csv -o fusion.csv
      """
-     from .trace_compare import compare_fusion
+     from .trace_compare import compare_align
+
+     compare_align(
+         trace1=trace1,
+         trace2=trace2,
+         output=output,
+         output_format=format,
+         phase="all",
+     )
+     _mark_command_success()

-     compare_fusion(
+
+ @compare_app.command("align")
+ def compare_align_cmd(
+     trace1: Path = typer.Argument(..., help="First trace file (AMD or NVIDIA)", exists=True),
+     trace2: Path = typer.Argument(..., help="Second trace file (AMD or NVIDIA)", exists=True),
+     format: str = typer.Option(
+         "json",
+         "--format",
+         "-f",
+         help="Output format: json",
+     ),
+     output: Path | None = typer.Option(
+         None, "--output", "-o", help="Output file (default: stdout)"
+     ),
+     phase: str = typer.Option(
+         "all",
+         "--phase",
+         help="Filter by phase: all, prefill, decode",
+     ),
+     layer: int | None = typer.Option(
+         None,
+         "--layer",
+         help="Focus on specific layer number",
+     ),
+ ) -> None:
+     """Align kernels at layer level for exact kernel-to-kernel comparison.
+
+     Provides kernel-to-kernel mapping across AMD and NVIDIA platforms,
+     showing which kernels correspond to each other at each layer position.
+
+     Examples:
+         # Basic alignment (stdout JSON)
+         wafer compare align amd_trace.json nvidia_trace.json
+
+         # Save to file
+         wafer compare align amd_trace.json nvidia_trace.json -o alignment.json
+
+         # Focus on decode phase only
+         wafer compare align amd_trace.json nvidia_trace.json --phase decode
+
+         # Focus on specific layer
+         wafer compare align amd_trace.json nvidia_trace.json --layer 5
+     """
+     from .trace_compare import compare_align
+
+     compare_align(
          trace1=trace1,
          trace2=trace2,
          output=output,
-         format_type=format,
-         min_group_size=min_group_size,
+         output_format=format,
+         phase=phase,
+         layer=layer,
      )
      _mark_command_success()
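Both the rewired `fusion` command and the new `align` subcommand delegate to `compare_align` in wafer/trace_compare.py, so the same entry point can also be driven from Python. A minimal sketch based only on the signature shipped in this release (the trace file names are placeholders reused from the docstring examples above):

    from pathlib import Path

    from wafer.trace_compare import compare_align

    # Layer-level kernel alignment for the decode phase, written to alignment.json.
    compare_align(
        trace1=Path("amd_trace.json"),
        trace2=Path("nvidia_trace.json"),
        output=Path("alignment.json"),
        output_format="json",
        phase="decode",
        layer=None,  # pass an int to keep only one layer's alignment
    )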
 
{wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer/corpus.py
@@ -109,14 +109,34 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
      ),
      "hip": CorpusConfig(
          name="hip",
-         description="HIP programming guide and API reference",
-         source_type="github_repo",
-         repo="ROCm/HIP",
-         repo_paths=["docs"],
+         description="HIP programming guide, API reference, and examples",
+         source_type="github_multi_repo",
+         repos=[
+             # HIP - main documentation and API
+             RepoSource(
+                 repo="ROCm/HIP",
+                 paths=["docs"],
+             ),
+             # HIP examples - code samples
+             RepoSource(
+                 repo="ROCm/HIP-Examples",
+                 paths=["HIP-Examples-Applications", "mini-nbody"],
+             ),
+             # clr - HIP/OpenCL runtime (low-level)
+             RepoSource(
+                 repo="ROCm/clr",
+                 paths=["hipamd/include", "rocclr/device/gpu"],
+             ),
+             # ROCm docs - official documentation
+             RepoSource(
+                 repo="ROCm/ROCm",
+                 paths=["docs"],
+             ),
+         ],
      ),
      "amd": CorpusConfig(
          name="amd",
-         description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM)",
+         description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM, FlashAttention)",
          source_type="github_multi_repo",
          repos=[
              # rocWMMA - wave matrix multiply-accumulate (WMMA) intrinsics
@@ -186,6 +206,46 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
                  repo="huggingface/hf-rocm-kernels",
                  paths=["csrc", "hf_rocm_kernels", "docs"],
              ),
+             # ROCm/flash-attention - FlashAttention for AMD GPUs
+             RepoSource(
+                 repo="ROCm/flash-attention",
+                 paths=["csrc", "docs"],
+             ),
+             # ROCm/triton - Triton compiler for AMD GPUs
+             RepoSource(
+                 repo="ROCm/triton",
+                 paths=["python/tutorials", "third_party/amd"],
+             ),
+             # ROCm/rccl - ROCm Communication Collectives Library (multi-GPU)
+             RepoSource(
+                 repo="ROCm/rccl",
+                 paths=["docs"],
+             ),
+             # ROCm/rocprofiler-sdk - AMD GPU profiling SDK
+             RepoSource(
+                 repo="ROCm/rocprofiler-sdk",
+                 paths=["docs", "samples"],
+             ),
+             # ROCm/omniperf - AMD GPU profiling tool
+             RepoSource(
+                 repo="ROCm/omniperf",
+                 paths=["docs", "src/omniperf_analyze"],
+             ),
+             # ROCm/omnitrace - Application tracing for AMD
+             RepoSource(
+                 repo="ROCm/omnitrace",
+                 paths=["docs"],
+             ),
+             # AMD GPUOpen Performance Guides
+             RepoSource(
+                 repo="GPUOpen-Tools/gpu_performance_api",
+                 paths=["docs"],
+             ),
+             # AMD LLVM - AMD GPU compiler backend
+             RepoSource(
+                 repo="ROCm/llvm-project",
+                 paths=["amd/device-libs/README.md", "llvm/docs/AMDGPUUsage.rst"],
+             ),
          ],
      ),
  }
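Both the "hip" and "amd" corpora now use `source_type="github_multi_repo"`, carrying a list of `RepoSource` entries instead of a single `repo`/`repo_paths` pair. A rough sketch of walking the expanded configuration (it assumes `CorpusConfig` and `RepoSource` expose `repos`, `repo`, and `paths` as attributes, which this hunk does not show directly):

    from wafer.corpus import CORPORA

    # List every GitHub repo/path pair the expanded "hip" corpus is built from.
    for source in CORPORA["hip"].repos:
        for path in source.paths:
            print(f"{source.repo}: {path}")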
wafer_cli-0.2.26/wafer/trace_compare.py (new file)
@@ -0,0 +1,274 @@
+ """CLI wrapper for trace comparison commands.
+
+ This module provides the CLI interface for the `wafer compare` commands.
+ All core logic is in wafer_core.lib.trace_compare.
+ """
+
+ import sys
+ from pathlib import Path
+ from typing import Any
+
+ import typer
+
+ import json
+ import sys
+
+ from wafer_core.lib.trace_compare import (
+     analyze_trace_pair,
+     format_csv,
+     format_json,
+     format_text,
+     ArchitectureType,
+     detect_architecture,
+ )
+ from wafer_core.lib.trace_compare.loader import StreamingMetadata
+
+
+ def compare_traces(
+     trace1: Path,
+     trace2: Path,
+     output: Path | None = None,
+     output_format: str = "text",
+     phase: str = "all",
+     show_layers: bool = False,
+     show_all: bool = False,
+     show_stack_traces: bool = False,
+     recommendations: bool = False,
+ ) -> None:
+     """Compare two GPU traces and generate performance report.
+
+     Args:
+         trace1: Path to first trace file (AMD or NVIDIA)
+         trace2: Path to second trace file (AMD or NVIDIA)
+         output: Optional output file path (default: stdout)
+         output_format: Output format ('text', 'text-layers', 'csv', 'csv-layers', or 'json')
+         phase: Filter by phase ('all', 'prefill', or 'decode')
+         show_layers: Show layer-wise performance breakdown (text format only)
+         show_all: Show all items without truncation (applies to layers, operations, kernels)
+         show_stack_traces: Show Python stack traces for operations
+     """
+     # Validate files exist
+     if not trace1.exists():
+         typer.secho(f"❌ File not found: {trace1}", fg=typer.colors.RED, err=True)
+         raise typer.Exit(1)
+
+     if not trace2.exists():
+         typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
+         raise typer.Exit(1)
+
+     # Progress callback for JSON format (emits NDJSON to stdout)
+     def progress_callback(stage: str, fraction: float) -> None:
+         if output_format == 'json':
+             progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
+             print(progress_msg, file=sys.stdout, flush=True)
+         elif output_format != 'json':
+             percent = int(fraction * 100)
+             typer.echo(f"📊 {stage}: {percent}%", err=True)
+
+     # Metadata callback for JSON format (emits NDJSON with early GPU info)
+     def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
+         if output_format == 'json':
+             metadata_msg = json.dumps({
+                 "type": "metadata",
+                 "trace1": {
+                     "platform": meta1.platform,
+                     "gpu": meta1.gpu_name,
+                     "file_size_mb": round(meta1.file_size_mb, 1),
+                 },
+                 "trace2": {
+                     "platform": meta2.platform,
+                     "gpu": meta2.gpu_name,
+                     "file_size_mb": round(meta2.file_size_mb, 1),
+                 },
+             })
+             print(metadata_msg, file=sys.stdout, flush=True)
+         else:
+             typer.echo(f"📊 Trace 1: {meta1.platform} - {meta1.gpu_name} ({meta1.file_size_mb:.1f}MB)", err=True)
+             typer.echo(f"📊 Trace 2: {meta2.platform} - {meta2.gpu_name} ({meta2.file_size_mb:.1f}MB)", err=True)
+
+     # Analyze traces using unified API
+     if output_format != 'json':
+         typer.echo("📊 Loading traces...")
+
+     try:
+         result_obj = analyze_trace_pair(
+             trace1,
+             trace2,
+             phase=phase,
+             include_stacks=True,
+             on_progress=progress_callback,
+             on_metadata=metadata_callback,
+         )
+
+         results = {
+             "metadata": result_obj.metadata,
+             "operations": result_obj.operations,
+             "layers": result_obj.layers,
+             "warnings": [{"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion} for w in result_obj.warnings],
+             "architecture": result_obj.architecture.value,
+             "layer_alignments": result_obj.layer_alignments,
+             "fusion_analysis": result_obj.fusion_analysis,
+             "same_kernel_analysis": result_obj.same_kernel_analysis,
+         }
+     except ValueError as e:
+         typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
+         raise typer.Exit(1)
+     except Exception as e:
+         typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
+         raise typer.Exit(1)
+
+     if output_format != 'json':
+         meta = results["metadata"]
+         if meta['trace1_platform'] == 'AMD':
+             amd_gpu, nvidia_gpu = meta['trace1_gpu'], meta['trace2_gpu']
+         else:
+             amd_gpu, nvidia_gpu = meta['trace2_gpu'], meta['trace1_gpu']
+         typer.echo(f"✅ Loaded: AMD ({amd_gpu}) vs NVIDIA ({nvidia_gpu})")
+
+         # Display warnings
+         warnings = results.get("warnings", [])
+         if warnings:
+             typer.echo()
+             for warning in warnings:
+                 icon = "❌" if warning["severity"] == "error" else "⚠️" if warning["severity"] == "warning" else "ℹ️"
+                 typer.secho(f"{icon} {warning['message']}", fg=typer.colors.YELLOW if warning["severity"] == "warning" else typer.colors.BLUE)
+                 if warning.get("suggestion"):
+                     typer.secho(f" Suggestion: {warning['suggestion']}", fg=typer.colors.BLUE)
+             typer.echo()
+
+
+     # Generate output based on format
+     if output_format == "text":
+         output_str = format_text(results, show_layers=show_layers, show_all=show_all, show_stack_traces=show_stack_traces)
+     elif output_format == "text-layers":
+         output_str = format_text(results, show_layers=True, show_all=show_all, show_stack_traces=show_stack_traces)
+     elif output_format == "csv":
+         output_str = format_csv(results, report_type="operations")
+     elif output_format == "csv-layers":
+         output_str = format_csv(results, report_type="layers")
+     elif output_format == "json":
+         output_str = format_json(results)
+     else:
+         typer.secho(f"❌ Unknown format: {output_format}", fg=typer.colors.RED, err=True)
+         raise typer.Exit(1)
+
+     # Write output
+     if output:
+         output.write_text(output_str)
+         typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)
+     else:
+         typer.echo(output_str)
+
+
+ def compare_align(
+     trace1: Path,
+     trace2: Path,
+     output: Path | None = None,
+     output_format: str = "json",
+     phase: str = "all",
+     layer: int | None = None,
+ ) -> None:
+     """Align kernels at layer level for exact kernel-to-kernel comparison.
+
+     Args:
+         trace1: Path to first trace file (AMD or NVIDIA)
+         trace2: Path to second trace file (AMD or NVIDIA)
+         output: Optional output file path (default: stdout)
+         output_format: Output format ('json' only for now)
+         phase: Filter by phase ('all', 'prefill', or 'decode')
+         layer: Focus on specific layer number (optional)
+     """
+     # Validate files exist
+     if not trace1.exists():
+         typer.secho(f"❌ File not found: {trace1}", fg=typer.colors.RED, err=True)
+         raise typer.Exit(1)
+
+     if not trace2.exists():
+         typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
+         raise typer.Exit(1)
+
+     # Progress callback for JSON format (emits NDJSON to stdout)
+     def progress_callback(stage: str, fraction: float) -> None:
+         if output_format == 'json':
+             progress_msg = json.dumps({"type": "progress", "stage": stage, "fraction": fraction})
+             print(progress_msg, file=sys.stdout, flush=True)
+         else:
+             percent = int(fraction * 100)
+             typer.echo(f"📊 {stage}: {percent}%", err=True)
+
+     # Metadata callback for JSON format
+     def metadata_callback(meta1: StreamingMetadata, meta2: StreamingMetadata) -> None:
+         if output_format == 'json':
+             metadata_msg = json.dumps({
+                 "type": "metadata",
+                 "trace1": {
+                     "platform": meta1.platform,
+                     "gpu": meta1.gpu_name,
+                     "file_size_mb": round(meta1.file_size_mb, 1),
+                 },
+                 "trace2": {
+                     "platform": meta2.platform,
+                     "gpu": meta2.gpu_name,
+                     "file_size_mb": round(meta2.file_size_mb, 1),
+                 },
+             })
+             print(metadata_msg, file=sys.stdout, flush=True)
+         else:
+             typer.echo(f"📊 Trace 1: {meta1.platform} - {meta1.gpu_name} ({meta1.file_size_mb:.1f}MB)", err=True)
+             typer.echo(f"📊 Trace 2: {meta2.platform} - {meta2.gpu_name} ({meta2.file_size_mb:.1f}MB)", err=True)
+
+     # Analyze traces using unified API
+     if output_format != 'json':
+         typer.echo("📊 Loading traces...")
+
+     try:
+         result_obj = analyze_trace_pair(
+             trace1,
+             trace2,
+             phase=phase,
+             include_stacks=True,
+             on_progress=progress_callback,
+             on_metadata=metadata_callback,
+         )
+
+         results = {
+             "metadata": result_obj.metadata,
+             "layer_alignments": result_obj.layer_alignments or [],
+             "fusion_analysis": result_obj.fusion_analysis or {},
+             "same_kernel_analysis": result_obj.same_kernel_analysis or {},
+             "operations": result_obj.operations,
+             "layers": result_obj.layers,
+             "warnings": [{"code": w.code, "severity": w.severity, "message": w.message, "suggestion": w.suggestion} for w in result_obj.warnings],
+             "architecture": result_obj.architecture.value,
+         }
+
+         if layer is not None:
+             results["layer_alignments"] = [
+                 la for la in results["layer_alignments"] if la.get("layer") == layer
+             ]
+     except ValueError as e:
+         typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
+         raise typer.Exit(1)
+     except Exception as e:
+         typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
+         import traceback
+         traceback.print_exc()
+         raise typer.Exit(1)
+
+     if output_format != 'json':
+         meta = results["metadata"]
+         typer.echo(f"✅ Loaded: {meta.get('amd_gpu', 'Unknown')} vs {meta.get('nvidia_gpu', 'Unknown')}")
+         typer.echo(f"✅ Found {len(results['layer_alignments'])} layers")
+         typer.echo()
+
+     if output_format == "json":
+         output_str = format_json(results)
+     else:
+         typer.secho(f"❌ Format {output_format} not yet supported for align command. Use 'json'.", fg=typer.colors.RED, err=True)
+         raise typer.Exit(1)
+
+     if output:
+         output.write_text(output_str)
+         typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)
+     else:
+         typer.echo(output_str)
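With `--format json`, both `compare_traces` and `compare_align` stream newline-delimited JSON "progress" and "metadata" messages to stdout before the final report, as the callbacks above show. A hedged consumer sketch (the `wafer compare analyze` subcommand and flag spellings are assumptions here; only the message shapes come from the code above):

    import json
    import subprocess

    proc = subprocess.Popen(
        ["wafer", "compare", "analyze", "amd_trace.json", "nvidia_trace.json", "--format", "json"],
        stdout=subprocess.PIPE,
        text=True,
    )
    assert proc.stdout is not None
    for line in proc.stdout:
        try:
            msg = json.loads(line)
        except json.JSONDecodeError:
            continue  # the final report may not be a single NDJSON line
        if msg.get("type") == "progress":
            print(f"{msg['stage']}: {int(msg['fraction'] * 100)}%")
        elif msg.get("type") == "metadata":
            print(msg["trace1"]["gpu"], "vs", msg["trace2"]["gpu"])
    proc.wait()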
{wafer_cli-0.2.25 → wafer_cli-0.2.26}/wafer_cli.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: wafer-cli
- Version: 0.2.25
+ Version: 0.2.26
  Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
  Requires-Python: >=3.11
  Description-Content-Type: text/markdown
wafer_cli-0.2.25/wafer/trace_compare.py (removed)
@@ -1,183 +0,0 @@
- """CLI wrapper for trace comparison commands.
-
- This module provides the CLI interface for the `wafer compare` commands.
- All core logic is in wafer_core.lib.trace_compare.
- """
-
- import sys
- from pathlib import Path
-
- import typer
-
- from wafer_core.lib.trace_compare import (
-     analyze_fusion_differences,
-     analyze_traces,
-     format_csv,
-     format_fusion_csv,
-     format_fusion_json,
-     format_fusion_text,
-     format_json,
-     format_text,
- )
-
-
- def compare_traces(
-     trace1: Path,
-     trace2: Path,
-     output: Path | None = None,
-     output_format: str = "text",
-     phase: str = "all",
-     show_layers: bool = False,
-     show_all: bool = False,
-     show_stack_traces: bool = False,
- ) -> None:
-     """Compare two GPU traces and generate performance report.
-
-     Args:
-         trace1: Path to first trace file (AMD or NVIDIA)
-         trace2: Path to second trace file (AMD or NVIDIA)
-         output: Optional output file path (default: stdout)
-         output_format: Output format ('text', 'text-layers', 'csv', 'csv-layers', or 'json')
-         phase: Filter by phase ('all', 'prefill', or 'decode')
-         show_layers: Show layer-wise performance breakdown (text format only)
-         show_all: Show all items without truncation (applies to layers, operations, kernels)
-         show_stack_traces: Show Python stack traces for operations
-     """
-     # Validate files exist
-     if not trace1.exists():
-         typer.secho(f"❌ File not found: {trace1}", fg=typer.colors.RED, err=True)
-         raise typer.Exit(1)
-
-     if not trace2.exists():
-         typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
-         raise typer.Exit(1)
-
-     # Analyze traces
-     # Only show progress messages for non-JSON formats (JSON needs clean stdout)
-     if output_format != 'json':
-         typer.echo("📊 Loading traces...")
-
-     # Determine how many stack traces to collect
-     max_stacks = 0 if (show_stack_traces and show_all) else (3 if show_stack_traces else 3)
-
-     try:
-         results = analyze_traces(
-             trace1,
-             trace2,
-             phase_filter=phase,
-             max_stacks=max_stacks,
-         )
-     except ValueError as e:
-         typer.secho(f"❌ {e}", fg=typer.colors.RED, err=True)
-         raise typer.Exit(1)
-     except Exception as e:
-         typer.secho(f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True)
-         raise typer.Exit(1)
-
-     # Show loading confirmation
-     if output_format != 'json':
-         meta = results["metadata"]
-         # Determine which trace is AMD and which is NVIDIA
-         if meta['trace1_platform'] == 'AMD':
-             amd_gpu, nvidia_gpu = meta['trace1_gpu'], meta['trace2_gpu']
-         else:
-             amd_gpu, nvidia_gpu = meta['trace2_gpu'], meta['trace1_gpu']
-         typer.echo(f"✅ Loaded: AMD ({amd_gpu}) vs NVIDIA ({nvidia_gpu})")
-         typer.echo()
-
-     # Generate output based on format
-     if output_format == "text":
-         output_str = format_text(results, show_layers=show_layers, show_all=show_all, show_stack_traces=show_stack_traces)
-     elif output_format == "text-layers":
-         output_str = format_text(results, show_layers=True, show_all=show_all, show_stack_traces=show_stack_traces)
-     elif output_format == "csv":
-         output_str = format_csv(results, report_type="operations")
-     elif output_format == "csv-layers":
-         output_str = format_csv(results, report_type="layers")
-     elif output_format == "json":
-         output_str = format_json(results)
-     else:
-         typer.secho(f"❌ Unknown format: {output_format}", fg=typer.colors.RED, err=True)
-         raise typer.Exit(1)
-
-     # Write output
-     if output:
-         output.write_text(output_str)
-         typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)
-     else:
-         typer.echo(output_str)
-
-
- def compare_fusion(
-     trace1: Path,
-     trace2: Path,
-     output: Path | None = None,
-     format_type: str = "text",
-     min_group_size: int = 50,
- ) -> None:
-     """Analyze kernel fusion differences between AMD and NVIDIA traces.
-
-     Args:
-         trace1: Path to first trace file (AMD or NVIDIA)
-         trace2: Path to second trace file (AMD or NVIDIA)
-         output: Optional output file path (default: stdout)
-         format_type: Output format ('text', 'csv', or 'json')
-         min_group_size: Minimum correlation group size to analyze
-     """
-     # Validate files exist
-     if not trace1.exists():
-         typer.secho(f"❌ File not found: {trace1}", fg=typer.colors.RED, err=True)
-         raise typer.Exit(1)
-
-     if not trace2.exists():
-         typer.secho(f"❌ File not found: {trace2}", fg=typer.colors.RED, err=True)
-         raise typer.Exit(1)
-
-     # Analyze fusion
-     # Only show progress messages for non-JSON formats (JSON needs clean stdout)
-     if format_type != 'json':
-         typer.echo("📊 Loading traces...")
-     try:
-         results = analyze_fusion_differences(
-             trace1,
-             trace2,
-             min_group_size=min_group_size,
-         )
-     except Exception as e:
-         typer.secho(
-             f"❌ Error analyzing traces: {e}", fg=typer.colors.RED, err=True
-         )
-         import traceback
-
-         traceback.print_exc()
-         raise typer.Exit(1)
-
-     # Show loading confirmation
-     if format_type != 'json':
-         meta = results["metadata"]
-         # Note: fusion analyzer always uses trace1=AMD, trace2=NVIDIA
-         typer.echo(f"✅ Loaded: {meta['trace1_gpu']} vs {meta['trace2_gpu']}")
-         typer.echo(
-             f"Found {meta['trace1_correlation_groups']} trace1 groups and "
-             f"{meta['trace2_correlation_groups']} trace2 groups with ≥{min_group_size} kernels"
-         )
-         typer.echo(f"✅ Matched {meta['matched_groups']} correlation groups")
-         typer.echo()
-
-     # Generate output
-     if format_type == "text":
-         output_str = format_fusion_text(results)
-     elif format_type == "csv":
-         output_str = format_fusion_csv(results)
-     elif format_type == "json":
-         output_str = format_fusion_json(results)
-     else:
-         typer.secho(f"❌ Unknown format: {format_type}", fg=typer.colors.RED, err=True)
-         raise typer.Exit(1)
-
-     # Write output
-     if output:
-         output.write_text(output_str)
-         typer.secho(f"✅ Report saved to {output}", fg=typer.colors.GREEN)
-     else:
-         typer.echo(output_str)