wafer-cli 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wafer/rocprof_sdk.py ADDED
@@ -0,0 +1,274 @@
1
+ """ROCprof-SDK - CLI wrapper for rocprofv3 profiling tool.
2
+
3
+ This module provides the CLI wrapper for the `wafer rocprof-sdk` command.
4
+ It supports multiple subcommands:
5
+ - check: Check rocprofv3 installation
+ - list-counters: List available hardware counters
6
+ - profile: Run profiling on a command
7
+ - analyze: Analyze profiling output files
8
+
9
+ This follows the design in Wafer-391: ROCprofiler Tools Architecture.
10
+ Architecture pattern matches rocprof_compute.py.
11
+ """
12
+
13
+ import json
14
+ import shlex
15
+ import sys
16
+ from dataclasses import asdict
17
+ from pathlib import Path
18
+
19
+
20
def print_usage() -> None:
    """Write the `wafer rocprof-sdk` help text to stderr.

    The text covers the available subcommands, the options accepted by
    `profile` and `analyze`, and a handful of example invocations. Each
    entry is emitted as its own line.
    """
    help_lines = (
        "Usage: wafer rocprof-sdk <subcommand> [options]",
        "",
        "Subcommands:",
        " check Check rocprofv3 installation status",
        " list-counters List available hardware counters for your GPU",
        " profile COMMAND Profile a command with rocprofv3",
        " analyze FILE Analyze profiling output file",
        "",
        "Profile Options:",
        " --output-dir DIR Output directory for results (default: current directory)",
        " --format FORMAT Output format: csv, json, rocpd, pftrace, otf2 (default: csv)",
        " --counters C1,C2 Hardware counters to collect (comma-separated)",
        " --json Output result as JSON",
        "",
        "Analyze Options:",
        " --json Output result as JSON",
        "",
        "Examples:",
        " wafer rocprof-sdk check",
        " wafer rocprof-sdk list-counters",
        " wafer rocprof-sdk profile './my_app --arg'",
        " wafer rocprof-sdk profile './kernel' --format csv --output-dir ./results",
        " wafer rocprof-sdk profile './kernel' --counters SQ_WAVES,L2_CACHE_HITS",
        " wafer rocprof-sdk analyze stats_kernel.csv",
        " wafer rocprof-sdk analyze results.json --json",
    )
    # Usage goes to stderr so stdout stays reserved for command results.
    for help_line in help_lines:
        print(help_line, file=sys.stderr)
62
+
63
+
64
def check_command(json_output: bool = False) -> str:
    """Report whether rocprofv3 is installed.

    Args:
        json_output: When True, return the installation-check result
            serialized as JSON and print nothing. When False, write a
            human-readable report to stderr.

    Returns:
        The JSON document when ``json_output`` is True, otherwise a short
        status sentence.
    """
    from wafer_core.lib.rocprofiler.sdk import check_installation  # pragma: no cover

    status = check_installation()

    if json_output:
        return json.dumps(asdict(status), indent=2)

    if not status.installed:
        print("✗ rocprofv3 is not installed", file=sys.stderr)
        if status.install_command:
            print(f" {status.install_command}", file=sys.stderr)
        return "rocprofv3 is not installed"

    print("✓ rocprofv3 is installed", file=sys.stderr)
    # Only show the details the check actually reported.
    for label, value in (("Path", status.path), ("Version", status.version)):
        if value:
            print(f" {label}: {value}", file=sys.stderr)
    return "rocprofv3 is installed"
92
+
93
+
94
def list_counters_command() -> str:
    """List the hardware counters reported by the rocprofv3 backend.

    On success the counter listing is printed to stdout and also returned.
    On failure a diagnostic is written to stderr and an exception is raised.

    Returns:
        The counter listing produced by ``list_counters``.

    Raises:
        RuntimeError: If listing fails. The message is the backend's error
            text, or a generic fallback when no error text was provided.
    """
    from wafer_core.lib.rocprofiler.sdk import list_counters  # pragma: no cover

    success, output, error = list_counters()

    if not success:
        print("✗ Failed to list counters", file=sys.stderr)
        if error:
            print(f" {error}", file=sys.stderr)
        # Fall back to a generic message so the exception never carries an
        # empty or "None" message, mirroring the profile/analyze commands.
        raise RuntimeError(error or "Failed to list counters")

    # The listing is the command's real output, so it goes to stdout.
    print(output)
    return output
115
+
116
+
117
+ def profile_command(
118
+ command: str,
119
+ output_dir: str | None = None,
120
+ output_format: str = "csv",
121
+ counters: list[str] | None = None,
122
+ kernel_include: str | None = None,
123
+ kernel_exclude: str | None = None,
124
+ trace_hip_runtime: bool = False,
125
+ trace_hip_compiler: bool = False,
126
+ trace_hsa: bool = False,
127
+ trace_marker: bool = False,
128
+ trace_memory_copy: bool = False,
129
+ json_output: bool = False,
130
+ ) -> str:
131
+ """Run rocprofv3 profiling.
132
+
133
+ Args:
134
+ command: Shell command to profile
135
+ output_dir: Output directory for results
136
+ output_format: Output format (csv, json, rocpd, pftrace)
137
+ counters: List of hardware counters to collect
138
+ kernel_include: Include only kernels matching this regex
139
+ kernel_exclude: Exclude kernels matching this regex
140
+ trace_hip_runtime: Enable HIP runtime API tracing
141
+ trace_hip_compiler: Enable HIP compiler code tracing
142
+ trace_hsa: Enable HSA API tracing
143
+ trace_marker: Enable ROCTx marker tracing
144
+ trace_memory_copy: Enable memory copy tracing
145
+ json_output: If True, return JSON; otherwise print human-readable
146
+
147
+ Returns:
148
+ Success message or JSON string
149
+
150
+ Raises:
151
+ RuntimeError: If profiling fails
152
+ """
153
+ from wafer_core.lib.rocprofiler.sdk import run_profile # pragma: no cover
154
+
155
+ # Parse command string into list
156
+ cmd_list = shlex.split(command)
157
+
158
+ result = run_profile(
159
+ command=cmd_list,
160
+ output_dir=Path(output_dir) if output_dir else None,
161
+ output_format=output_format,
162
+ counters=counters,
163
+ kernel_include_regex=kernel_include,
164
+ kernel_exclude_regex=kernel_exclude,
165
+ trace_hip_runtime=trace_hip_runtime,
166
+ trace_hip_compiler=trace_hip_compiler,
167
+ trace_hsa=trace_hsa,
168
+ trace_marker=trace_marker,
169
+ trace_memory_copy=trace_memory_copy,
170
+ )
171
+
172
+ if json_output:
173
+ result_dict = asdict(result)
174
+ return json.dumps(result_dict, indent=2)
175
+ else:
176
+ if result.success:
177
+ print("✓ Profiling completed successfully", file=sys.stderr)
178
+ if result.output_files:
179
+ print(" Output files:", file=sys.stderr)
180
+ for f in result.output_files:
181
+ print(f" - {f}", file=sys.stderr)
182
+ if result.stdout:
183
+ print("", file=sys.stderr)
184
+ print("Output:", file=sys.stderr)
185
+ print(result.stdout, file=sys.stderr)
186
+ return "Profiling completed"
187
+ else:
188
+ print("✗ Profiling failed", file=sys.stderr)
189
+ if result.error:
190
+ print(f" Error: {result.error}", file=sys.stderr)
191
+ if result.stderr:
192
+ print(" stderr:", file=sys.stderr)
193
+ print(result.stderr, file=sys.stderr)
194
+ raise RuntimeError(result.error or "Profiling failed")
195
+
196
+
197
def analyze_command(
    file_path: str,
    json_output: bool = False,
) -> str:
    """Analyze a rocprofiler output file and report the result.

    Args:
        file_path: Path to output file (CSV, JSON, or rocpd database)
        json_output: If True, return the analysis result as JSON (whether or
            not analysis succeeded); otherwise print a human-readable summary
            and a table of at most 20 kernels to stderr.

    Returns:
        Analysis summary or JSON string

    Raises:
        RuntimeError: If analysis fails in human-readable mode.
    """
    from wafer_core.lib.rocprofiler.sdk import analyze_file  # pragma: no cover

    result = analyze_file(Path(file_path))

    if json_output:
        result_dict = asdict(result)
        # Convert KernelMetrics objects to dicts
        if result.kernels:
            result_dict["kernels"] = [asdict(k) for k in result.kernels]
        return json.dumps(result_dict, indent=2)

    if not result.success:
        print("✗ Analysis failed", file=sys.stderr)
        if result.error:
            print(f" Error: {result.error}", file=sys.stderr)
        raise RuntimeError(result.error or "Analysis failed")

    print("✓ Analysis completed", file=sys.stderr)
    print(f" Format: {result.file_format}", file=sys.stderr)

    if result.summary:
        print(
            f" Kernels: {result.summary.get('total_kernels', 0)}",
            file=sys.stderr,
        )
        # `or 0` also covers keys that are present with a None value, which
        # would otherwise make the `:.3f` format raise.
        total_ms = result.summary.get("total_duration_ms") or 0
        print(f" Total Duration: {total_ms:.3f} ms", file=sys.stderr)
        avg_ms = result.summary.get("avg_duration_ms") or 0
        print(f" Avg Duration: {avg_ms:.3f} ms", file=sys.stderr)

    print("", file=sys.stderr)

    # Print kernel table
    if result.kernels:
        max_rows = 20  # keep the table readable for large traces
        print("Kernel Summary:", file=sys.stderr)
        print(
            f"{'Name':<40} {'Duration (ms)':>13} {'Grid':>12} {'Block':>12} {'SGPRs':>7} {'VGPRs':>7} {'LDS (B)':>9}",
            file=sys.stderr,
        )
        print("-" * 110, file=sys.stderr)

        for k in result.kernels[:max_rows]:
            duration_ms = (k.duration_ns or 0) / 1_000_000
            # Stringify before applying a width spec: alignment specs are not
            # supported by tuples/lists, so a multi-dimensional size would
            # raise TypeError. Output is unchanged for str and numeric values.
            grid = str(k.grid_size or "-")
            block = str(k.block_size or "-")
            sgprs = str(k.sgprs) if k.sgprs is not None else "-"
            vgprs = str(k.vgprs) if k.vgprs is not None else "-"
            lds = str(k.lds_per_workgroup) if k.lds_per_workgroup is not None else "-"
            # Truncate long kernel names so they fit the 40-character column
            name = k.name[:37] + "..." if len(k.name) > 40 else k.name
            print(
                f"{name:<40} {duration_ms:>13.3f} {grid:>12} {block:>12} {sgprs:>7} {vgprs:>7} {lds:>9}",
                file=sys.stderr,
            )

        if len(result.kernels) > max_rows:
            print(
                f"... and {len(result.kernels) - max_rows} more kernels",
                file=sys.stderr,
            )

    return "Analysis completed"