wafer-cli 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wafer/GUIDE.md +118 -0
- wafer/__init__.py +3 -0
- wafer/analytics.py +306 -0
- wafer/api_client.py +195 -0
- wafer/auth.py +432 -0
- wafer/autotuner.py +1080 -0
- wafer/billing.py +233 -0
- wafer/cli.py +7289 -0
- wafer/config.py +105 -0
- wafer/corpus.py +366 -0
- wafer/evaluate.py +4593 -0
- wafer/global_config.py +350 -0
- wafer/gpu_run.py +307 -0
- wafer/inference.py +148 -0
- wafer/kernel_scope.py +552 -0
- wafer/ncu_analyze.py +651 -0
- wafer/nsys_analyze.py +1042 -0
- wafer/nsys_profile.py +510 -0
- wafer/output.py +248 -0
- wafer/problems.py +357 -0
- wafer/rocprof_compute.py +490 -0
- wafer/rocprof_sdk.py +274 -0
- wafer/rocprof_systems.py +520 -0
- wafer/skills/wafer-guide/SKILL.md +129 -0
- wafer/ssh_keys.py +261 -0
- wafer/target_lock.py +270 -0
- wafer/targets.py +842 -0
- wafer/targets_ops.py +717 -0
- wafer/templates/__init__.py +0 -0
- wafer/templates/ask_docs.py +61 -0
- wafer/templates/optimize_kernel.py +71 -0
- wafer/templates/optimize_kernelbench.py +137 -0
- wafer/templates/trace_analyze.py +74 -0
- wafer/tracelens.py +218 -0
- wafer/wevin_cli.py +577 -0
- wafer/workspaces.py +852 -0
- wafer_cli-0.2.14.dist-info/METADATA +16 -0
- wafer_cli-0.2.14.dist-info/RECORD +41 -0
- wafer_cli-0.2.14.dist-info/WHEEL +5 -0
- wafer_cli-0.2.14.dist-info/entry_points.txt +2 -0
- wafer_cli-0.2.14.dist-info/top_level.txt +1 -0
wafer/rocprof_sdk.py
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
"""ROCprof-SDK - CLI wrapper for rocprofv3 profiling tool.
|
|
2
|
+
|
|
3
|
+
This module provides the CLI wrapper for the `wafer rocprof-sdk` command.
|
|
4
|
+
It supports multiple subcommands:
|
|
5
|
+
- check: Check rocprofv3 installation
- list-counters: List available hardware counters
|
|
6
|
+
- profile: Run profiling on a command
|
|
7
|
+
- analyze: Analyze profiling output files
|
|
8
|
+
|
|
9
|
+
This follows the design in Wafer-391: ROCprofiler Tools Architecture.
|
|
10
|
+
Architecture pattern matches rocprof_compute.py.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import shlex
|
|
15
|
+
import sys
|
|
16
|
+
from dataclasses import asdict
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def print_usage() -> None:
    """Write the `wafer rocprof-sdk` help text to stderr.

    The text covers the available subcommands, the options accepted by
    `profile` and `analyze`, and a handful of example invocations. Nothing
    is written to stdout and nothing is returned.
    """
    # One entry per output line; empty strings produce the blank separator lines.
    usage_lines = (
        "Usage: wafer rocprof-sdk <subcommand> [options]",
        "",
        "Subcommands:",
        " check Check rocprofv3 installation status",
        " list-counters List available hardware counters for your GPU",
        " profile COMMAND Profile a command with rocprofv3",
        " analyze FILE Analyze profiling output file",
        "",
        "Profile Options:",
        " --output-dir DIR Output directory for results (default: current directory)",
        " --format FORMAT Output format: csv, json, rocpd, pftrace, otf2 (default: csv)",
        " --counters C1,C2 Hardware counters to collect (comma-separated)",
        " --json Output result as JSON",
        "",
        "Analyze Options:",
        " --json Output result as JSON",
        "",
        "Examples:",
        " wafer rocprof-sdk check",
        " wafer rocprof-sdk list-counters",
        " wafer rocprof-sdk profile './my_app --arg'",
        " wafer rocprof-sdk profile './kernel' --format csv --output-dir ./results",
        " wafer rocprof-sdk profile './kernel' --counters SQ_WAVES,L2_CACHE_HITS",
        " wafer rocprof-sdk analyze stats_kernel.csv",
        " wafer rocprof-sdk analyze results.json --json",
    )
    for usage_line in usage_lines:
        print(usage_line, file=sys.stderr)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def check_command(json_output: bool = False) -> str:
    """Report whether rocprofv3 is installed.

    Args:
        json_output: When True, return the installation-check result
            serialized as indented JSON and print nothing. When False,
            print a human-readable report to stderr.

    Returns:
        The JSON string in JSON mode, otherwise a short status message
        ("rocprofv3 is installed" / "rocprofv3 is not installed").
    """
    # Imported lazily so the module can be loaded without wafer_core present.
    from wafer_core.lib.rocprofiler.sdk import check_installation  # pragma: no cover

    status = check_installation()

    if json_output:
        return json.dumps(asdict(status), indent=2)

    if not status.installed:
        print("✗ rocprofv3 is not installed", file=sys.stderr)
        # Show the install hint only when the check supplied one.
        if status.install_command:
            print(f" {status.install_command}", file=sys.stderr)
        return "rocprofv3 is not installed"

    print("✓ rocprofv3 is installed", file=sys.stderr)
    if status.path:
        print(f" Path: {status.path}", file=sys.stderr)
    if status.version:
        print(f" Version: {status.version}", file=sys.stderr)
    return "rocprofv3 is installed"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def list_counters_command() -> str:
    """List the hardware counters reported by the rocprofiler SDK helper.

    On success the counter listing is printed to stdout and also returned.
    On failure a short diagnostic is printed to stderr before raising.

    Returns:
        The counter listing text.

    Raises:
        RuntimeError: If the listing did not succeed; carries the reported error.
    """
    from wafer_core.lib.rocprofiler.sdk import list_counters  # pragma: no cover

    ok, listing, failure = list_counters()

    if not ok:
        print("✗ Failed to list counters", file=sys.stderr)
        print(f" {failure}", file=sys.stderr)
        raise RuntimeError(failure)

    # The listing is the command's payload, so it goes to stdout (not stderr).
    print(listing)
    return listing
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def profile_command(
    command: str,
    output_dir: str | None = None,
    output_format: str = "csv",
    counters: list[str] | None = None,
    kernel_include: str | None = None,
    kernel_exclude: str | None = None,
    trace_hip_runtime: bool = False,
    trace_hip_compiler: bool = False,
    trace_hsa: bool = False,
    trace_marker: bool = False,
    trace_memory_copy: bool = False,
    json_output: bool = False,
) -> str:
    """Profile a command with rocprofv3 and report the outcome.

    Args:
        command: Shell-style command line to profile; split with shlex
            into an argument list before being handed to the profiler.
        output_dir: Directory for results; passed on as a Path, or None
            when not given (or empty).
        output_format: Output format name forwarded to the profiler.
        counters: Hardware counters to collect.
        kernel_include: Regex selecting the kernels to include.
        kernel_exclude: Regex selecting the kernels to exclude.
        trace_hip_runtime: Enable HIP runtime API tracing.
        trace_hip_compiler: Enable HIP compiler code tracing.
        trace_hsa: Enable HSA API tracing.
        trace_marker: Enable ROCTx marker tracing.
        trace_memory_copy: Enable memory copy tracing.
        json_output: When True, return the profiling result as indented
            JSON without printing; the JSON is returned whether or not the
            run succeeded. When False, print a report to stderr.

    Returns:
        The JSON string in JSON mode, otherwise "Profiling completed".

    Raises:
        RuntimeError: In human-readable mode, if the run did not succeed.
    """
    from wafer_core.lib.rocprofiler.sdk import run_profile  # pragma: no cover

    # The profiler expects an argv-style list rather than a single shell string.
    argv = shlex.split(command)
    destination = Path(output_dir) if output_dir else None

    outcome = run_profile(
        command=argv,
        output_dir=destination,
        output_format=output_format,
        counters=counters,
        kernel_include_regex=kernel_include,
        kernel_exclude_regex=kernel_exclude,
        trace_hip_runtime=trace_hip_runtime,
        trace_hip_compiler=trace_hip_compiler,
        trace_hsa=trace_hsa,
        trace_marker=trace_marker,
        trace_memory_copy=trace_memory_copy,
    )

    if json_output:
        return json.dumps(asdict(outcome), indent=2)

    if not outcome.success:
        print("✗ Profiling failed", file=sys.stderr)
        if outcome.error:
            print(f" Error: {outcome.error}", file=sys.stderr)
        if outcome.stderr:
            print(" stderr:", file=sys.stderr)
            print(outcome.stderr, file=sys.stderr)
        # Fall back to a generic message when no error text was reported.
        raise RuntimeError(outcome.error or "Profiling failed")

    print("✓ Profiling completed successfully", file=sys.stderr)
    if outcome.output_files:
        print(" Output files:", file=sys.stderr)
        for produced in outcome.output_files:
            print(f" - {produced}", file=sys.stderr)
    if outcome.stdout:
        print("", file=sys.stderr)
        print("Output:", file=sys.stderr)
        print(outcome.stdout, file=sys.stderr)
    return "Profiling completed"
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def analyze_command(
    file_path: str,
    json_output: bool = False,
) -> str:
    """Analyze a rocprofiler output file and report the result.

    Args:
        file_path: Path to the profiling output file to analyze.
        json_output: When True, return the analysis result as indented
            JSON without printing; the JSON is returned whether or not the
            analysis succeeded. When False, print a summary and a kernel
            table (first 20 kernels) to stderr.

    Returns:
        The JSON string in JSON mode, otherwise "Analysis completed".

    Raises:
        RuntimeError: In human-readable mode, if the analysis did not succeed.
    """
    from wafer_core.lib.rocprofiler.sdk import analyze_file  # pragma: no cover

    result = analyze_file(Path(file_path))

    if json_output:
        result_dict = asdict(result)
        # Convert KernelMetrics objects to dicts
        if result.kernels:
            result_dict["kernels"] = [asdict(k) for k in result.kernels]
        return json.dumps(result_dict, indent=2)

    if not result.success:
        print("✗ Analysis failed", file=sys.stderr)
        if result.error:
            print(f" Error: {result.error}", file=sys.stderr)
        # Fall back to a generic message when no error text was reported.
        raise RuntimeError(result.error or "Analysis failed")

    print("✓ Analysis completed", file=sys.stderr)
    print(f" Format: {result.file_format}", file=sys.stderr)

    if result.summary:
        print(
            f" Kernels: {result.summary.get('total_kernels', 0)}",
            file=sys.stderr,
        )
        total_ms = result.summary.get("total_duration_ms", 0)
        print(f" Total Duration: {total_ms:.3f} ms", file=sys.stderr)
        avg_ms = result.summary.get("avg_duration_ms", 0)
        print(f" Avg Duration: {avg_ms:.3f} ms", file=sys.stderr)

    print("", file=sys.stderr)

    # Print kernel table
    if result.kernels:
        max_rows = 20  # keep the table readable; the remainder is summarized below
        print("Kernel Summary:", file=sys.stderr)
        print(
            f"{'Name':<40} {'Duration (ms)':>13} {'Grid':>12} {'Block':>12} {'SGPRs':>7} {'VGPRs':>7} {'LDS (B)':>9}",
            file=sys.stderr,
        )
        print("-" * 110, file=sys.stderr)

        for k in result.kernels[:max_rows]:
            duration_ms = (k.duration_ns or 0) / 1_000_000
            # Coerce to str before applying a width spec: a value such as a
            # tuple or list would otherwise raise TypeError in __format__.
            # Falsy values still render as "-", exactly as before.
            grid = str(k.grid_size or "-")
            block = str(k.block_size or "-")
            sgprs = str(k.sgprs) if k.sgprs is not None else "-"
            vgprs = str(k.vgprs) if k.vgprs is not None else "-"
            lds = str(k.lds_per_workgroup) if k.lds_per_workgroup is not None else "-"
            # Truncate long kernel names so they fit the 40-character column
            name = k.name[:37] + "..." if len(k.name) > 40 else k.name
            print(
                f"{name:<40} {duration_ms:>13.3f} {grid:>12} {block:>12} {sgprs:>7} {vgprs:>7} {lds:>9}",
                file=sys.stderr,
            )

        if len(result.kernels) > max_rows:
            print(
                f"... and {len(result.kernels) - max_rows} more kernels",
                file=sys.stderr,
            )

    return "Analysis completed"
|