wafer-cli 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wafer/nsys_profile.py ADDED
@@ -0,0 +1,510 @@
1
+ """NSYS Profile - Execute NSYS profiling on local, remote, or workspace targets.
2
+
3
+ This module provides the implementation for the `wafer nvidia nsys profile` command.
4
+ Supports local profiling (when nsys is installed), workspace execution, and direct SSH.
5
+
6
+ Profiling requires an NVIDIA GPU. Analysis can be done locally or remotely.
7
+ """
8
+
9
+ import json
10
+ import os
11
+ import shlex
12
+ import subprocess
13
+ import sys
14
+ from dataclasses import dataclass
15
+ from pathlib import Path
16
+
17
+ from .nsys_analyze import (
18
+ NSYSAnalysisResult,
19
+ _find_nsys,
20
+ _get_install_command,
21
+ _parse_target,
22
+ is_macos,
23
+ )
24
+
25
+
26
+ @dataclass(frozen=True)
27
+ class NSYSProfileResult:
28
+ """Result of NSYS profiling execution."""
29
+
30
+ success: bool
31
+ output_path: str | None = None
32
+ stdout: str | None = None
33
+ stderr: str | None = None
34
+ error: str | None = None
35
+
36
+
37
+ @dataclass(frozen=True)
38
+ class NSYSProfileOptions:
39
+ """Options for NSYS profiling."""
40
+
41
+ command: str
42
+ output: str = "profile"
43
+ trace: list[str] | None = None # cuda, nvtx, osrt, cudnn, cublas
44
+ duration: int | None = None # Max duration in seconds
45
+ extra_args: str | None = None
46
+ working_dir: str | None = None
47
+
48
+
49
+ def _build_nsys_command(
50
+ nsys_path: str,
51
+ options: NSYSProfileOptions,
52
+ ) -> list[str]:
53
+ """Build nsys profile command from options.
54
+
55
+ Args:
56
+ nsys_path: Path to nsys executable
57
+ options: Profiling options
58
+
59
+ Returns:
60
+ Command as list of arguments
61
+ """
62
+ cmd = [nsys_path, "profile"]
63
+
64
+ # Output file (without extension - nsys adds .nsys-rep)
65
+ output_name = options.output
66
+ if output_name.endswith(".nsys-rep"):
67
+ output_name = output_name[:-9]
68
+ cmd.extend(["-o", output_name])
69
+
70
+ # Trace options
71
+ if options.trace:
72
+ cmd.extend(["-t", ",".join(options.trace)])
73
+ else:
74
+ cmd.extend(["-t", "cuda"]) # Default to CUDA tracing
75
+
76
+ # Duration limit
77
+ if options.duration:
78
+ cmd.extend(["--duration", str(options.duration)])
79
+
80
+ # Force overwrite
81
+ cmd.append("--force-overwrite=true")
82
+
83
+ # Extra args
84
+ if options.extra_args:
85
+ cmd.extend(shlex.split(options.extra_args))
86
+
87
+ # Command to profile
88
+ cmd.extend(shlex.split(options.command))
89
+
90
+ return cmd
91
+
92
+
93
def profile_local(
    options: NSYSProfileOptions,
    verbose: bool = False,
) -> NSYSProfileResult:
    """Execute NSYS profiling locally.

    Args:
        options: Profiling options
        verbose: If True, print progress messages to stderr

    Returns:
        NSYSProfileResult with success status and output path. Profiling
        failures (non-zero exit, missing report, timeout, OS errors,
        unparsable command) are reported through the result rather than
        raised.

    Raises:
        FileNotFoundError: If nsys is not installed
    """
    # Find nsys
    nsys_path = _find_nsys()
    if nsys_path is None:
        if is_macos():
            raise FileNotFoundError(
                "NSYS CLI is not available on macOS. "
                "Use --target to profile on a remote GPU server or workspace."
            )
        raise FileNotFoundError(
            f"NSYS not installed. Install with: {_get_install_command()}"
        )

    # Build command. shlex.split raises ValueError on e.g. unbalanced
    # quotes; report that as a failed run instead of crashing the caller.
    try:
        cmd = _build_nsys_command(nsys_path, options)
    except ValueError as e:
        return NSYSProfileResult(
            success=False,
            error=f"Invalid command or extra args: {e}",
        )

    if verbose:
        # shlex.join keeps arguments containing whitespace unambiguous.
        print(f"[nsys] Running: {shlex.join(cmd)}", file=sys.stderr)

    # Give nsys a 60 second grace period beyond the requested duration;
    # without a duration the run is capped at 660 seconds.
    timeout = options.duration + 60 if options.duration else 660

    # Execute
    try:
        cwd = options.working_dir or os.getcwd()
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            cwd=cwd,
            timeout=timeout,
        )

        if result.returncode != 0:
            return NSYSProfileResult(
                success=False,
                stdout=result.stdout,
                stderr=result.stderr,
                error=f"nsys profile failed with exit code {result.returncode}",
            )

        # Check for output file (the command was given the name without
        # the ".nsys-rep" extension, so add it back here).
        output_name = options.output
        if not output_name.endswith(".nsys-rep"):
            output_name = f"{output_name}.nsys-rep"
        output_path = Path(cwd) / output_name

        if not output_path.exists():
            return NSYSProfileResult(
                success=False,
                stdout=result.stdout,
                stderr=result.stderr,
                error=f"Output file not created: {output_path}",
            )

        return NSYSProfileResult(
            success=True,
            output_path=str(output_path),
            stdout=result.stdout,
            stderr=result.stderr,
        )

    except subprocess.TimeoutExpired:
        # Report the timeout that was actually enforced.
        return NSYSProfileResult(
            success=False,
            error=f"Profiling timed out after {timeout} seconds",
        )
    except OSError as e:
        return NSYSProfileResult(
            success=False,
            error=f"Failed to execute nsys: {e}",
        )
179
+
180
+
181
def profile_workspace(
    workspace_id: str,
    options: NSYSProfileOptions,
    verbose: bool = False,
    sync_artifacts: bool = True,
) -> NSYSProfileResult:
    """Execute NSYS profiling on a workspace.

    The nsys invocation is sent to the workspace as a single shell command
    string. The output name and trace list are shell-quoted; ``extra_args``
    and ``command`` are passed through verbatim.

    Args:
        workspace_id: Workspace ID to profile on
        options: Profiling options
        verbose: If True, print progress messages to stderr
        sync_artifacts: If True, sync output file back to local

    Returns:
        NSYSProfileResult with success status and output path. The path is
        the synced local file when syncing succeeds, otherwise
        ``workspace:<workspace_id>:<file>``.
    """
    from .workspaces import exec_command_capture, get_workspace_info

    # Get workspace info to verify it exists
    try:
        workspace_info = get_workspace_info(workspace_id)
        if not workspace_info:
            return NSYSProfileResult(
                success=False,
                error=f"Workspace not found: {workspace_id}",
            )
    except Exception as e:
        return NSYSProfileResult(
            success=False,
            error=f"Failed to get workspace info: {e}",
        )

    if verbose:
        print(f"[nsys] Profiling on workspace: {workspace_id}", file=sys.stderr)

    # nsys appends ".nsys-rep" itself, so pass the bare report name.
    output_name_base = options.output.removesuffix(".nsys-rep")
    output_file = f"{output_name_base}.nsys-rep"

    # Default to CUDA tracing when no trace list is given.
    trace_spec = ",".join(options.trace) if options.trace else "cuda"

    # Build nsys command for remote execution.
    # On workspace, nsys is expected to be in PATH.
    # Quote the values that get interpolated so spaces or shell
    # metacharacters in the output name cannot break or alter the command.
    cmd_parts = [
        "nsys profile",
        f"-o {shlex.quote(output_name_base)}",
        f"-t {shlex.quote(trace_spec)}",
    ]
    if options.duration:
        cmd_parts.append(f"--duration {options.duration}")
    cmd_parts.append("--force-overwrite=true")
    if options.extra_args:
        # Already a shell-style argument string; passed through as-is.
        cmd_parts.append(options.extra_args)
    # Command to profile, passed through as-is.
    cmd_parts.append(options.command)
    nsys_cmd = " ".join(cmd_parts)

    if verbose:
        print(f"[nsys] Running: {nsys_cmd}", file=sys.stderr)

    # Execute on workspace
    exit_code, output = exec_command_capture(workspace_id, nsys_cmd)

    if exit_code != 0:
        return NSYSProfileResult(
            success=False,
            stdout=output,
            error=f"nsys profile failed on workspace with exit code {exit_code}",
        )

    # Check if output file was created. The check command prints one of two
    # markers, so the result is read from the output, not the exit code.
    check_cmd = (
        f"test -f {shlex.quote(output_file)} && echo 'exists' || echo 'not found'"
    )
    _, check_output = exec_command_capture(workspace_id, check_cmd)

    if "not found" in check_output:
        return NSYSProfileResult(
            success=False,
            stdout=output,
            error=f"Output file not created on workspace: {output_file}",
        )

    if verbose:
        print(f"[nsys] Profile created: {output_file}", file=sys.stderr)

    # Optionally sync back to local
    local_path = None
    if sync_artifacts:
        if verbose:
            print(f"[nsys] Syncing {output_file} to local...", file=sys.stderr)

        try:
            from .workspaces import sync_workspace_file

            local_path = sync_workspace_file(workspace_id, output_file, Path.cwd())
            if verbose:
                print(f"[nsys] Synced to: {local_path}", file=sys.stderr)
        except Exception as e:
            if verbose:
                print(f"[nsys] Warning: Failed to sync: {e}", file=sys.stderr)
            # Not a failure - file exists on workspace
            local_path = None

    return NSYSProfileResult(
        success=True,
        output_path=str(local_path) if local_path else f"workspace:{workspace_id}:{output_file}",
        stdout=output,
    )
301
+
302
+
303
def profile_remote_ssh(
    target: str,
    options: NSYSProfileOptions,
    verbose: bool = False,
) -> NSYSProfileResult:
    """Execute NSYS profiling on a remote target via SSH.

    The nsys invocation is sent to the target as a single shell command
    string. The output name and trace list are shell-quoted; ``extra_args``
    and ``command`` are passed through verbatim.

    Args:
        target: Target name from ~/.wafer/targets/
        options: Profiling options
        verbose: If True, print progress messages to stderr

    Returns:
        NSYSProfileResult with success status and output path. On success
        the path has the form ``ssh:<target>:<file>``; the report is not
        copied back to the local machine.
    """
    import trio

    from .targets import load_target
    from .targets_ops import TargetExecError, exec_on_target_sync, get_target_ssh_info

    # Load target
    try:
        target_config = load_target(target)
    except FileNotFoundError as e:
        return NSYSProfileResult(
            success=False,
            error=f"Target not found: {e}",
        )
    except ValueError as e:
        return NSYSProfileResult(
            success=False,
            error=f"Invalid target config: {e}",
        )

    if verbose:
        print(f"[nsys] Connecting to target: {target}", file=sys.stderr)

    # Get SSH info
    try:
        ssh_info = trio.run(get_target_ssh_info, target_config)
    except TargetExecError as e:
        return NSYSProfileResult(
            success=False,
            error=f"Failed to connect to target: {e}",
        )

    if verbose:
        print(
            f"[nsys] Connected: {ssh_info.user}@{ssh_info.host}:{ssh_info.port}",
            file=sys.stderr,
        )

    # nsys appends ".nsys-rep" itself, so pass the bare report name.
    output_name_base = options.output.removesuffix(".nsys-rep")

    # Default to CUDA tracing when no trace list is given.
    trace_spec = ",".join(options.trace) if options.trace else "cuda"

    # Build nsys command. Quote the interpolated values so spaces or shell
    # metacharacters in the output name cannot break or alter the command.
    cmd_parts = [
        f"nsys profile -o {shlex.quote(output_name_base)}",
        f"-t {shlex.quote(trace_spec)}",
    ]
    if options.duration:
        cmd_parts.append(f"--duration {options.duration}")
    cmd_parts.append("--force-overwrite=true")
    if options.extra_args:
        # Already a shell-style argument string; passed through as-is.
        cmd_parts.append(options.extra_args)
    # Command to profile, passed through as-is.
    cmd_parts.append(options.command)
    nsys_cmd = " ".join(cmd_parts)

    if verbose:
        print(f"[nsys] Running: {nsys_cmd}", file=sys.stderr)

    # Execute, allowing a 60 second grace period beyond the requested
    # duration (660 seconds when no duration is set).
    try:
        timeout = options.duration + 60 if options.duration else 660
        exit_code = exec_on_target_sync(ssh_info, nsys_cmd, timeout)

        if exit_code != 0:
            return NSYSProfileResult(
                success=False,
                error=f"nsys profile failed on target with exit code {exit_code}",
            )

        output_file = f"{output_name_base}.nsys-rep"
        return NSYSProfileResult(
            success=True,
            output_path=f"ssh:{target}:{output_file}",
        )

    except TargetExecError as e:
        return NSYSProfileResult(
            success=False,
            error=f"Execution failed: {e}",
        )
404
+
405
+
406
def _analysis_result_from_output(
    analysis_output: str,
    json_output: bool,
) -> NSYSAnalysisResult:
    """Wrap the output of ``analyze_nsys_profile`` in an NSYSAnalysisResult.

    With ``json_output`` the text is parsed as JSON and its ``kernels`` and
    ``memory_transfers`` entries are copied into the result; otherwise only
    the success flag is set.
    """
    if not json_output:
        return NSYSAnalysisResult(success=True)

    analysis_data = json.loads(analysis_output)
    return NSYSAnalysisResult(
        success=True,
        kernels=analysis_data.get("kernels"),
        memory_transfers=analysis_data.get("memory_transfers"),
    )


def profile_and_analyze(
    options: NSYSProfileOptions,
    target: str | None = None,
    json_output: bool = False,
    verbose: bool = False,
) -> tuple[NSYSProfileResult, NSYSAnalysisResult | None]:
    """Profile and optionally analyze in one operation.

    Args:
        options: Profiling options
        target: Optional target (workspace:id or target name)
        json_output: If True, analysis returns JSON
        verbose: If True, print progress messages to stderr

    Returns:
        Tuple of (profile_result, analysis_result or None). The analysis
        result is None when profiling failed or produced no output path.
        Analysis errors are reported as an unsuccessful NSYSAnalysisResult.

    Raises:
        FileNotFoundError: From local profiling when nsys is not installed
    """
    from .nsys_analyze import analyze_nsys_profile

    # Profile
    if target:
        target_type, target_id = _parse_target(target)
        if target_type == "workspace":
            profile_result = profile_workspace(
                target_id, options, verbose=verbose, sync_artifacts=True
            )
        else:
            profile_result = profile_remote_ssh(target_id, options, verbose=verbose)
    else:
        profile_result = profile_local(options, verbose=verbose)

    if not profile_result.success:
        return profile_result, None

    output_path = profile_result.output_path
    if not output_path:
        return profile_result, None

    # Analyze
    if not output_path.startswith(("workspace:", "ssh:")):
        # Local file
        try:
            analysis_output = analyze_nsys_profile(
                Path(output_path),
                json_output=json_output,
            )
            analysis_result = _analysis_result_from_output(analysis_output, json_output)
            if not json_output:
                # Print the analysis
                print(analysis_output)
            return profile_result, analysis_result
        except Exception as e:
            return profile_result, NSYSAnalysisResult(
                success=False,
                error=f"Analysis failed: {e}",
            )

    # Remote file - need to analyze on remote
    if verbose:
        print(f"[nsys] Analyzing remote file: {output_path}", file=sys.stderr)

    if not (target and target.startswith("workspace:")):
        # For SSH targets, we'd need to implement analysis there
        return profile_result, NSYSAnalysisResult(
            success=False,
            error="Remote analysis for SSH targets not yet implemented. Download the file and analyze locally.",
        )

    # For workspace, we can use workspace analysis
    try:
        # The path has the form "workspace:<id>:<file>". Split at most
        # twice so a ":" inside the file path stays part of it.
        _, ws_id, filepath = output_path.split(":", 2)
        analysis_output = analyze_nsys_profile(
            Path(filepath),
            json_output=json_output,
            target=f"workspace:{ws_id}",
        )
        return profile_result, _analysis_result_from_output(analysis_output, json_output)
    except Exception as e:
        return profile_result, NSYSAnalysisResult(
            success=False,
            error=f"Analysis failed: {e}",
        )