wafer-cli 0.2.9__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wafer/cli.py CHANGED
@@ -182,7 +182,12 @@ workspaces_app = typer.Typer(
182
182
 
183
183
  Workspaces are on-demand cloud GPU environments. Requires authentication (wafer login).
184
184
 
185
- wafer workspaces create dev --gpu H100 # Create workspace
185
+ Available GPUs:
186
+ MI300X AMD Instinct MI300X (192GB HBM3, ROCm)
187
+ B200 NVIDIA Blackwell B200 (180GB HBM3e, CUDA)
188
+
189
+ Commands:
190
+ wafer workspaces create dev --gpu B200 # Create workspace
186
191
  wafer workspaces exec dev -- python x.py # Run commands
187
192
  wafer workspaces ssh dev # Interactive SSH
188
193
  wafer workspaces sync dev ./project # Sync files
@@ -190,6 +195,36 @@ Workspaces are on-demand cloud GPU environments. Requires authentication (wafer
190
195
  )
191
196
  app.add_typer(workspaces_app, name="workspaces")
192
197
 
198
+ # SSH Key management (BYOK - Bring Your Own Key)
199
+ ssh_keys_app = typer.Typer(
200
+ help="""Manage SSH public keys for workspace access.
201
+
202
+ Register your SSH public keys here. These keys are installed in all workspaces
203
+ you provision, enabling SSH access from any machine with your private key.
204
+
205
+ wafer ssh-keys list # List registered keys
206
+ wafer ssh-keys add # Add key (auto-detects ~/.ssh/id_ed25519.pub)
207
+ wafer ssh-keys add ~/.ssh/id_rsa.pub --name laptop # Add specific key
208
+ wafer ssh-keys remove <key-id> # Remove a key"""
209
+ )
210
+ app.add_typer(ssh_keys_app, name="ssh-keys")
211
+
212
+ # Target operations (exec/ssh/sync on configured targets)
213
+ targets_ops_app = typer.Typer(
214
+ help="""Execute commands on configured GPU targets.
215
+
216
+ Run commands, SSH, or sync files to targets without going through evaluate.
217
+ Useful for exploratory work, debugging, or custom scripts.
218
+
219
+ wafer targets exec my-target -- python test.py # Run command
220
+ wafer targets ssh my-target # Interactive SSH
221
+ wafer targets sync my-target ./local_dir # Sync files
222
+
223
+ Supports: RunPod, DigitalOcean (auto-provisions), SSH targets (baremetal/vm).
224
+ Configure targets with: wafer config targets init ..."""
225
+ )
226
+ app.add_typer(targets_ops_app, name="targets")
227
+
193
228
  # Billing management
194
229
  billing_app = typer.Typer(help="Manage billing, credits, and subscription")
195
230
  app.add_typer(billing_app, name="billing")
@@ -257,13 +292,100 @@ nvidia_app.add_typer(tracelens_app, name="tracelens")
257
292
  amd_app = typer.Typer(help="AMD GPU profiling and analysis tools")
258
293
  app.add_typer(amd_app, name="amd")
259
294
 
260
- # ISA analysis - under amd
261
- isa_app = typer.Typer(help="ISA analysis for AMD GPU code objects (.co files)")
295
+ # Unified ISA Analyzer - supports both .co files and Triton artifacts
296
+ isa_app = typer.Typer(help="ISA analysis for AMD GPU kernels (.co, .s, .ll, .ttgir files)")
262
297
  amd_app.add_typer(isa_app, name="isa")
263
298
 
264
- # Kernel Scope - static ISA analysis for Triton kernels
265
- kernel_scope_app = typer.Typer(help="Static ISA analysis for Triton compilation artifacts")
266
- amd_app.add_typer(kernel_scope_app, name="kernel-scope")
299
+ # =============================================================================
300
+ # Roofline analysis (wafer roofline)
301
+ # =============================================================================
302
+
303
+
304
@app.command("roofline")
def roofline_cmd(
    gpu: str | None = typer.Option(
        None, "--gpu", "-g", help="GPU name (e.g., H100, B200, MI300X, A100)"
    ),
    bytes_moved: float | None = typer.Option(
        None, "--bytes", "-b", help="Theoretical minimum bytes moved"
    ),
    flops: float | None = typer.Option(None, "--flops", "-f", help="Theoretical minimum FLOPs"),
    time_ms: float | None = typer.Option(
        None, "--time-ms", "-t", help="Actual kernel time in milliseconds"
    ),
    dtype: str = typer.Option(
        "fp16", "--dtype", "-d", help="Data type for compute ceiling (fp16, fp32, bf16, fp8, int8)"
    ),
    list_gpus: bool = typer.Option(False, "--list-gpus", help="List available GPU specs and exit"),
) -> None:
    """Analyze kernel performance against roofline model.

    The roofline model shows the theoretical speed-of-light (SOL) for your kernel
    based on whether it's memory-bound or compute-bound.

    You need to provide:
    - The GPU you ran on
    - Theoretical minimum bytes moved (not actual - what the algorithm requires)
    - Theoretical minimum FLOPs
    - Actual measured kernel time

    Example:
        # Analyze a matmul kernel (4096x4096x4096, FP16)
        # Theoretical: 2*M*N*K FLOPs = 137.4 GFLOP
        # Theoretical bytes: (M*K + K*N + M*N) * 2 = 100.7 MB
        wafer roofline --gpu H100 --bytes 100.7e6 --flops 137.4e9 --time-ms 85

        # Analyze a memory-bound elementwise add (1B elements FP32)
        # Reads 2 tensors, writes 1 = 12 GB total
        # 1B adds = 1 GFLOP
        wafer roofline --gpu H100 --bytes 12e9 --flops 1e9 --time-ms 4 --dtype fp32

        # List available GPUs
        wafer roofline --list-gpus
    """
    # NOTE: the docstring above is rendered as the command's --help text, so
    # the numbers in the examples are user-facing. 2 * 4096**3 is ~137.4e9
    # FLOPs (GFLOP, not TFLOP); keep the unit and the --flops value in sync.

    # Imported inside the command body rather than at module top. The helper
    # is aliased because the `list_gpus` flag parameter shadows its name here.
    from wafer_core.roofline import get_gpu_spec, roofline_analysis
    from wafer_core.roofline import list_gpus as get_all_gpus

    # --list-gpus short-circuits: print one summary line per known GPU and
    # return without requiring any of the analysis options.
    if list_gpus:
        typer.echo("Available GPUs:")
        for name in get_all_gpus():
            spec = get_gpu_spec(name)
            typer.echo(
                f"  {name}: {spec.peak_bandwidth_gbps:.0f} GB/s, {spec.peak_tflops_fp16:.0f} TFLOPS FP16"
            )
        return

    # The analysis options default to None so that --list-gpus can be used on
    # its own; enforce them here and report every missing flag at once.
    required = {
        "--gpu": gpu,
        "--bytes": bytes_moved,
        "--flops": flops,
        "--time-ms": time_ms,
    }
    missing = [flag for flag, value in required.items() if value is None]

    if missing:
        typer.echo(f"Error: Missing required options: {', '.join(missing)}", err=True)
        typer.echo("", err=True)
        typer.echo("Run 'wafer roofline --help' for usage.", err=True)
        raise typer.Exit(1)

    try:
        result = roofline_analysis(
            gpu=gpu,
            dtype=dtype,
            bytes_moved=bytes_moved,
            flops=flops,
            time_ms=time_ms,
        )
    except ValueError as e:
        # ValueError from the analysis is shown as a plain CLI error (exit
        # code 1) instead of a traceback.
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from None

    typer.echo(result.format_report())
388
+
267
389
 
268
390
  # =============================================================================
269
391
  # Skill management (wafer skill ...)
@@ -279,21 +401,22 @@ def skill_install(
279
401
  "all",
280
402
  "--target",
281
403
  "-t",
282
- help="Target tool: claude, codex, or all",
404
+ help="Target tool: claude, codex, cursor, or all",
283
405
  ),
284
406
  force: bool = typer.Option(False, "--force", "-f", help="Overwrite existing skill"),
285
407
  ) -> None:
286
408
  """Install the wafer-guide skill for AI coding assistants.
287
409
 
288
410
  Installs the bundled skill to make wafer commands discoverable by
289
- Claude Code and/or OpenAI Codex CLI.
411
+ Claude Code, OpenAI Codex CLI, and/or Cursor.
290
412
 
291
413
  Skills follow the open agent skills specification (agentskills.io).
292
414
 
293
415
  Examples:
294
- wafer skill install # Install for both Claude and Codex
416
+ wafer skill install # Install for all tools
295
417
  wafer skill install -t claude # Install for Claude Code only
296
418
  wafer skill install -t codex # Install for Codex CLI only
419
+ wafer skill install -t cursor # Install for Cursor only
297
420
  wafer skill install --force # Overwrite existing installation
298
421
  """
299
422
  # Locate bundled skill
@@ -311,9 +434,13 @@ def skill_install(
311
434
  ))
312
435
  if target in ("all", "codex"):
313
436
  targets_to_install.append(("Codex CLI", Path.home() / ".codex" / "skills" / "wafer-guide"))
437
+ if target in ("all", "cursor"):
438
+ targets_to_install.append(("Cursor", Path.home() / ".cursor" / "skills" / "wafer-guide"))
314
439
 
315
440
  if not targets_to_install:
316
- typer.echo(f"Error: Unknown target '{target}'. Use: claude, codex, or all", err=True)
441
+ typer.echo(
442
+ f"Error: Unknown target '{target}'. Use: claude, codex, cursor, or all", err=True
443
+ )
317
444
  raise typer.Exit(1)
318
445
 
319
446
  for tool_name, dest_path in targets_to_install:
@@ -348,14 +475,15 @@ def skill_uninstall(
348
475
  "all",
349
476
  "--target",
350
477
  "-t",
351
- help="Target tool: claude, codex, or all",
478
+ help="Target tool: claude, codex, cursor, or all",
352
479
  ),
353
480
  ) -> None:
354
481
  """Uninstall the wafer-guide skill.
355
482
 
356
483
  Examples:
357
- wafer skill uninstall # Uninstall from both
484
+ wafer skill uninstall # Uninstall from all tools
358
485
  wafer skill uninstall -t claude # Uninstall from Claude Code only
486
+ wafer skill uninstall -t cursor # Uninstall from Cursor only
359
487
  """
360
488
  targets_to_uninstall: list[tuple[str, Path]] = []
361
489
 
@@ -369,9 +497,16 @@ def skill_uninstall(
369
497
  "Codex CLI",
370
498
  Path.home() / ".codex" / "skills" / "wafer-guide",
371
499
  ))
500
+ if target in ("all", "cursor"):
501
+ targets_to_uninstall.append((
502
+ "Cursor",
503
+ Path.home() / ".cursor" / "skills" / "wafer-guide",
504
+ ))
372
505
 
373
506
  if not targets_to_uninstall:
374
- typer.echo(f"Error: Unknown target '{target}'. Use: claude, codex, or all", err=True)
507
+ typer.echo(
508
+ f"Error: Unknown target '{target}'. Use: claude, codex, cursor, or all", err=True
509
+ )
375
510
  raise typer.Exit(1)
376
511
 
377
512
  for tool_name, dest_path in targets_to_uninstall:
@@ -406,6 +541,7 @@ def skill_status() -> None:
406
541
  installations = [
407
542
  ("Claude Code", Path.home() / ".claude" / "skills" / "wafer-guide"),
408
543
  ("Codex CLI", Path.home() / ".codex" / "skills" / "wafer-guide"),
544
+ ("Cursor", Path.home() / ".cursor" / "skills" / "wafer-guide"),
409
545
  ]
410
546
 
411
547
  for tool_name, path in installations:
@@ -1114,6 +1250,11 @@ def agent( # noqa: PLR0913
1114
1250
  "--list-sessions",
1115
1251
  help="List recent sessions and exit",
1116
1252
  ),
1253
+ get_session: str | None = typer.Option(
1254
+ None,
1255
+ "--get-session",
1256
+ help="Get session by ID and print messages (use with --json)",
1257
+ ),
1117
1258
  tools: str | None = typer.Option(
1118
1259
  None,
1119
1260
  "--tools",
@@ -1160,47 +1301,7 @@ def agent( # noqa: PLR0913
1160
1301
  None,
1161
1302
  "--corpus",
1162
1303
  "-c",
1163
- help="Documentation corpus to use (cuda, cutlass, hip). Must be downloaded first.",
1164
- ),
1165
- # Legacy kernel optimization options (hidden, for backwards compat)
1166
- problem: Path | None = typer.Option(
1167
- None,
1168
- "--problem",
1169
- hidden=True,
1170
- help="[Legacy] Path to problem YAML config file",
1171
- ),
1172
- reference: Path | None = typer.Option(
1173
- None,
1174
- "--reference",
1175
- "--ref",
1176
- hidden=True,
1177
- help="[Legacy] Path to reference kernel file",
1178
- ),
1179
- description: str | None = typer.Option(
1180
- None,
1181
- "--description",
1182
- "--desc",
1183
- hidden=True,
1184
- help="[Legacy] Problem description",
1185
- ),
1186
- test: list[str] | None = typer.Option(
1187
- None,
1188
- "--test",
1189
- hidden=True,
1190
- help="[Legacy] Test case",
1191
- ),
1192
- benchmark: list[str] | None = typer.Option(
1193
- None,
1194
- "--benchmark",
1195
- "-b",
1196
- hidden=True,
1197
- help="[Legacy] Benchmark case",
1198
- ),
1199
- speedup_target: float | None = typer.Option(
1200
- None,
1201
- "--speedup",
1202
- hidden=True,
1203
- help="[Legacy] Speedup target",
1304
+ help="Documentation corpus to use (cuda, cutlass, hip, amd). Must be downloaded first.",
1204
1305
  ),
1205
1306
  ) -> None:
1206
1307
  """AI assistant for GPU kernel development.
@@ -1287,20 +1388,15 @@ def agent( # noqa: PLR0913
1287
1388
  prompt=actual_prompt,
1288
1389
  interactive=use_tui,
1289
1390
  single_turn=single_turn,
1290
- problem=str(problem) if problem else None,
1291
- reference=str(reference) if reference else None,
1292
- description=description,
1293
- tests=list(test) if test else None,
1294
- benchmarks=list(benchmark) if benchmark else None,
1295
1391
  model=model,
1296
- max_turns=max_turns,
1297
- speedup_target=speedup_target,
1298
1392
  resume=resume,
1299
1393
  from_turn=from_turn,
1300
1394
  list_sessions=list_sessions,
1395
+ get_session=get_session,
1301
1396
  tools=tools.split(",") if tools else None,
1302
1397
  allow_spawn=allow_spawn,
1303
1398
  max_tool_fails=max_tool_fails,
1399
+ max_turns=max_turns,
1304
1400
  json_output=json_output,
1305
1401
  template=template,
1306
1402
  template_args=parsed_template_args,
@@ -1310,7 +1406,7 @@ def agent( # noqa: PLR0913
1310
1406
 
1311
1407
  # =============================================================================
1312
1408
  # Evaluate command
1313
- # Hidden aliases for backwards compatibility
1409
+ # Hidden aliases for agent command
1314
1410
  def _make_agent_alias(name: str, doc: str) -> None:
1315
1411
  """Create a hidden alias that delegates to agent()."""
1316
1412
 
@@ -1325,6 +1421,7 @@ def _make_agent_alias(name: str, doc: str) -> None:
1325
1421
  resume: str | None = typer.Option(None, "--resume", "-r"),
1326
1422
  from_turn: int | None = typer.Option(None, "--from-turn"),
1327
1423
  list_sessions: bool = typer.Option(False, "--list-sessions"),
1424
+ get_session: str | None = typer.Option(None, "--get-session"),
1328
1425
  tools: str | None = typer.Option(None, "--tools"),
1329
1426
  allow_spawn: bool = typer.Option(False, "--allow-spawn"),
1330
1427
  max_tool_fails: int | None = typer.Option(None, "--max-tool-fails"),
@@ -1334,12 +1431,6 @@ def _make_agent_alias(name: str, doc: str) -> None:
1334
1431
  template: str | None = typer.Option(None, "--template", "-t"),
1335
1432
  template_args: list[str] | None = typer.Option(None, "--args"),
1336
1433
  corpus: str | None = typer.Option(None, "--corpus"),
1337
- problem: Path | None = typer.Option(None, "--problem", hidden=True),
1338
- reference: Path | None = typer.Option(None, "--reference", hidden=True),
1339
- description: str | None = typer.Option(None, "--description", hidden=True),
1340
- test: list[Path] | None = typer.Option(None, "--test", hidden=True),
1341
- benchmark: list[Path] | None = typer.Option(None, "--benchmark", hidden=True),
1342
- speedup_target: float | None = typer.Option(None, "--speedup-target", hidden=True),
1343
1434
  ) -> None:
1344
1435
  agent(
1345
1436
  prompt=prompt,
@@ -1349,6 +1440,7 @@ def _make_agent_alias(name: str, doc: str) -> None:
1349
1440
  resume=resume,
1350
1441
  from_turn=from_turn,
1351
1442
  list_sessions=list_sessions,
1443
+ get_session=get_session,
1352
1444
  tools=tools,
1353
1445
  allow_spawn=allow_spawn,
1354
1446
  max_tool_fails=max_tool_fails,
@@ -1358,12 +1450,6 @@ def _make_agent_alias(name: str, doc: str) -> None:
1358
1450
  template=template,
1359
1451
  template_args=template_args,
1360
1452
  corpus=corpus,
1361
- problem=problem,
1362
- reference=reference,
1363
- description=description,
1364
- test=test,
1365
- benchmark=benchmark,
1366
- speedup_target=speedup_target,
1367
1453
  )
1368
1454
 
1369
1455
  alias_cmd.__doc__ = doc
@@ -1649,7 +1735,7 @@ def kernelbench_list_problems() -> None:
1649
1735
 
1650
1736
 
1651
1737
  @kernelbench_app.callback(invoke_without_command=True)
1652
- def kernelbench_evaluate( # noqa: PLR0913
1738
+ def kernelbench_evaluate( # noqa: PLR0913, PLR0915
1653
1739
  ctx: typer.Context,
1654
1740
  implementation: Path | None = typer.Option(
1655
1741
  None,
@@ -1685,10 +1771,22 @@ def kernelbench_evaluate( # noqa: PLR0913
1685
1771
  defensive: bool = typer.Option(
1686
1772
  False, "--defensive", help="Enable defensive timing to detect evaluation hacking"
1687
1773
  ),
1774
+ backend: str | None = typer.Option(
1775
+ None,
1776
+ "--backend",
1777
+ help="Kernel backend for static validation (hip, cuda, triton, cute, tilelang, thunderkittens). "
1778
+ "When specified, validates that the implementation uses the correct backend primitives.",
1779
+ ),
1688
1780
  sync_artifacts: bool = typer.Option(
1689
1781
  True, "--sync-artifacts/--no-sync-artifacts", help="Download artifacts"
1690
1782
  ),
1691
1783
  gpu_id: int | None = typer.Option(None, "--gpu-id", help="Override GPU ID"),
1784
+ json_output: bool = typer.Option(
1785
+ False, "--json", help="Output as single JSON object (machine-readable)"
1786
+ ),
1787
+ jsonl_output: bool = typer.Option(
1788
+ False, "--jsonl", help="Output as streaming JSON Lines (one object per event)"
1789
+ ),
1692
1790
  ) -> None:
1693
1791
  """Run kernel evaluation in KernelBench format (ModelNew class).
1694
1792
 
@@ -1744,6 +1842,10 @@ def kernelbench_evaluate( # noqa: PLR0913
1744
1842
  raise typer.Exit(1)
1745
1843
 
1746
1844
  from .evaluate import KernelBenchEvaluateArgs, run_evaluate_kernelbench
1845
+ from .output import OutputCollector, format_evaluate_result, get_output_format
1846
+
1847
+ output_format = get_output_format(json_output, jsonl_output)
1848
+ collector = OutputCollector(format=output_format)
1747
1849
 
1748
1850
  # If pool specified, acquire a target from the pool
1749
1851
  resolved_target = target or ""
@@ -1756,32 +1858,36 @@ def kernelbench_evaluate( # noqa: PLR0913
1756
1858
  try:
1757
1859
  pool_targets = get_pool(pool)
1758
1860
  except FileNotFoundError as e:
1759
- typer.echo(f"Error: {e}", err=True)
1861
+ collector.set_error("pool", "PoolNotFound", pool=pool, message=str(e))
1862
+ collector.finalize()
1760
1863
  raise typer.Exit(1) from None
1761
1864
 
1762
1865
  # Filter to only targets with valid auth
1763
1866
  usable_targets, skipped = filter_pool_by_auth(pool_targets)
1764
1867
  if skipped:
1765
- typer.echo(f"Skipping targets without auth: {', '.join(skipped)}", err=True)
1868
+ collector.emit("pool_auth_skip", targets=skipped)
1766
1869
 
1767
1870
  if not usable_targets:
1768
- typer.echo(f"Error: No usable targets in pool '{pool}'", err=True)
1769
- typer.echo(" All targets require authentication that is not configured.", err=True)
1770
- typer.echo(" Run 'wafer auth status' to see which providers need setup.", err=True)
1871
+ collector.set_error("pool", "NoUsableTargets", pool=pool)
1872
+ collector.finalize()
1771
1873
  raise typer.Exit(1) from None
1772
1874
 
1773
- typer.echo(f"Acquiring target from pool '{pool}' ({len(usable_targets)} targets)...")
1875
+ collector.emit("pool_acquire", pool=pool, count=len(usable_targets))
1774
1876
  pool_lock_context = acquire_from_pool(usable_targets)
1775
1877
  acquired_target = pool_lock_context.__enter__()
1776
1878
 
1777
1879
  if acquired_target is None:
1778
- typer.echo(f"Error: All targets in pool '{pool}' are busy", err=True)
1779
- typer.echo(f" Targets: {', '.join(usable_targets)}", err=True)
1880
+ # Exit context manager before raising to avoid resource leak
1881
+ pool_lock_context.__exit__(None, None, None)
1882
+ collector.set_error("pool", "AllTargetsBusy", pool=pool, targets=usable_targets)
1883
+ collector.finalize()
1780
1884
  raise typer.Exit(1)
1781
1885
 
1782
- typer.echo(f"Acquired target: {acquired_target}")
1886
+ collector.emit("pool_acquired", target=acquired_target)
1783
1887
  resolved_target = acquired_target
1784
1888
 
1889
+ collector.target = resolved_target
1890
+
1785
1891
  args = KernelBenchEvaluateArgs(
1786
1892
  implementation=implementation,
1787
1893
  reference=reference,
@@ -1791,41 +1897,45 @@ def kernelbench_evaluate( # noqa: PLR0913
1791
1897
  inputs=inputs,
1792
1898
  seed=seed,
1793
1899
  defensive=defensive,
1900
+ backend=backend,
1794
1901
  sync_artifacts=sync_artifacts,
1795
1902
  gpu_id=gpu_id,
1796
1903
  )
1797
1904
 
1905
+ collector.emit("started", target=resolved_target)
1906
+
1798
1907
  try:
1799
1908
  import trio_asyncio
1800
1909
 
1910
+ collector.emit("evaluation", status="running")
1801
1911
  result = trio_asyncio.run(run_evaluate_kernelbench, args)
1802
1912
  except KeyboardInterrupt:
1803
- typer.echo("\nInterrupted by user", err=True)
1913
+ collector.set_error("evaluation", "Interrupted", message="Interrupted by user")
1914
+ collector.finalize()
1804
1915
  raise typer.Exit(130) from None
1805
1916
  except Exception as e:
1806
- typer.echo(f"Error: {e}", err=True)
1917
+ collector.set_error("evaluation", "Exception", message=str(e))
1918
+ collector.finalize()
1807
1919
  raise typer.Exit(1) from None
1808
1920
  finally:
1809
1921
  # Release pool lock if we acquired one
1810
1922
  if pool_lock_context is not None:
1811
1923
  pool_lock_context.__exit__(None, None, None)
1812
1924
 
1813
- # Print results
1925
+ # Build structured output
1926
+ eval_output = format_evaluate_result(result, target=resolved_target)
1927
+ collector._result = eval_output
1928
+
1929
+ # Print results based on output format
1814
1930
  if result.success:
1815
- typer.echo("")
1816
- typer.echo("=" * 60)
1817
- status = "PASS" if result.all_correct else "FAIL"
1818
- typer.echo(f"Result: {status}")
1819
- score_pct = f"{result.correctness_score:.1%}"
1820
- typer.echo(f"Correctness: {result.passed_tests}/{result.total_tests} ({score_pct})")
1821
- if result.geomean_speedup > 0:
1822
- typer.echo(f"Speedup: {result.geomean_speedup:.2f}x")
1823
- typer.echo("=" * 60)
1931
+ collector.output_text_result(result)
1932
+ collector.finalize()
1824
1933
 
1825
1934
  if not result.all_correct:
1826
1935
  raise typer.Exit(1)
1827
1936
  else:
1828
- typer.echo(f"Error: {result.error_message}", err=True)
1937
+ collector.output_text_error(result.error_message or "Unknown error")
1938
+ collector.finalize()
1829
1939
  raise typer.Exit(1)
1830
1940
 
1831
1941
 
@@ -2182,6 +2292,8 @@ def gpumode_evaluate( # noqa: PLR0913, PLR0915
2182
2292
  acquired_target = pool_lock_context.__enter__()
2183
2293
 
2184
2294
  if acquired_target is None:
2295
+ # Exit context manager before raising to avoid resource leak
2296
+ pool_lock_context.__exit__(None, None, None)
2185
2297
  typer.echo(f"Error: All targets in pool '{pool}' are busy", err=True)
2186
2298
  typer.echo(f" Targets: {', '.join(usable_targets)}", err=True)
2187
2299
  raise typer.Exit(1)
@@ -2402,6 +2514,7 @@ def _run_api_mode( # noqa: PLR0913
2402
2514
  upload_dir: Path | None,
2403
2515
  workspace_id: str | None,
2404
2516
  gpu_id: int | None,
2517
+ gpu_count: int,
2405
2518
  docker_image: str | None,
2406
2519
  docker_entrypoint: str | None,
2407
2520
  pull_image: bool,
@@ -2416,6 +2529,8 @@ def _run_api_mode( # noqa: PLR0913
2416
2529
  typer.echo(f"Workspace: {workspace_id}")
2417
2530
  if gpu_id is not None:
2418
2531
  typer.echo(f"GPU: {gpu_id}")
2532
+ if gpu_count > 1:
2533
+ typer.echo(f"GPU count: {gpu_count}")
2419
2534
  if docker_image:
2420
2535
  typer.echo(f"Image: {docker_image}")
2421
2536
  if docker_entrypoint:
@@ -2433,6 +2548,7 @@ def _run_api_mode( # noqa: PLR0913
2433
2548
  upload_dir=upload_dir,
2434
2549
  workspace_id=workspace_id,
2435
2550
  gpu_id=gpu_id,
2551
+ gpu_count=gpu_count,
2436
2552
  docker_image=docker_image,
2437
2553
  docker_entrypoint=docker_entrypoint,
2438
2554
  pull_image=pull_image,
@@ -2456,6 +2572,7 @@ def remote_run( # noqa: PLR0913
2456
2572
  None, "--workspace-id", "-w", help="Workspace ID (from wafer push)"
2457
2573
  ),
2458
2574
  gpu_id: int | None = typer.Option(None, "--gpu", "-g", help="GPU ID"),
2575
+ gpu_count: int = typer.Option(1, "--gpu-count", "-n", help="Number of GPUs (1-8)"),
2459
2576
  docker_image: str | None = typer.Option(None, "--image", "-i", help="Docker image override"),
2460
2577
  docker_entrypoint: str | None = typer.Option(
2461
2578
  None, "--docker-entrypoint", help="Override Docker entrypoint (e.g., 'bash')"
@@ -2525,6 +2642,7 @@ def remote_run( # noqa: PLR0913
2525
2642
  upload_dir,
2526
2643
  workspace_id,
2527
2644
  gpu_id,
2645
+ gpu_count,
2528
2646
  docker_image,
2529
2647
  docker_entrypoint,
2530
2648
  pull_image,
@@ -4108,6 +4226,81 @@ def billing_portal(
4108
4226
  raise typer.Exit(1) from None
4109
4227
 
4110
4228
 
4229
+ # =============================================================================
4230
+ # SSH Keys commands (BYOK - Bring Your Own Key)
4231
+ # =============================================================================
4232
+
4233
+
4234
@ssh_keys_app.command("list")
def ssh_keys_list(
    json_output: bool = typer.Option(False, "--json", "-j", help="Output as JSON"),
) -> None:
    """List all registered SSH public keys.

    Example:
        wafer ssh-keys list
        wafer ssh-keys list --json
    """
    # The docstring above doubles as the command's --help text.
    from .ssh_keys import list_ssh_keys

    try:
        # list_ssh_keys returns the text to print; echo it unchanged.
        typer.echo(list_ssh_keys(json_output=json_output))
    except RuntimeError as exc:
        # Report the failure on stderr and exit with status 1.
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1) from exc
4252
+
4253
+
4254
@ssh_keys_app.command("add")
def ssh_keys_add(
    pubkey_path: Path | None = typer.Argument(
        None, help="Path to public key file (auto-detects ~/.ssh/id_ed25519.pub if not specified)"
    ),
    name: str | None = typer.Option(None, "--name", "-n", help="Friendly name for the key"),
    json_output: bool = typer.Option(False, "--json", "-j", help="Output as JSON"),
) -> None:
    """Add an SSH public key.

    If no path is specified, auto-detects keys from ~/.ssh/ in preference order:
    id_ed25519.pub, id_rsa.pub, id_ecdsa.pub.

    Example:
        wafer ssh-keys add # Auto-detect
        wafer ssh-keys add ~/.ssh/id_rsa.pub # Specific file
        wafer ssh-keys add ~/.ssh/id_ed25519.pub --name laptop
    """
    # The docstring above doubles as the command's --help text.
    from .ssh_keys import add_ssh_key

    try:
        # pubkey_path may be None; it is forwarded as-is to add_ssh_key.
        outcome = add_ssh_key(
            pubkey_path=pubkey_path,
            name=name,
            json_output=json_output,
        )
        typer.echo(outcome)
    except RuntimeError as exc:
        # Report the failure on stderr and exit with status 1.
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1) from exc
4280
+
4281
+
4282
@ssh_keys_app.command("remove")
def ssh_keys_remove(
    key_id: str = typer.Argument(..., help="UUID of the SSH key to remove"),
    json_output: bool = typer.Option(False, "--json", "-j", help="Output as JSON"),
) -> None:
    """Remove an SSH public key.

    Get the key ID from 'wafer ssh-keys list'.

    Example:
        wafer ssh-keys remove abc123-def456-...
    """
    # The docstring above doubles as the command's --help text.
    from .ssh_keys import remove_ssh_key

    try:
        # remove_ssh_key returns the text to print; echo it unchanged.
        typer.echo(remove_ssh_key(key_id=key_id, json_output=json_output))
    except RuntimeError as exc:
        # Report the failure on stderr and exit with status 1.
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1) from exc
4302
+
4303
+
4111
4304
  # =============================================================================
4112
4305
  # Workspaces commands
4113
4306
  # =============================================================================
@@ -4136,21 +4329,34 @@ def workspaces_list(
4136
4329
  @workspaces_app.command("create")
4137
4330
  def workspaces_create(
4138
4331
  name: str = typer.Argument(..., help="Workspace name"),
4139
- gpu_type: str = typer.Option("B200", "--gpu", "-g", help="GPU type (default: B200)"),
4332
+ gpu_type: str = typer.Option("B200", "--gpu", "-g", help="GPU type: MI300X (AMD) or B200 (NVIDIA, default)"),
4140
4333
  image: str | None = typer.Option(None, "--image", "-i", help="Docker image (optional)"),
4334
+ wait: bool = typer.Option(False, "--wait", "-w", help="Wait for provisioning and show SSH credentials"),
4141
4335
  json_output: bool = typer.Option(False, "--json", "-j", help="Output as JSON"),
4142
4336
  ) -> None:
4143
4337
  """Create a new workspace.
4144
4338
 
4339
+ Available GPUs:
4340
+ MI300X AMD Instinct MI300X (192GB HBM3, ROCm)
4341
+ B200 NVIDIA Blackwell B200 (180GB HBM3e, CUDA)
4342
+
4145
4343
  Example:
4146
- wafer workspaces create my-kernel
4147
- wafer workspaces create my-kernel --gpu H100
4344
+ wafer workspaces create my-kernel # B200 (default)
4345
+ wafer workspaces create my-kernel --gpu MI300X # AMD MI300X
4346
+ wafer workspaces create my-kernel --gpu B200 # NVIDIA B200
4148
4347
  wafer workspaces create my-kernel --image pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel
4348
+ wafer workspaces create my-kernel --wait
4149
4349
  """
4150
4350
  from .workspaces import create_workspace
4151
4351
 
4152
4352
  try:
4153
- result = create_workspace(name, gpu_type=gpu_type, image=image, json_output=json_output)
4353
+ result = create_workspace(
4354
+ name,
4355
+ gpu_type=gpu_type,
4356
+ image=image,
4357
+ wait=wait,
4358
+ json_output=json_output,
4359
+ )
4154
4360
  typer.echo(result)
4155
4361
  except RuntimeError as e:
4156
4362
  typer.echo(f"Error: {e}", err=True)
@@ -4160,16 +4366,23 @@ def workspaces_create(
4160
4366
@workspaces_app.command("delete")
def workspaces_delete(
    workspace_id: str = typer.Argument(..., help="Workspace ID to delete"),
    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"),
    json_output: bool = typer.Option(False, "--json", "-j", help="Output as JSON"),
) -> None:
    """Delete a workspace.

    Example:
        wafer workspaces delete ws_abc123
        wafer workspaces delete ws_abc123 -y
    """
    from .workspaces import delete_workspace

    # Ask for confirmation BEFORE entering the try block. typer.Exit and
    # typer.Abort are Click exceptions that subclass RuntimeError, so raising
    # them inside `try: ... except RuntimeError` would be caught by the
    # handler below: a declined prompt would print a spurious "Error: " line
    # and exit with status 1 instead of 0.
    if not yes and not typer.confirm(f"Delete workspace '{workspace_id}'?"):
        typer.echo("Cancelled.")
        raise typer.Exit(0)

    try:
        result = delete_workspace(workspace_id, json_output=json_output)
        typer.echo(result)
    except RuntimeError as e:
        # Report the failure on stderr and exit with status 1.
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from None
4178
4391
 
4179
4392
 
4180
- @workspaces_app.command("attach")
4181
- def workspaces_attach(
4182
- workspace_id: str = typer.Argument(..., help="Workspace ID to attach to"),
4183
- json_output: bool = typer.Option(False, "--json", "-j", help="Output as JSON"),
4184
- ) -> None:
4185
- """Attach to a workspace (get SSH credentials).
4186
-
4187
- This will:
4188
- 1. Start the workspace if needed
4189
- 2. Return SSH connection details
4190
- 3. Save the private key to ~/.wafer/keys/
4191
-
4192
- Example:
4193
- wafer workspaces attach ws_abc123
4194
- wafer workspaces attach ws_abc123 --json
4195
- """
4196
- from .workspaces import attach_workspace
4197
-
4198
- try:
4199
- result = attach_workspace(workspace_id, json_output=json_output)
4200
- typer.echo(result)
4201
- except RuntimeError as e:
4202
- typer.echo(f"Error: {e}", err=True)
4203
- raise typer.Exit(1) from None
4204
-
4205
-
4206
4393
  @workspaces_app.command("show")
4207
4394
  def workspaces_show(
4208
4395
  workspace_id: str = typer.Argument(..., help="Workspace ID to show"),
@@ -4224,12 +4411,19 @@ def workspaces_show(
4224
4411
  raise typer.Exit(1) from None
4225
4412
 
4226
4413
 
4227
- @workspaces_app.command("exec", context_settings={"allow_interspersed_args": False})
4414
+ @workspaces_app.command(
4415
+ "exec",
4416
+ context_settings={
4417
+ "allow_interspersed_args": False,
4418
+ "ignore_unknown_options": True,
4419
+ "allow_extra_args": True,
4420
+ },
4421
+ )
4228
4422
  def workspaces_exec(
4423
+ ctx: typer.Context,
4229
4424
  workspace: str | None = typer.Argument(
4230
4425
  None, help="Workspace name or ID (optional if default set)"
4231
4426
  ),
4232
- command: list[str] = typer.Argument(..., help="Command to execute"),
4233
4427
  timeout: int | None = typer.Option(
4234
4428
  None,
4235
4429
  "--timeout",
@@ -4247,6 +4441,7 @@ def workspaces_exec(
4247
4441
  baremetal: bool = typer.Option(
4248
4442
  False, "--baremetal", help="Force baremetal target (for hardware counters like ncu/nsys)"
4249
4443
  ),
4444
+ pull_image: bool = typer.Option(False, "--pull-image", help="Pull image on target if missing"),
4250
4445
  verbose: bool = typer.Option(False, "--verbose", "-v", help="Show [wafer] status messages"),
4251
4446
  quiet: bool = typer.Option(False, "--quiet", "-q", help="Suppress [wafer] status messages"),
4252
4447
  ) -> None:
@@ -4263,6 +4458,8 @@ def workspaces_exec(
4263
4458
  If workspace is not specified, uses the default workspace from config,
4264
4459
  or the only workspace if you have exactly one.
4265
4460
 
4461
+ IMPORTANT: Options must come before the workspace name.
4462
+
4266
4463
  Examples:
4267
4464
  wafer workspaces exec dev -- python train.py
4268
4465
  wafer workspaces exec dev -- python -c "import torch; print(torch.cuda.is_available())"
@@ -4273,6 +4470,34 @@ def workspaces_exec(
4273
4470
  from .global_config import get_defaults, get_preferences
4274
4471
  from .workspaces import exec_command, resolve_workspace, sync_files
4275
4472
 
4473
+ # Enforce option ordering to avoid treating CLI flags as remote commands
4474
+ known_options = {
4475
+ "--timeout",
4476
+ "-t",
4477
+ "--sync",
4478
+ "-s",
4479
+ "--gpu",
4480
+ "--cpu",
4481
+ "--baremetal",
4482
+ "--pull-image",
4483
+ "--verbose",
4484
+ "-v",
4485
+ "--quiet",
4486
+ "-q",
4487
+ "--help",
4488
+ "-h",
4489
+ }
4490
+ for arg in ctx.args:
4491
+ if arg == "--":
4492
+ break
4493
+ if arg in known_options:
4494
+ typer.echo(
4495
+ "Error: options must come before the workspace name. "
4496
+ "Example: wafer workspaces exec --pull-image dev -- python -V",
4497
+ err=True,
4498
+ )
4499
+ raise typer.Exit(1)
4500
+
4276
4501
  # Validate mutually exclusive routing flags
4277
4502
  routing_flags = sum([gpu, cpu, baremetal])
4278
4503
  if routing_flags > 1:
@@ -4339,27 +4564,30 @@ def workspaces_exec(
4339
4564
  typer.echo(f"Error: {e}", err=True)
4340
4565
  raise typer.Exit(1) from None
4341
4566
 
4567
+ # Get command from context args (passthrough after --)
4568
+ import shlex
4569
+
4570
+ command = list(ctx.args)
4571
+ if command and command[0] == "--":
4572
+ command = command[1:]
4573
+
4574
+ if not command:
4575
+ typer.echo("Error: No command specified", err=True)
4576
+ raise typer.Exit(1)
4577
+
4342
4578
  if show_status:
4343
4579
  typer.echo(f"[wafer] Executing (timeout: {effective_timeout}s)...", err=True)
4344
4580
 
4345
- # Join command list into shell command string, stripping leading "--" separator
4346
- if isinstance(command, list):
4347
- import shlex
4348
-
4349
- # Remove leading "--" if present (typer passes it through with allow_interspersed_args=False)
4350
- if command and command[0] == "--":
4351
- command = command[1:]
4352
- # Handle two cases:
4353
- # 1. Single element: user quoted the whole command (e.g., "echo hello world")
4354
- # -> use directly, don't re-quote
4355
- # 2. Multiple elements: user passed separate args (e.g., -- python -c "print(1)")
4356
- # -> use shlex.join to properly quote args with spaces
4357
- if len(command) == 1:
4358
- command_str = command[0]
4359
- else:
4360
- command_str = shlex.join(command)
4581
+ # Build command string
4582
+ # Handle two cases:
4583
+ # 1. Single element: user quoted the whole command (e.g., "echo hello world")
4584
+ # -> use directly, don't re-quote
4585
+ # 2. Multiple elements: user passed separate args (e.g., -- python -c "print(1)")
4586
+ # -> use shlex.join to properly quote args with spaces
4587
+ if len(command) == 1:
4588
+ command_str = command[0]
4361
4589
  else:
4362
- command_str = command
4590
+ command_str = shlex.join(command)
4363
4591
 
4364
4592
  try:
4365
4593
  exit_code = exec_command(
@@ -4367,6 +4595,7 @@ def workspaces_exec(
4367
4595
  command=command_str,
4368
4596
  timeout_seconds=effective_timeout,
4369
4597
  routing=routing,
4598
+ pull_image=pull_image,
4370
4599
  )
4371
4600
  except RuntimeError as e:
4372
4601
  typer.echo(f"Error: {e}", err=True)
@@ -4386,7 +4615,7 @@ def workspaces_ssh(
4386
4615
  ) -> None:
4387
4616
  """SSH into a workspace.
4388
4617
 
4389
- Gets SSH credentials via attach, then execs into SSH.
4618
+ Uses workspace SSH credentials once the workspace is running.
4390
4619
  If workspace is not specified, uses the default workspace.
4391
4620
 
4392
4621
  Examples:
@@ -4395,7 +4624,7 @@ def workspaces_ssh(
4395
4624
  """
4396
4625
  import os
4397
4626
 
4398
- from .workspaces import get_ssh_credentials, resolve_workspace
4627
+ from .workspaces import get_workspace_raw, resolve_workspace
4399
4628
 
4400
4629
  # Resolve workspace
4401
4630
  try:
@@ -4406,26 +4635,39 @@ def workspaces_ssh(
4406
4635
 
4407
4636
  typer.echo(f"Connecting to workspace: {resolved_workspace}...", err=True)
4408
4637
 
4409
- # Get SSH credentials (this calls attach)
4638
+ # Get SSH credentials from workspace
4410
4639
  try:
4411
- creds = get_ssh_credentials(resolved_workspace)
4640
+ ws = get_workspace_raw(resolved_workspace)
4412
4641
  except RuntimeError as e:
4413
4642
  typer.echo(f"Error: {e}", err=True)
4414
4643
  raise typer.Exit(1) from None
4415
4644
 
4416
- # Exec into SSH - replaces this process
4417
- ssh_args = [
4418
- "ssh",
4419
- "-i",
4420
- str(creds.key_path),
4645
+ from .workspaces import VALID_STATUSES
4646
+
4647
+ workspace_status = ws.get("status")
4648
+ assert workspace_status in VALID_STATUSES, (
4649
+ f"Workspace {resolved_workspace} has invalid status '{workspace_status}'. "
4650
+ f"Valid statuses: {VALID_STATUSES}"
4651
+ )
4652
+
4653
+ if workspace_status != "running":
4654
+ typer.echo(f"Error: Workspace is {workspace_status}. Wait for it to be running.", err=True)
4655
+ raise typer.Exit(1)
4656
+ if not ws.get("ssh_host") or not ws.get("ssh_port") or not ws.get("ssh_user"):
4657
+ typer.echo("Error: SSH credentials not available yet.", err=True)
4658
+ raise typer.Exit(1)
4659
+
4660
+ # Build SSH args - key_path is None for BYOK model (uses default SSH key)
4661
+ ssh_args = ["ssh"]
4662
+ ssh_args.extend([
4421
4663
  "-p",
4422
- str(creds.port),
4664
+ str(ws.get("ssh_port")),
4423
4665
  "-o",
4424
4666
  "StrictHostKeyChecking=no",
4425
4667
  "-o",
4426
4668
  "UserKnownHostsFile=/dev/null",
4427
- f"{creds.user}@{creds.host}",
4428
- ]
4669
+ f"{ws.get('ssh_user')}@{ws.get('ssh_host')}",
4670
+ ])
4429
4671
 
4430
4672
  # Replace current process with SSH
4431
4673
  os.execvp("ssh", ssh_args)
@@ -4492,51 +4734,568 @@ def workspaces_sync(
4492
4734
 
4493
4735
 
4494
4736
  # =============================================================================
4495
- # Perfetto trace analysis commands
4737
+ # Target operations commands (exec/ssh/sync)
4496
4738
  # =============================================================================
4497
4739
 
4498
4740
 
4499
- @perfetto_app.command("query")
4500
- def perfetto_query(
4501
- trace_path: Path = typer.Argument(..., help="Path to Perfetto trace file"),
4502
- sql: str = typer.Argument(..., help="SQL query to execute"),
4503
- json_output: bool = typer.Option(True, "--json", "-j", help="Output as JSON"),
4741
@targets_ops_app.command("exec", context_settings={"allow_interspersed_args": False})
def targets_exec(
    target: str = typer.Argument(
        ...,
        help="Target name",
        autocompletion=complete_target_name,
    ),
    command: list[str] = typer.Argument(..., help="Command to execute"),
    timeout: int | None = typer.Option(
        None,
        "--timeout",
        "-t",
        help="Execution timeout in seconds (default: 300)",
    ),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show [wafer] status messages"),
    quiet: bool = typer.Option(False, "--quiet", "-q", help="Suppress [wafer] status messages"),
) -> None:
    """Execute a command on a configured target.

    Provisions the target if needed (RunPod, DigitalOcean), then runs the command via SSH.
    For cloud targets, the instance is kept alive after execution - use
    'wafer config targets cleanup <name>' to terminate.

    Supported targets: RunPod, DigitalOcean, SSH (baremetal/vm).
    Not supported: Modal (serverless), Local (no SSH), Workspace (use 'wafer workspaces exec').

    Options must come before the target name: everything after the target is
    treated as part of the remote command.

    Examples:
        wafer targets exec runpod-mi300x -- python -c "import torch; print(torch.cuda.is_available())"
        wafer targets exec runpod-mi300x -- rocm-smi
        wafer targets exec my-ssh-server -- nvidia-smi
        wafer targets exec --timeout 60 runpod-mi300x "echo hello && ls -la"
    """
    import shlex

    from .global_config import get_preferences
    from .targets import load_target
    from .targets_ops import TargetExecError, exec_on_target_sync, get_target_ssh_info

    # Build the command string up front. Resolving SSH info below may provision
    # a cloud instance, so an unusable command must be rejected before that.
    if isinstance(command, list):
        argv = command
        # With allow_interspersed_args=False the "--" separator is passed
        # through as the first element of the command; drop it.
        if argv and argv[0] == "--":
            argv = argv[1:]

        if not argv:
            typer.echo("Error: No command specified", err=True)
            raise typer.Exit(1)

        # A single element means the user quoted the whole command: use it
        # verbatim. Several elements are re-quoted so spaces survive the shell.
        command_str = argv[0] if len(argv) == 1 else shlex.join(argv)
    else:
        command_str = command

    # Determine verbosity: --quiet wins over --verbose, then the saved preference.
    prefs = get_preferences()
    if quiet:
        show_status = False
    elif verbose:
        show_status = True
    else:
        show_status = prefs.mode == "explicit"

    # Load target
    try:
        target_config = load_target(target)
    except FileNotFoundError as e:
        typer.echo(f"Error: {e}", err=True)
        typer.echo("List available targets with: wafer config targets list", err=True)
        raise typer.Exit(1) from None
    except ValueError as e:
        typer.echo(f"Error loading target config: {e}", err=True)
        raise typer.Exit(1) from None

    if show_status:
        typer.echo(f"[wafer] Target: {target} ({type(target_config).__name__})", err=True)
        typer.echo("[wafer] Connecting to target...", err=True)

    # Get SSH info (may provision)
    try:
        ssh_info = trio.run(get_target_ssh_info, target_config)
    except TargetExecError as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from None

    if show_status:
        typer.echo(f"[wafer] Connected: {ssh_info.user}@{ssh_info.host}:{ssh_info.port}", err=True)

    # Default timeout
    effective_timeout = timeout if timeout is not None else 300

    if show_status:
        typer.echo(f"[wafer] Executing (timeout: {effective_timeout}s)...", err=True)

    # Execute
    try:
        exit_code = exec_on_target_sync(ssh_info, command_str, effective_timeout)
    except TargetExecError as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from None

    if show_status:
        typer.echo(f"[wafer] Exit code: {exit_code}", err=True)

    # Propagate the remote command's exit status as this process's exit status.
    raise typer.Exit(exit_code)
4849
+
4850
+
4851
@targets_ops_app.command("ssh")
def targets_ssh(
    target: str = typer.Argument(
        ...,
        help="Target name",
        autocompletion=complete_target_name,
    ),
) -> None:
    """SSH into a configured target.

    Provisions the target if needed (RunPod, DigitalOcean), then starts an interactive SSH session.
    For cloud targets, the instance is kept alive - use 'wafer config targets cleanup <name>' to terminate.

    Examples:
        wafer targets ssh runpod-mi300x
        wafer targets ssh my-baremetal-server
    """
    # Imported locally, as the workspace SSH command does, so this command does
    # not depend on a module-level `os` import.
    import os

    from .targets import load_target
    from .targets_ops import TargetExecError, get_target_ssh_info

    # Load target
    try:
        target_config = load_target(target)
    except FileNotFoundError as e:
        typer.echo(f"Error: {e}", err=True)
        typer.echo("List available targets with: wafer config targets list", err=True)
        raise typer.Exit(1) from None
    except ValueError as e:
        typer.echo(f"Error loading target config: {e}", err=True)
        raise typer.Exit(1) from None

    typer.echo(f"Connecting to target: {target}...", err=True)

    # Get SSH info (may provision)
    try:
        ssh_info = trio.run(get_target_ssh_info, target_config)
    except TargetExecError as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from None

    # Build SSH command. Host-key checking is disabled and known hosts are not
    # recorded for this connection.
    ssh_args = [
        "ssh",
        "-i",
        str(ssh_info.key_path),
        "-p",
        str(ssh_info.port),
        "-o",
        "StrictHostKeyChecking=no",
        "-o",
        "UserKnownHostsFile=/dev/null",
        f"{ssh_info.user}@{ssh_info.host}",
    ]

    # Replace current process with SSH (does not return on success)
    os.execvp("ssh", ssh_args)
4907
+
4908
+
4909
@targets_ops_app.command("sync")
def targets_sync(
    target: str = typer.Argument(
        ...,
        help="Target name",
        autocompletion=complete_target_name,
    ),
    path: Path = typer.Argument(..., help="Local file or directory to sync"),
    dest: str | None = typer.Option(
        None,
        "--dest",
        "-d",
        help="Remote destination path (default: /tmp/<basename>)",
    ),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show [wafer] status messages"),
    quiet: bool = typer.Option(False, "--quiet", "-q", help="Suppress [wafer] status messages"),
) -> None:
    """Sync local files to a configured target.

    Uses rsync over SSH to copy files to the target. Provisions the target if needed.

    Examples:
        wafer targets sync runpod-mi300x ./my-project
        wafer targets sync runpod-mi300x ./script.py --dest /workspace/script.py
        wafer targets sync my-server ./kernels --dest /tmp/kernels
    """
    from .global_config import get_preferences
    from .targets import load_target
    from .targets_ops import TargetExecError, get_target_ssh_info, sync_to_target

    # --quiet beats --verbose; with neither flag the stored preference decides.
    preferences = get_preferences()
    show_status = False if quiet else (True if verbose else preferences.mode == "explicit")

    def status(message: str) -> None:
        """Emit a [wafer]-prefixed status line on stderr when status output is enabled."""
        if show_status:
            typer.echo(f"[wafer] {message}", err=True)

    # The local source must exist before anything remote is attempted.
    if not path.exists():
        typer.echo(f"Error: Path not found: {path}", err=True)
        raise typer.Exit(1)

    try:
        target_config = load_target(target)
    except FileNotFoundError as exc:
        typer.echo(f"Error: {exc}", err=True)
        typer.echo("List available targets with: wafer config targets list", err=True)
        raise typer.Exit(1) from None
    except ValueError as exc:
        typer.echo(f"Error loading target config: {exc}", err=True)
        raise typer.Exit(1) from None

    status(f"Target: {target} ({type(target_config).__name__})")
    status("Connecting to target...")

    # Resolving SSH details may provision the target.
    try:
        ssh_info = trio.run(get_target_ssh_info, target_config)
    except TargetExecError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1) from None

    status(f"Connected: {ssh_info.user}@{ssh_info.host}:{ssh_info.port}")

    # Progress messages from the sync go through the same status channel.
    try:
        file_count = sync_to_target(ssh_info, path.resolve(), dest, status)
    except TargetExecError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1) from None

    status(f"Done. Synced {file_count} files.")
4993
+
4994
+
4995
@targets_ops_app.command("scp")
def targets_scp(
    source: str = typer.Argument(..., help="Source path (prefix with target: for remote)"),
    dest: str = typer.Argument(..., help="Destination path (prefix with target: for remote)"),
    recursive: bool = typer.Option(False, "-r", "--recursive", help="Copy directories recursively"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show [wafer] status messages"),
    quiet: bool = typer.Option(False, "--quiet", "-q", help="Suppress [wafer] status messages"),
) -> None:
    """Copy files to/from a target using scp-style syntax.

    Use target: prefix to indicate remote paths. Exactly one of source or dest
    must be remote.

    Examples:
        wafer targets scp runpod-mi300x:/tmp/trace.json ./trace.json  # download
        wafer targets scp ./script.py runpod-mi300x:/tmp/script.py   # upload
        wafer targets scp -r ./kernels runpod-mi300x:/tmp/kernels    # upload dir
        wafer targets scp -r runpod-mi300x:/tmp/results ./results    # download dir
    """
    from .global_config import get_preferences
    from .targets import load_target
    from .targets_ops import TargetExecError, get_target_ssh_info, parse_scp_path, scp_transfer

    # --quiet beats --verbose; with neither flag the stored preference decides.
    preferences = get_preferences()
    show_status = False if quiet else (True if verbose else preferences.mode == "explicit")

    # Split each argument into (target name or None/empty, path).
    source_target, source_path = parse_scp_path(source)
    dest_target, dest_path = parse_scp_path(dest)

    # Exactly one side must name a target.
    if source_target and dest_target:
        typer.echo("Error: Both paths are remote. Use ssh to transfer between remotes.", err=True)
        raise typer.Exit(1)
    if not (source_target or dest_target):
        typer.echo("Error: Both paths are local. Use regular cp command.", err=True)
        raise typer.Exit(1)

    # A remote source means download; otherwise this is an upload to dest's target.
    is_download = source_target is not None
    target_name = source_target if is_download else dest_target

    try:
        target_config = load_target(target_name)
    except FileNotFoundError:
        typer.echo(f"Error: Target '{target_name}' not found.", err=True)
        typer.echo("Run 'wafer config targets list' to see available targets.", err=True)
        raise typer.Exit(1) from None
    except ValueError as exc:
        typer.echo(f"Error loading target config: {exc}", err=True)
        raise typer.Exit(1) from None

    # For uploads, check the local side before touching the target.
    if not is_download:
        local_source = Path(source_path)
        if not local_source.exists():
            typer.echo(f"Error: Local path '{source_path}' does not exist.", err=True)
            raise typer.Exit(1)
        if local_source.is_dir() and not recursive:
            typer.echo(
                f"Error: '{source_path}' is a directory. Use -r flag for recursive copy.", err=True
            )
            raise typer.Exit(1)

    if show_status:
        typer.echo(f"[wafer] Target: {target_name} ({type(target_config).__name__})", err=True)
        typer.echo("[wafer] Connecting to target...", err=True)

    # Resolving SSH details may provision the target.
    try:
        ssh_info = trio.run(get_target_ssh_info, target_config)
    except TargetExecError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1) from None

    if show_status:
        typer.echo(f"[wafer] Connected: {ssh_info.user}@{ssh_info.host}:{ssh_info.port}", err=True)
        verb = "Downloading" if is_download else "Uploading"
        typer.echo(f"[wafer] {verb}...", err=True)

    # One call covers both directions; only the is_download flag differs.
    try:
        scp_transfer(
            ssh_info, source_path, dest_path, is_download=is_download, recursive=recursive
        )
    except TargetExecError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1) from None

    if show_status:
        typer.echo("[wafer] Done.", err=True)
5095
+
5096
+
5097
@targets_ops_app.command("ensure")
def targets_ensure(  # noqa: PLR0915
    target: str = typer.Argument(
        None,
        help="Target name",
        autocompletion=complete_target_name,
    ),
    tool: str = typer.Argument(None, help="Tool to ensure is installed"),
    check_only: bool = typer.Option(False, "--check-only", "-c", help="Only check, don't install"),
    force: bool = typer.Option(False, "--force", "-f", help="Reinstall even if present"),
    list_tools: bool = typer.Option(False, "--list", "-l", help="List available tools"),
    timeout: int = typer.Option(300, "--timeout", "-t", help="Installation timeout in seconds"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show [wafer] status messages"),
    quiet: bool = typer.Option(False, "--quiet", "-q", help="Suppress [wafer] status messages"),
) -> None:
    """Ensure a tool is installed on a target.

    Checks if a tool exists on the target and installs it if missing.
    Useful for profiling tools like rocprof-compute that aren't pre-installed.

    Examples:
        wafer targets ensure runpod-mi300x rocprof-compute
        wafer targets ensure runpod-mi300x rocprof-compute --check-only
        wafer targets ensure runpod-mi300x rocprof-compute --force
        wafer targets ensure --list
    """
    from .global_config import get_preferences
    from .targets import load_target
    from .targets_ops import (
        TOOL_REGISTRY,
        TargetExecError,
        ensure_tool,
        exec_on_target_sync,
        get_target_platform,
        get_target_ssh_info,
    )

    # --list: print the registry grouped by platform, then stop.
    if list_tools:
        registry_entries = sorted(TOOL_REGISTRY.items())
        typer.echo("Available tools:\n")
        sections = (
            ("AMD tools:", "amd"),
            ("\nNVIDIA tools:", "nvidia"),
            ("\nCross-platform:", "any"),
        )
        for heading, wanted_platform in sections:
            typer.echo(heading)
            for tool_name, tool_spec in registry_entries:
                if tool_spec.platform != wanted_platform:
                    continue
                auto = "auto-install" if tool_spec.install_cmd else "manual"
                typer.echo(f" {tool_name:20} ({auto}) - {tool_spec.description}")
        return

    # Both positionals are optional to the parser (so --list works alone) and
    # are therefore checked by hand here.
    for value, label in ((target, "TARGET"), (tool, "TOOL")):
        if not value:
            typer.echo(f"Error: Missing argument '{label}'", err=True)
            typer.echo("Usage: wafer targets ensure TARGET TOOL", err=True)
            typer.echo(" or: wafer targets ensure --list", err=True)
            raise typer.Exit(1)

    # Reject tools that are not in the registry.
    if tool not in TOOL_REGISTRY:
        typer.echo(f"Error: Unknown tool '{tool}'", err=True)
        typer.echo(f"Available tools: {', '.join(sorted(TOOL_REGISTRY.keys()))}", err=True)
        typer.echo("Run 'wafer targets ensure --list' for details.", err=True)
        raise typer.Exit(1)

    spec = TOOL_REGISTRY[tool]

    # --quiet beats --verbose; with neither flag the stored preference decides.
    preferences = get_preferences()
    show_status = False if quiet else (True if verbose else preferences.mode == "explicit")

    try:
        target_config = load_target(target)
    except FileNotFoundError as exc:
        typer.echo(f"Error: {exc}", err=True)
        typer.echo("List available targets with: wafer config targets list", err=True)
        raise typer.Exit(1) from None
    except ValueError as exc:
        typer.echo(f"Error loading target config: {exc}", err=True)
        raise typer.Exit(1) from None

    # A platform-specific tool must match the target's platform.
    platform = get_target_platform(target_config)
    if spec.platform not in ("any", platform):
        typer.echo(
            f"Error: {tool} is an {spec.platform.upper()} tool but target '{target}' "
            f"is {platform.upper()}",
            err=True,
        )
        raise typer.Exit(1)

    if show_status:
        typer.echo(f"[wafer] Target: {target} ({platform.upper()})", err=True)
        typer.echo(f"[wafer] Checking for {tool}...", err=True)

    # Resolving SSH details may provision the target.
    try:
        ssh_info = trio.run(get_target_ssh_info, target_config)
    except TargetExecError as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1) from None

    if show_status:
        typer.echo(f"[wafer] Connected: {ssh_info.user}@{ssh_info.host}:{ssh_info.port}", err=True)

    # --check-only: run the tool's check command and report, never install.
    if check_only:
        try:
            exit_code = exec_on_target_sync(ssh_info, spec.check_cmd, timeout_seconds=30)
        except TargetExecError as exc:
            typer.echo(f"Error: {exc}", err=True)
            raise typer.Exit(1) from None
        if exit_code != 0:
            typer.echo(f"{tool} is NOT installed", err=True)
            raise typer.Exit(1)
        typer.echo(f"{tool} is installed")
        return

    # Install (or confirm) the tool and report the outcome.
    result = ensure_tool(ssh_info, tool, force=force, timeout=timeout)

    if result.error:
        typer.echo(f"Error: {result.error}", err=True)
        raise typer.Exit(1)

    if result.already_installed:
        typer.echo(f"{tool} is already installed")
    elif result.installed:
        outcome = "installed successfully" if result.verified else "installed (verification skipped)"
        typer.echo(f"{tool} {outcome}")
5251
+
5252
+
5253
+ # =============================================================================
5254
+ # Perfetto trace analysis commands
5255
+ # =============================================================================
5256
+
5257
+
5258
@perfetto_app.command("query")
def perfetto_query(
    trace_path: Path = typer.Argument(..., help="Path to Perfetto trace file"),
    sql: str = typer.Argument(..., help="SQL query to execute"),
    json_output: bool = typer.Option(True, "--json", "-j", help="Output as JSON"),
) -> None:
    """Execute SQL query against a Perfetto trace.

    Starts trace_processor, loads the trace, executes the query, and returns results.

    Examples:
        wafer perfetto query trace.perfetto "SELECT * FROM slice LIMIT 10"
        wafer perfetto query trace.perfetto "SELECT name, dur FROM slice ORDER BY dur DESC LIMIT 5"
    """
    from wafer_core.lib.perfetto.perfetto_tool import PerfettoConfig, PerfettoTool

    # NOTE(review): json_output defaults to True and only "--json"/"-j" are
    # declared, so the table branch below may be unreachable from the command
    # line - confirm whether a "--no-json" flag was intended.
    config = PerfettoConfig(
        workspace_root=".",
        storage_dir=str(Path.home() / ".wafer" / "perfetto"),
    )
    tool = PerfettoTool(config)

    try:
        results, err = tool.query(sql, str(trace_path))
        if err:
            typer.echo(f"Error: {err}", err=True)
            raise typer.Exit(1)

        if json_output:
            typer.echo(json.dumps({"results": results, "count": len(results or [])}, indent=2))
        elif not results:
            typer.echo("No results")
        else:
            # Simple tab-separated table; columns come from the first row's keys.
            headers = list(results[0].keys())
            typer.echo("\t".join(headers))
            for row in results:
                typer.echo("\t".join(str(row.get(h, "")) for h in headers))
    except typer.Exit:
        # typer.Exit is an Exception subclass; without this clause the handler
        # below would catch the exit raised above and print a second, bogus
        # "Error: 1" line.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from None
4542
5301
 
@@ -4774,13 +5533,39 @@ def ncu_analyze(
4774
5533
 
4775
5534
 
4776
5535
  # =============================================================================
4777
- # NSYS Analyze command
5536
+ # NSYS commands
4778
5537
  # =============================================================================
4779
5538
 
4780
5539
 
5540
@nsys_app.command("check")
def nsys_check() -> None:
    """Check if NSYS (Nsight Systems) is installed and show version.

    NSYS is required for local analysis. If not installed, shows install instructions.

    Examples:
        wafer nvidia nsys check
    """
    from .nsys_analyze import check_nsys_installation

    status = check_nsys_installation()

    # Missing install: report it, plus the install hint when one is provided.
    if not status.installed:
        typer.echo("✗ NSYS not installed")
        if status.install_command:
            typer.echo(f" Install with: {status.install_command}")
        return

    # Present: show where it was found and, if known, its version.
    typer.echo(f"✓ NSYS installed: {status.path}")
    if status.version:
        typer.echo(f" Version: {status.version}")
5561
+
5562
+
4781
5563
  @nsys_app.command("analyze")
4782
5564
  def nsys_analyze(
4783
5565
  filepath: Path = typer.Argument(..., help="Path to .nsys-rep profile file"),
5566
+ output_dir: Path | None = typer.Option(
5567
+ None, "--output-dir", "-o", help="Output directory for analysis files"
5568
+ ),
4784
5569
  json_output: bool = typer.Option(
4785
5570
  False, "--json", help="Output raw JSON instead of formatted text"
4786
5571
  ),
@@ -4789,6 +5574,12 @@ def nsys_analyze(
4789
5574
  "--remote/--local",
4790
5575
  help="Force remote (via API) or local analysis. Default: auto-detect (remote if nsys not installed locally)",
4791
5576
  ),
5577
+ target: str | None = typer.Option(
5578
+ None,
5579
+ "--target",
5580
+ "-t",
5581
+ help="Remote target: 'workspace:id' for workspace execution, or target name from ~/.wafer/targets/",
5582
+ ),
4792
5583
  ) -> None:
4793
5584
  """Analyze an NVIDIA Nsight Systems profile (.nsys-rep file).
4794
5585
 
@@ -4797,10 +5588,20 @@ def nsys_analyze(
4797
5588
  By default, uses local nsys if available, otherwise runs analysis
4798
5589
  remotely via wafer-api (requires authentication: wafer login).
4799
5590
 
5591
+ Supports multiple execution modes:
5592
+ - Local: Uses local nsys CLI (no GPU required for analysis)
5593
+ - Remote API: Uploads file and runs analysis on Modal
5594
+ - Workspace: Runs analysis on a Wafer workspace via SSH
5595
+ - Target: Runs analysis on a configured target machine via SSH
5596
+
4800
5597
  Examples:
4801
5598
  wafer nvidia nsys analyze profile.nsys-rep
4802
5599
  wafer nvidia nsys analyze profile.nsys-rep --json
5600
+ wafer nvidia nsys analyze profile.nsys-rep --local
4803
5601
  wafer nvidia nsys analyze profile.nsys-rep --remote
5602
+ wafer nvidia nsys analyze profile.nsys-rep --target workspace:abc123
5603
+ wafer nvidia nsys analyze profile.nsys-rep --target vultr-b200
5604
+ wafer nvidia nsys analyze profile.nsys-rep -o ./results/
4804
5605
  """
4805
5606
  from .nsys_analyze import analyze_nsys_profile
4806
5607
 
@@ -4812,11 +5613,20 @@ def nsys_analyze(
4812
5613
  typer.echo(f"Error: Expected .nsys-rep file, got: {filepath.suffix}", err=True)
4813
5614
  raise typer.Exit(1)
4814
5615
 
5616
+ # Warn if both remote flag and target are specified
5617
+ if target and remote is not None:
5618
+ typer.echo(
5619
+ "Warning: --target overrides --remote/--local flag",
5620
+ err=True,
5621
+ )
5622
+
4815
5623
  try:
4816
5624
  result = analyze_nsys_profile(
4817
5625
  filepath,
4818
5626
  json_output=json_output,
4819
5627
  remote=remote,
5628
+ target=target,
5629
+ output_dir=output_dir,
4820
5630
  )
4821
5631
  typer.echo(result)
4822
5632
  except FileNotFoundError as e:
@@ -4827,6 +5637,150 @@ def nsys_analyze(
4827
5637
  raise typer.Exit(1) from None
4828
5638
 
4829
5639
 
5640
@nsys_app.command("profile", context_settings={"allow_interspersed_args": False})
def nsys_profile(
    command: list[str] = typer.Argument(..., help="Command to profile"),
    output: str = typer.Option(
        "profile",
        "--output",
        "-o",
        help="Output filename (without .nsys-rep extension)",
    ),
    trace: str | None = typer.Option(
        None,
        "--trace",
        "-t",
        help="Trace APIs to capture (comma-separated: cuda,nvtx,osrt,cudnn,cublas). Default: cuda",
    ),
    duration: int | None = typer.Option(
        None,
        "--duration",
        "-d",
        help="Maximum profiling duration in seconds",
    ),
    target: str | None = typer.Option(
        None,
        "--target",
        help="Remote target: 'workspace:id' for workspace execution, or target name from ~/.wafer/targets/",
    ),
    analyze: bool = typer.Option(
        False,
        "--analyze",
        "-a",
        help="Automatically analyze the profile after completion",
    ),
    json_output: bool = typer.Option(
        False,
        "--json",
        help="Output analysis as JSON (only with --analyze)",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        "-v",
        help="Show verbose progress messages",
    ),
    extra_args: str | None = typer.Option(
        None,
        "--extra",
        help="Extra arguments to pass to nsys profile",
    ),
) -> None:
    """Profile a command with NVIDIA Nsight Systems.

    Runs nsys profile on the specified command and generates a .nsys-rep file.
    Profiling requires an NVIDIA GPU. Use --target to run on a remote GPU server
    or workspace.

    Examples:
        wafer nvidia nsys profile -- python train.py
        wafer nvidia nsys profile -o gemm_profile -- ./gemm_kernel
        wafer nvidia nsys profile --trace cuda,nvtx -- python model.py
        wafer nvidia nsys profile --duration 60 -- ./long_running_app
        wafer nvidia nsys profile --target workspace:abc123 -- python test.py
        wafer nvidia nsys profile --target vultr-b200 -- ./benchmark
        wafer nvidia nsys profile --analyze -- python train.py
        wafer nvidia nsys profile --analyze --json -- ./kernel > results.json
    """
    # The docstring above is rendered by Typer as this command's --help text,
    # so implementation notes are kept in comments rather than in the docstring.
    import shlex

    from .nsys_analyze import _parse_target
    from .nsys_profile import (
        NSYSProfileOptions,
        profile_and_analyze,
        profile_local,
        profile_remote_ssh,
        profile_workspace,
    )

    # Collapse the argv-style list into the single command string that
    # NSYSProfileOptions takes.
    if isinstance(command, list):
        # Drop a leading "--" separator if it was passed through to us.
        if command and command[0] == "--":
            command = command[1:]
        # A lone argument is forwarded untouched (presumably so an already
        # quoted command line survives as-is); several arguments are
        # shell-quoted and joined.
        command_str = command[0] if len(command) == 1 else shlex.join(command)
    else:
        # Defensive: accept a plain string when called directly from Python.
        command_str = command

    if not command_str:
        typer.echo("Error: No command specified", err=True)
        raise typer.Exit(1)

    # Split the comma-separated trace list, trimming whitespace and dropping
    # empty entries so input such as "cuda, nvtx" or "cuda," does not forward
    # malformed API names. An empty result falls back to None (the default).
    trace_list = None
    if trace:
        trace_list = [api.strip() for api in trace.split(",") if api.strip()] or None

    # --json only affects the analysis step; say so instead of ignoring it
    # silently, mirroring the flag-conflict warning in `nsys analyze`.
    if json_output and not analyze:
        typer.echo("Warning: --json has no effect without --analyze", err=True)

    options = NSYSProfileOptions(
        command=command_str,
        output=output,
        trace=trace_list,
        duration=duration,
        extra_args=extra_args,
    )

    if verbose:
        typer.echo(f"[nsys] Command: {command_str}", err=True)
        if target:
            typer.echo(f"[nsys] Target: {target}", err=True)

    # Dispatch: combined profile+analyze, or a profile-only run on a
    # workspace, a configured SSH target, or the local machine.
    analysis_result = None
    if analyze:
        profile_result, analysis_result = profile_and_analyze(
            options,
            target=target,
            json_output=json_output,
            verbose=verbose,
        )
    elif target:
        target_type, target_id = _parse_target(target)
        if target_type == "workspace":
            profile_result = profile_workspace(target_id, options, verbose=verbose)
        else:
            profile_result = profile_remote_ssh(target_id, options, verbose=verbose)
    else:
        profile_result = profile_local(options, verbose=verbose)

    # Report a profiling failure (with captured stderr, if any) and exit 1.
    if not profile_result.success:
        typer.echo(f"Error: {profile_result.error}", err=True)
        if profile_result.stderr:
            typer.echo(f"stderr: {profile_result.stderr}", err=True)
        raise typer.Exit(1)

    # In --analyze mode the profile path is only shown when verbose.
    if verbose or not analyze:
        typer.echo(f"Profile created: {profile_result.output_path}")

    # NOTE(review): a successful analysis result is not echoed here;
    # presumably profile_and_analyze emits it itself - confirm.
    if analysis_result:
        if not analysis_result.success:
            typer.echo(f"Analysis error: {analysis_result.error}", err=True)
            raise typer.Exit(1)
5782
+
5783
+
4830
5784
  # =============================================================================
4831
5785
  # ROCprof-Compute commands
4832
5786
  # =============================================================================
@@ -5959,13 +6913,14 @@ def capture_list_command(
5959
6913
 
5960
6914
  @corpus_app.command("download")
5961
6915
  def corpus_download(
5962
- name: str = typer.Argument(..., help="Corpus name (cuda, cutlass, hip)"),
6916
+ name: str = typer.Argument(..., help="Corpus name (cuda, cutlass, hip, amd)"),
5963
6917
  force: bool = typer.Option(False, "--force", "-f", help="Re-download even if exists"),
5964
6918
  ) -> None:
5965
6919
  """Download a documentation corpus for agent filesystem access.
5966
6920
 
5967
6921
  Examples:
5968
6922
  wafer corpus download cuda
6923
+ wafer corpus download amd
5969
6924
  wafer corpus download cutlass --force
5970
6925
  """
5971
6926
  from .corpus import CORPORA, download_corpus
@@ -6180,78 +7135,12 @@ def tracelens_collective(
6180
7135
 
6181
7136
 
6182
7137
  # =============================================================================
6183
- # ISA Analysis Commands
7138
+ # Unified ISA Analysis Commands (wafer amd isa ...)
6184
7139
  # =============================================================================
6185
7140
 
6186
7141
 
6187
7142
  @isa_app.command("analyze")
6188
7143
  def isa_analyze(
6189
- file: Path = typer.Argument(..., help="Path to .co file to analyze"),
6190
- json_output: bool = typer.Option(False, "--json", help="Output as JSON"),
6191
- ) -> None:
6192
- """Analyze AMD GPU code object (.co file).
6193
-
6194
- Extracts and analyzes ISA, showing register usage, instruction mix,
6195
- spills, and other performance-relevant metrics.
6196
-
6197
- The .co file is uploaded to the Wafer API server which has ROCm tools
6198
- installed for analysis.
6199
-
6200
- Examples:
6201
- wafer isa analyze kernel.co
6202
- wafer isa analyze kernel.co --json
6203
- """
6204
- from dataclasses import asdict
6205
-
6206
- from wafer_core.tools.isa_analysis_tools import analyze_isa, format_isa_summary
6207
-
6208
- from .auth import get_auth_headers
6209
- from .global_config import get_api_url
6210
-
6211
- # Validate file exists
6212
- if not file.exists():
6213
- typer.echo(f"Error: File not found: {file}", err=True)
6214
- raise typer.Exit(1)
6215
-
6216
- if not file.suffix == ".co":
6217
- typer.echo(f"Error: Expected .co file, got: {file.suffix}", err=True)
6218
- raise typer.Exit(1)
6219
-
6220
- # Get API URL and auth
6221
- api_url = get_api_url()
6222
- auth_headers = get_auth_headers()
6223
-
6224
- if not auth_headers:
6225
- typer.echo("Error: Not logged in. Run 'wafer login' first.", err=True)
6226
- raise typer.Exit(1)
6227
-
6228
- try:
6229
- result = analyze_isa(
6230
- co_file_path=file,
6231
- api_url=api_url,
6232
- auth_headers=auth_headers,
6233
- )
6234
-
6235
- if json_output:
6236
- typer.echo(json.dumps(asdict(result)))
6237
- else:
6238
- typer.echo(format_isa_summary(result))
6239
-
6240
- except FileNotFoundError as e:
6241
- typer.echo(f"Error: {e}", err=True)
6242
- raise typer.Exit(1) from None
6243
- except Exception as e:
6244
- typer.echo(f"Error: {e}", err=True)
6245
- raise typer.Exit(1) from None
6246
-
6247
-
6248
- # =============================================================================
6249
- # Kernel Scope Commands (wafer amd kernel-scope ...)
6250
- # =============================================================================
6251
-
6252
-
6253
- @kernel_scope_app.command("analyze")
6254
- def kernel_scope_analyze(
6255
7144
  path: Path = typer.Argument(..., help="Path to file or directory to analyze"),
6256
7145
  json_output: bool = typer.Option(False, "--json", "-j", help="Output as JSON"),
6257
7146
  csv_output: bool = typer.Option(False, "--csv", help="Output as CSV"),
@@ -6264,24 +7153,32 @@ def kernel_scope_analyze(
6264
7153
  output_file: Path | None = typer.Option(None, "--output", "-o", help="Write output to file"),
6265
7154
  kernel_index: int = typer.Option(0, "--kernel", "-k", help="Kernel index if multiple in file"),
6266
7155
  ) -> None:
6267
- """Analyze Triton compilation artifacts (ISA, LLVM-IR, TTGIR).
7156
+ """Analyze AMD GPU ISA files (.co, .s, .ll, .ttgir).
6268
7157
 
6269
7158
  Performs static analysis to extract performance metrics like register
6270
7159
  pressure, spills, MFMA density, and occupancy limits.
6271
7160
 
6272
7161
  Supports:
6273
- - AMDGCN ISA files (.s, .gcn, .asm)
6274
- - LLVM-IR files (.ll)
6275
- - TTGIR files (.ttgir, .ttir, .mlir)
7162
+ - AMD GPU code objects (.co) - Requires API authentication
7163
+ - AMDGCN ISA assembly (.s, .gcn, .asm) - Local parsing
7164
+ - LLVM-IR files (.ll) - Local parsing
7165
+ - TTGIR files (.ttgir, .ttir, .mlir) - Local parsing
6276
7166
 
6277
7167
  Examples:
6278
- wafer amd kernel-scope analyze kernel.s
6279
- wafer amd kernel-scope analyze kernel.s --json
6280
- wafer amd kernel-scope analyze ~/.triton/cache/ --filter 'spills > 0'
6281
- wafer amd kernel-scope analyze . -r --csv -o metrics.csv
7168
+ wafer amd isa analyze kernel.co # Code object (needs login)
7169
+ wafer amd isa analyze kernel.s # ISA assembly
7170
+ wafer amd isa analyze kernel.s --json # Output as JSON
7171
+ wafer amd isa analyze ~/.triton/cache/ --filter 'spills > 0'
7172
+ wafer amd isa analyze . -r --csv -o metrics.csv
6282
7173
  """
7174
+ from .auth import get_auth_headers
7175
+ from .global_config import get_api_url
6283
7176
  from .kernel_scope import analyze_command
6284
7177
 
7178
+ # Get API credentials for .co files
7179
+ api_url = get_api_url()
7180
+ auth_headers = get_auth_headers()
7181
+
6285
7182
  try:
6286
7183
  output = analyze_command(
6287
7184
  path=str(path),
@@ -6291,6 +7188,8 @@ def kernel_scope_analyze(
6291
7188
  filter_expr=filter_expr,
6292
7189
  output_file=str(output_file) if output_file else None,
6293
7190
  kernel_index=kernel_index,
7191
+ api_url=api_url,
7192
+ auth_headers=auth_headers,
6294
7193
  )
6295
7194
  typer.echo(output)
6296
7195
 
@@ -6305,15 +7204,15 @@ def kernel_scope_analyze(
6305
7204
  raise typer.Exit(1) from None
6306
7205
 
6307
7206
 
6308
- @kernel_scope_app.command("metrics")
6309
- def kernel_scope_metrics() -> None:
6310
- """List available metrics for kernel scope analysis.
7207
+ @isa_app.command("metrics")
7208
+ def isa_metrics() -> None:
7209
+ """List available metrics for ISA analysis.
6311
7210
 
6312
- Shows all metrics that can be extracted from Triton compilation
6313
- artifacts, along with their derivation.
7211
+ Shows all metrics that can be extracted from AMD GPU ISA files,
7212
+ along with their derivation.
6314
7213
 
6315
7214
  Examples:
6316
- wafer amd kernel-scope metrics
7215
+ wafer amd isa metrics
6317
7216
  """
6318
7217
  from .kernel_scope import metrics_command
6319
7218
 
@@ -6321,15 +7220,15 @@ def kernel_scope_metrics() -> None:
6321
7220
  typer.echo(output)
6322
7221
 
6323
7222
 
6324
- @kernel_scope_app.command("targets")
6325
- def kernel_scope_targets() -> None:
7223
+ @isa_app.command("targets")
7224
+ def isa_targets() -> None:
6326
7225
  """List supported GPU targets and their specifications.
6327
7226
 
6328
7227
  Shows hardware specs (VGPRs, SGPRs, LDS, etc.) for each supported
6329
7228
  AMD GPU architecture.
6330
7229
 
6331
7230
  Examples:
6332
- wafer amd kernel-scope targets
7231
+ wafer amd isa targets
6333
7232
  """
6334
7233
  from .kernel_scope import targets_command
6335
7234