wafer-cli 0.2.25__tar.gz → 0.2.27__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/PKG-INFO +1 -1
  2. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/pyproject.toml +1 -1
  3. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/cli.py +196 -37
  4. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/corpus.py +65 -5
  5. wafer_cli-0.2.27/wafer/specs_cli.py +157 -0
  6. wafer_cli-0.2.27/wafer/targets_cli.py +472 -0
  7. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/targets_ops.py +29 -2
  8. wafer_cli-0.2.27/wafer/trace_compare.py +274 -0
  9. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer_cli.egg-info/PKG-INFO +1 -1
  10. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer_cli.egg-info/SOURCES.txt +2 -0
  11. wafer_cli-0.2.25/wafer/trace_compare.py +0 -183
  12. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/README.md +0 -0
  13. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/setup.cfg +0 -0
  14. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_analytics.py +0 -0
  15. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_auth.py +0 -0
  16. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_billing.py +0 -0
  17. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_cli_coverage.py +0 -0
  18. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_cli_parity_integration.py +0 -0
  19. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_config_integration.py +0 -0
  20. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_file_operations_integration.py +0 -0
  21. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_kernel_scope_cli.py +0 -0
  22. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_nsys_analyze.py +0 -0
  23. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_nsys_profile.py +0 -0
  24. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_output.py +0 -0
  25. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_rocprof_compute_integration.py +0 -0
  26. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_skill_commands.py +0 -0
  27. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_ssh_integration.py +0 -0
  28. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_targets_ops.py +0 -0
  29. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_wevin_cli.py +0 -0
  30. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/tests/test_workflow_integration.py +0 -0
  31. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/GUIDE.md +0 -0
  32. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/__init__.py +0 -0
  33. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/agent_defaults.py +0 -0
  34. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/analytics.py +0 -0
  35. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/api_client.py +0 -0
  36. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/auth.py +0 -0
  37. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/autotuner.py +0 -0
  38. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/billing.py +0 -0
  39. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/cli_instructions.py +0 -0
  40. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/config.py +0 -0
  41. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/evaluate.py +0 -0
  42. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/global_config.py +0 -0
  43. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/gpu_run.py +0 -0
  44. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/inference.py +0 -0
  45. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/kernel_scope.py +0 -0
  46. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/ncu_analyze.py +0 -0
  47. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/nsys_analyze.py +0 -0
  48. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/nsys_profile.py +0 -0
  49. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/output.py +0 -0
  50. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/problems.py +0 -0
  51. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/rocprof_compute.py +0 -0
  52. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/rocprof_sdk.py +0 -0
  53. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/rocprof_systems.py +0 -0
  54. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/skills/wafer-guide/SKILL.md +0 -0
  55. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/ssh_keys.py +0 -0
  56. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/target_lock.py +0 -0
  57. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/targets.py +0 -0
  58. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/templates/__init__.py +0 -0
  59. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/templates/ask_docs.py +0 -0
  60. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/templates/optimize_kernel.py +0 -0
  61. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/templates/optimize_kernelbench.py +0 -0
  62. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/templates/trace_analyze.py +0 -0
  63. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/tests/test_eval_cli_parity.py +0 -0
  64. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/tracelens.py +0 -0
  65. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/wevin_cli.py +0 -0
  66. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer/workspaces.py +0 -0
  67. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer_cli.egg-info/dependency_links.txt +0 -0
  68. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer_cli.egg-info/entry_points.txt +0 -0
  69. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer_cli.egg-info/requires.txt +0 -0
  70. {wafer_cli-0.2.25 → wafer_cli-0.2.27}/wafer_cli.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wafer-cli
3
- Version: 0.2.25
3
+ Version: 0.2.27
4
4
  Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
5
5
  Requires-Python: >=3.11
6
6
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "wafer-cli"
3
- version = "0.2.25"
3
+ version = "0.2.27"
4
4
  description = "CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -268,6 +268,32 @@ Configure targets with: wafer config targets init ..."""
268
268
  )
269
269
  app.add_typer(targets_ops_app, name="targets", rich_help_panel="Infrastructure")
270
270
 
271
+ # Specs management (new: local TOML configs)
272
+ from wafer.specs_cli import specs_app
273
+
274
+ app.add_typer(specs_app, name="specs", rich_help_panel="Configuration")
275
+
276
+ # Live resource management (new: API-backed commands on `wafer targets`)
277
+ # These become: wafer targets list, wafer targets terminate, etc.
278
+ from wafer.targets_cli import (
279
+ targets_list as _targets_list_cmd,
280
+ )
281
+ from wafer.targets_cli import (
282
+ targets_provision as _targets_provision_cmd,
283
+ )
284
+ from wafer.targets_cli import (
285
+ targets_reconcile as _targets_reconcile_cmd,
286
+ )
287
+ from wafer.targets_cli import (
288
+ targets_terminate as _targets_terminate_cmd,
289
+ )
290
+ from wafer.targets_cli import (
291
+ targets_pools as _targets_pools_cmd,
292
+ )
293
+ from wafer.targets_cli import (
294
+ targets_probe as _targets_probe_cmd,
295
+ )
296
+
271
297
  # Billing management - nested under config
272
298
  billing_app = typer.Typer(help="Manage billing, credits, and subscription")
273
299
  config_app.add_typer(billing_app, name="billing")
@@ -612,7 +638,9 @@ def skill_status() -> None:
612
638
  auth_app = typer.Typer(help="Authenticate with Wafer and cloud GPU providers")
613
639
  app.add_typer(auth_app, name="auth", rich_help_panel="Configuration")
614
640
 
615
- providers_app = typer.Typer(help="Manage API keys for cloud GPU providers (RunPod, DigitalOcean, etc.)")
641
+ providers_app = typer.Typer(
642
+ help="Manage API keys for cloud GPU providers (RunPod, DigitalOcean, etc.)"
643
+ )
616
644
  auth_app.add_typer(providers_app, name="providers")
617
645
 
618
646
 
@@ -1813,6 +1841,93 @@ def kernelbench_list_problems() -> None:
1813
1841
  raise typer.Exit(1) from None
1814
1842
 
1815
1843
 
1844
+ def _resolve_pool_query(pool: str, collector) -> tuple[str, object]:
1845
+ """Resolve a PoolQuery pool to a target spec name + lock context.
1846
+
1847
+ Queries live providers, matches by pool query, locks one target,
1848
+ returns (spec_name, lock_context) for the evaluator.
1849
+ """
1850
+ import trio
1851
+ from wafer_core.targets.pool import resolve_pool
1852
+
1853
+ from .target_lock import acquire_from_pool
1854
+
1855
+ matched_targets = trio.run(resolve_pool, pool)
1856
+
1857
+ if not matched_targets:
1858
+ collector.set_error("pool", "NoMatchingTargets", pool=pool)
1859
+ collector.finalize()
1860
+ raise typer.Exit(1)
1861
+
1862
+ # Filter to targets with a spec (evaluator needs spec fields)
1863
+ spec_targets = [t for t in matched_targets if t.spec_name]
1864
+ if not spec_targets:
1865
+ collector.set_error(
1866
+ "pool", "NoSpecTargets", pool=pool,
1867
+ message="Matched targets have no spec binding — evaluator needs spec fields",
1868
+ )
1869
+ collector.finalize()
1870
+ raise typer.Exit(1)
1871
+
1872
+ # Lock one by resource_id
1873
+ resource_ids = [t.resource_id for t in spec_targets]
1874
+ collector.emit("pool_acquire", pool=pool, count=len(resource_ids))
1875
+
1876
+ lock_ctx = acquire_from_pool(resource_ids)
1877
+ acquired_id = lock_ctx.__enter__()
1878
+
1879
+ if acquired_id is None:
1880
+ lock_ctx.__exit__(None, None, None)
1881
+ collector.set_error("pool", "AllTargetsBusy", pool=pool, targets=resource_ids)
1882
+ collector.finalize()
1883
+ raise typer.Exit(1)
1884
+
1885
+ # Map resource_id back to spec_name
1886
+ acquired_target = next(t for t in spec_targets if t.resource_id == acquired_id)
1887
+ spec_name = acquired_target.spec_name
1888
+
1889
+ collector.emit("pool_acquired", target=spec_name, resource_id=acquired_id)
1890
+ return spec_name, lock_ctx
1891
+
1892
+
1893
+ def _resolve_pool_legacy(pool: str, collector) -> tuple[str, object]:
1894
+ """Resolve an old-style pool (static target name list) to a target name + lock context.
1895
+
1896
+ Old format: [pools.name] targets = ["t1", "t2"]
1897
+ """
1898
+ from .target_lock import acquire_from_pool
1899
+ from .targets import filter_pool_by_auth, get_pool
1900
+
1901
+ try:
1902
+ pool_targets = get_pool(pool)
1903
+ except FileNotFoundError as e:
1904
+ collector.set_error("pool", "PoolNotFound", pool=pool, message=str(e))
1905
+ collector.finalize()
1906
+ raise typer.Exit(1) from None
1907
+
1908
+ usable_targets, skipped = filter_pool_by_auth(pool_targets)
1909
+ if skipped:
1910
+ collector.emit("pool_auth_skip", targets=skipped)
1911
+
1912
+ if not usable_targets:
1913
+ collector.set_error("pool", "NoUsableTargets", pool=pool)
1914
+ collector.finalize()
1915
+ raise typer.Exit(1) from None
1916
+
1917
+ collector.emit("pool_acquire", pool=pool, count=len(usable_targets))
1918
+ lock_ctx = acquire_from_pool(usable_targets)
1919
+ acquired_target = lock_ctx.__enter__()
1920
+
1921
+ if acquired_target is None:
1922
+ lock_ctx.__exit__(None, None, None)
1923
+ collector.set_error("pool", "AllTargetsBusy", pool=pool, targets=usable_targets)
1924
+ collector.finalize()
1925
+ raise typer.Exit(1)
1926
+
1927
+ collector.emit("pool_acquired", target=acquired_target)
1928
+ return acquired_target, lock_ctx
1929
+
1930
+
1816
1931
  @kernelbench_app.callback(invoke_without_command=True)
1817
1932
  def kernelbench_evaluate( # noqa: PLR0913, PLR0915
1818
1933
  ctx: typer.Context,
@@ -1943,39 +2058,12 @@ def kernelbench_evaluate( # noqa: PLR0913, PLR0915
1943
2058
  pool_lock_context = None
1944
2059
 
1945
2060
  if pool:
1946
- from .target_lock import acquire_from_pool
1947
- from .targets import filter_pool_by_auth, get_pool
2061
+ from wafer_core.targets.pool import is_query_pool
1948
2062
 
1949
- try:
1950
- pool_targets = get_pool(pool)
1951
- except FileNotFoundError as e:
1952
- collector.set_error("pool", "PoolNotFound", pool=pool, message=str(e))
1953
- collector.finalize()
1954
- raise typer.Exit(1) from None
1955
-
1956
- # Filter to only targets with valid auth
1957
- usable_targets, skipped = filter_pool_by_auth(pool_targets)
1958
- if skipped:
1959
- collector.emit("pool_auth_skip", targets=skipped)
1960
-
1961
- if not usable_targets:
1962
- collector.set_error("pool", "NoUsableTargets", pool=pool)
1963
- collector.finalize()
1964
- raise typer.Exit(1) from None
1965
-
1966
- collector.emit("pool_acquire", pool=pool, count=len(usable_targets))
1967
- pool_lock_context = acquire_from_pool(usable_targets)
1968
- acquired_target = pool_lock_context.__enter__()
1969
-
1970
- if acquired_target is None:
1971
- # Exit context manager before raising to avoid resource leak
1972
- pool_lock_context.__exit__(None, None, None)
1973
- collector.set_error("pool", "AllTargetsBusy", pool=pool, targets=usable_targets)
1974
- collector.finalize()
1975
- raise typer.Exit(1)
1976
-
1977
- collector.emit("pool_acquired", target=acquired_target)
1978
- resolved_target = acquired_target
2063
+ if is_query_pool(pool):
2064
+ resolved_target, pool_lock_context = _resolve_pool_query(pool, collector)
2065
+ else:
2066
+ resolved_target, pool_lock_context = _resolve_pool_legacy(pool, collector)
1979
2067
 
1980
2068
  collector.target = resolved_target
1981
2069
 
@@ -5254,6 +5342,18 @@ def workspaces_pull(
5254
5342
  raise typer.Exit(1) from None
5255
5343
 
5256
5344
 
5345
+ # =============================================================================
5346
+ # Live resource commands (list/terminate/reconcile/provision)
5347
+ # =============================================================================
5348
+
5349
+ targets_ops_app.command("list")(_targets_list_cmd)
5350
+ targets_ops_app.command("terminate")(_targets_terminate_cmd)
5351
+ targets_ops_app.command("reconcile")(_targets_reconcile_cmd)
5352
+ targets_ops_app.command("provision")(_targets_provision_cmd)
5353
+ targets_ops_app.command("pools")(_targets_pools_cmd)
5354
+ targets_ops_app.command("probe")(_targets_probe_cmd)
5355
+
5356
+
5257
5357
  # =============================================================================
5258
5358
  # Target operations commands (exec/ssh/sync)
5259
5359
  # =============================================================================
@@ -7787,6 +7887,9 @@ def compare_analyze(
7787
7887
  stack_traces: bool = typer.Option(
7788
7888
  False, "--stack-traces", help="Show Python stack traces for operations"
7789
7889
  ),
7890
+ recommendations: bool = typer.Option(
7891
+ False, "--recommendations", help="Generate prioritized recommendations for kernel team"
7892
+ ),
7790
7893
  json: bool = typer.Option(
7791
7894
  False, "--json", hidden=True, help="Ignored (for compatibility with cliExecutor)"
7792
7895
  ),
@@ -7839,6 +7942,7 @@ def compare_analyze(
7839
7942
  show_layers=layers,
7840
7943
  show_all=all,
7841
7944
  show_stack_traces=stack_traces,
7945
+ recommendations=recommendations,
7842
7946
  )
7843
7947
  _mark_command_success()
7844
7948
 
@@ -7883,14 +7987,69 @@ def compare_fusion_cmd(
7883
7987
  # CSV output to file
7884
7988
  wafer compare fusion amd_trace.json nvidia_trace.json --format csv -o fusion.csv
7885
7989
  """
7886
- from .trace_compare import compare_fusion
7990
+ from .trace_compare import compare_align
7991
+
7992
+ compare_align(
7993
+ trace1=trace1,
7994
+ trace2=trace2,
7995
+ output=output,
7996
+ output_format=format,
7997
+ phase="all",
7998
+ )
7999
+ _mark_command_success()
7887
8000
 
7888
- compare_fusion(
8001
+
8002
+ @compare_app.command("align")
8003
+ def compare_align_cmd(
8004
+ trace1: Path = typer.Argument(..., help="First trace file (AMD or NVIDIA)", exists=True),
8005
+ trace2: Path = typer.Argument(..., help="Second trace file (AMD or NVIDIA)", exists=True),
8006
+ format: str = typer.Option(
8007
+ "json",
8008
+ "--format",
8009
+ "-f",
8010
+ help="Output format: json",
8011
+ ),
8012
+ output: Path | None = typer.Option(
8013
+ None, "--output", "-o", help="Output file (default: stdout)"
8014
+ ),
8015
+ phase: str = typer.Option(
8016
+ "all",
8017
+ "--phase",
8018
+ help="Filter by phase: all, prefill, decode",
8019
+ ),
8020
+ layer: int | None = typer.Option(
8021
+ None,
8022
+ "--layer",
8023
+ help="Focus on specific layer number",
8024
+ ),
8025
+ ) -> None:
8026
+ """Align kernels at layer level for exact kernel-to-kernel comparison.
8027
+
8028
+ Provides kernel-to-kernel mapping across AMD and NVIDIA platforms,
8029
+ showing which kernels correspond to each other at each layer position.
8030
+
8031
+ Examples:
8032
+ # Basic alignment (stdout JSON)
8033
+ wafer compare align amd_trace.json nvidia_trace.json
8034
+
8035
+ # Save to file
8036
+ wafer compare align amd_trace.json nvidia_trace.json -o alignment.json
8037
+
8038
+ # Focus on decode phase only
8039
+ wafer compare align amd_trace.json nvidia_trace.json --phase decode
8040
+
8041
+ # Focus on specific layer
8042
+ wafer compare align amd_trace.json nvidia_trace.json --layer 5
8043
+ """
8044
+ from .trace_compare import compare_align
8045
+
8046
+ compare_align(
7889
8047
  trace1=trace1,
7890
8048
  trace2=trace2,
7891
8049
  output=output,
7892
- format_type=format,
7893
- min_group_size=min_group_size,
8050
+ output_format=format,
8051
+ phase=phase,
8052
+ layer=layer,
7894
8053
  )
7895
8054
  _mark_command_success()
7896
8055
 
@@ -109,14 +109,34 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
109
109
  ),
110
110
  "hip": CorpusConfig(
111
111
  name="hip",
112
- description="HIP programming guide and API reference",
113
- source_type="github_repo",
114
- repo="ROCm/HIP",
115
- repo_paths=["docs"],
112
+ description="HIP programming guide, API reference, and examples",
113
+ source_type="github_multi_repo",
114
+ repos=[
115
+ # HIP - main documentation and API
116
+ RepoSource(
117
+ repo="ROCm/HIP",
118
+ paths=["docs"],
119
+ ),
120
+ # HIP examples - code samples
121
+ RepoSource(
122
+ repo="ROCm/HIP-Examples",
123
+ paths=["HIP-Examples-Applications", "mini-nbody"],
124
+ ),
125
+ # clr - HIP/OpenCL runtime (low-level)
126
+ RepoSource(
127
+ repo="ROCm/clr",
128
+ paths=["hipamd/include", "rocclr/device/gpu"],
129
+ ),
130
+ # ROCm docs - official documentation
131
+ RepoSource(
132
+ repo="ROCm/ROCm",
133
+ paths=["docs"],
134
+ ),
135
+ ],
116
136
  ),
117
137
  "amd": CorpusConfig(
118
138
  name="amd",
119
- description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM)",
139
+ description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM, FlashAttention)",
120
140
  source_type="github_multi_repo",
121
141
  repos=[
122
142
  # rocWMMA - wave matrix multiply-accumulate (WMMA) intrinsics
@@ -186,6 +206,46 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
186
206
  repo="huggingface/hf-rocm-kernels",
187
207
  paths=["csrc", "hf_rocm_kernels", "docs"],
188
208
  ),
209
+ # ROCm/flash-attention - FlashAttention for AMD GPUs
210
+ RepoSource(
211
+ repo="ROCm/flash-attention",
212
+ paths=["csrc", "docs"],
213
+ ),
214
+ # ROCm/triton - Triton compiler for AMD GPUs
215
+ RepoSource(
216
+ repo="ROCm/triton",
217
+ paths=["python/tutorials", "third_party/amd"],
218
+ ),
219
+ # ROCm/rccl - ROCm Communication Collectives Library (multi-GPU)
220
+ RepoSource(
221
+ repo="ROCm/rccl",
222
+ paths=["docs"],
223
+ ),
224
+ # ROCm/rocprofiler-sdk - AMD GPU profiling SDK
225
+ RepoSource(
226
+ repo="ROCm/rocprofiler-sdk",
227
+ paths=["docs", "samples"],
228
+ ),
229
+ # ROCm/omniperf - AMD GPU profiling tool
230
+ RepoSource(
231
+ repo="ROCm/omniperf",
232
+ paths=["docs", "src/omniperf_analyze"],
233
+ ),
234
+ # ROCm/omnitrace - Application tracing for AMD
235
+ RepoSource(
236
+ repo="ROCm/omnitrace",
237
+ paths=["docs"],
238
+ ),
239
+ # AMD GPUOpen Performance Guides
240
+ RepoSource(
241
+ repo="GPUOpen-Tools/gpu_performance_api",
242
+ paths=["docs"],
243
+ ),
244
+ # AMD LLVM - AMD GPU compiler backend
245
+ RepoSource(
246
+ repo="ROCm/llvm-project",
247
+ paths=["amd/device-libs/README.md", "llvm/docs/AMDGPUUsage.rst"],
248
+ ),
189
249
  ],
190
250
  ),
191
251
  }
@@ -0,0 +1,157 @@
1
+ """CLI commands for wafer specs — TargetSpec TOML management.
2
+
3
+ These are the local config commands (no API calls).
4
+ Registered as: wafer specs list|show|add|remove|default|init
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+
11
+ import typer
12
+
13
+ specs_app = typer.Typer(
14
+ help="""Manage GPU target specs (provisioning blueprints).
15
+
16
+ Specs define how to access or provision GPUs. They are TOML files in ~/.wafer/specs/.
17
+
18
+ wafer specs list # List all specs
19
+ wafer specs show runpod-mi300x # Show one spec
20
+ wafer specs add /path/to/spec.toml # Add from file
21
+ wafer specs remove old-target # Remove a spec
22
+ wafer specs default runpod-mi300x # Set default
23
+
24
+ To create a new spec interactively:
25
+ wafer config targets init ssh # (legacy, still works)
26
+ wafer config targets init runpod
27
+ """
28
+ )
29
+
30
+
31
+ @specs_app.command("list")
32
+ def specs_list() -> None:
33
+ """List all configured specs.
34
+
35
+ Example:
36
+ wafer specs list
37
+ """
38
+ from wafer_core.targets.spec_store import list_spec_names, load_spec
39
+
40
+ from .targets import get_default_target
41
+
42
+ names = list_spec_names()
43
+ default = get_default_target()
44
+
45
+ if not names:
46
+ typer.echo("No specs configured.")
47
+ typer.echo("Add one with: wafer specs add <path/to/spec.toml>")
48
+ typer.echo("Or interactively: wafer config targets init ssh")
49
+ return
50
+
51
+ typer.echo("Configured specs:")
52
+ for name in names:
53
+ marker = " (default)" if name == default else ""
54
+ try:
55
+ spec = load_spec(name)
56
+ type_name = type(spec).__name__.replace("Target", "")
57
+ typer.echo(f" {name}{marker} [{type_name}] gpu={spec.gpu_type}")
58
+ except Exception as e:
59
+ typer.echo(f" {name}{marker} [error: {e}]")
60
+
61
+
62
+ @specs_app.command("show")
63
+ def specs_show(
64
+ name: str = typer.Argument(..., help="Spec name"),
65
+ ) -> None:
66
+ """Show details for a spec.
67
+
68
+ Example:
69
+ wafer specs show runpod-mi300x
70
+ """
71
+ from wafer_core.targets.spec_store import load_spec
72
+
73
+ from .targets import get_target_info
74
+
75
+ try:
76
+ spec = load_spec(name)
77
+ except FileNotFoundError:
78
+ typer.echo(f"Spec not found: {name}", err=True)
79
+ raise typer.Exit(1) from None
80
+
81
+ typer.echo(f"Spec: {name}")
82
+ for key, value in get_target_info(spec).items():
83
+ typer.echo(f" {key}: {value}")
84
+
85
+
86
+ @specs_app.command("add")
87
+ def specs_add(
88
+ file_path: Path = typer.Argument(..., help="Path to TOML spec file"),
89
+ ) -> None:
90
+ """Add a spec from a TOML file.
91
+
92
+ Example:
93
+ wafer specs add ./my-target.toml
94
+ """
95
+ import tomllib
96
+
97
+ from wafer_core.targets.spec_store import parse_spec, save_spec
98
+
99
+ if not file_path.exists():
100
+ typer.echo(f"File not found: {file_path}", err=True)
101
+ raise typer.Exit(1) from None
102
+
103
+ try:
104
+ with open(file_path, "rb") as f:
105
+ data = tomllib.load(f)
106
+ spec = parse_spec(data)
107
+ save_spec(spec)
108
+ typer.echo(f"Added spec: {spec.name}")
109
+ except Exception as e:
110
+ typer.echo(f"Error: {e}", err=True)
111
+ raise typer.Exit(1) from None
112
+
113
+
114
+ @specs_app.command("remove")
115
+ def specs_remove(
116
+ name: str = typer.Argument(..., help="Spec name to remove"),
117
+ force: bool = typer.Option(False, "--force", "-f", help="Skip confirmation"),
118
+ ) -> None:
119
+ """Remove a spec.
120
+
121
+ Example:
122
+ wafer specs remove old-target
123
+ """
124
+ from wafer_core.targets.spec_store import remove_spec
125
+
126
+ if not force:
127
+ confirm = typer.confirm(f"Remove spec '{name}'?")
128
+ if not confirm:
129
+ return
130
+
131
+ try:
132
+ remove_spec(name)
133
+ typer.echo(f"Removed spec: {name}")
134
+ except FileNotFoundError:
135
+ typer.echo(f"Spec not found: {name}", err=True)
136
+ raise typer.Exit(1) from None
137
+
138
+
139
+ @specs_app.command("default")
140
+ def specs_default(
141
+ name: str = typer.Argument(..., help="Spec name to set as default"),
142
+ ) -> None:
143
+ """Set the default spec.
144
+
145
+ Example:
146
+ wafer specs default runpod-mi300x
147
+ """
148
+ from wafer_core.targets.spec_store import list_spec_names
149
+
150
+ from .targets import set_default_target
151
+
152
+ if name not in list_spec_names():
153
+ typer.echo(f"Spec not found: {name}", err=True)
154
+ raise typer.Exit(1) from None
155
+
156
+ set_default_target(name)
157
+ typer.echo(f"Default spec set to: {name}")