wafer-cli 0.2.29__py3-none-any.whl → 0.2.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wafer/cli.py CHANGED
@@ -8,6 +8,7 @@
  Core commands:
  agent AI assistant for GPU kernel development
  evaluate Test kernel correctness and performance
+ baseline Discover what kernel PyTorch uses for an op
  corpus Download GPU documentation for local access
  workspaces Manage cloud GPU environments

@@ -279,19 +280,19 @@ from wafer.targets_cli import (
  targets_list as _targets_list_cmd,
  )
  from wafer.targets_cli import (
- targets_provision as _targets_provision_cmd,
+ targets_pools as _targets_pools_cmd,
  )
  from wafer.targets_cli import (
- targets_reconcile as _targets_reconcile_cmd,
+ targets_probe as _targets_probe_cmd,
  )
  from wafer.targets_cli import (
- targets_terminate as _targets_terminate_cmd,
+ targets_provision as _targets_provision_cmd,
  )
  from wafer.targets_cli import (
- targets_pools as _targets_pools_cmd,
+ targets_reconcile as _targets_reconcile_cmd,
  )
  from wafer.targets_cli import (
- targets_probe as _targets_probe_cmd,
+ targets_terminate as _targets_terminate_cmd,
  )

  # Billing management - nested under config
@@ -323,6 +324,11 @@ gpumode_app = typer.Typer(
  )
  evaluate_app.add_typer(gpumode_app, name="gpumode")

+ # Baseline discovery (what kernel does PyTorch use?)
+ from wafer.baseline import baseline_app
+
+ app.add_typer(baseline_app, name="baseline", rich_help_panel="Kernel Development")
+
  # =============================================================================
  # Dev commands (internal, used by web app proxy)
  # =============================================================================
@@ -1592,7 +1598,9 @@ def evaluate( # noqa: PLR0913
  benchmark: bool = typer.Option(False, "--benchmark", help="Run performance benchmarks"),
  profile: bool = typer.Option(False, "--profile", help="Enable profiling"),
  defensive: bool = typer.Option(
- False, "--defensive", help="Enable defensive timing to detect evaluation hacking"
+ True,
+ "--defense/--no-defense",
+ help="Run reward hack defense checks after benchmarking. Enabled by default.",
  ),
  sync_artifacts: bool = typer.Option(
  True, "--sync-artifacts/--no-sync-artifacts", help="Download artifacts"
@@ -1606,19 +1614,19 @@ def evaluate( # noqa: PLR0913
  The evaluation checks:
  1. Correctness: Does the kernel produce the same output as the reference?
  2. Performance (--benchmark): How fast is it compared to the reference?
- 3. Defense (--defensive): Detects evaluation hacking (stream injection, etc.)
+ 3. Defense: Detects reward hacking (runs automatically with benchmark, disable with --no-defense)

  Examples:
  # Basic correctness check
  wafer evaluate gpumode --impl kernel.py --reference ref.py --test-cases tests.json

- # With benchmarking on a specific target
+ # With benchmarking (defense checks run automatically)
  wafer evaluate gpumode --impl kernel.py --reference ref.py --test-cases tests.json \\
  --target vultr-b200 --benchmark

- # Full evaluation with defensive timing (detects cheating)
+ # Benchmarking without defense checks
  wafer evaluate gpumode --impl kernel.py --reference ref.py --test-cases tests.json \\
- --benchmark --defensive
+ --benchmark --no-defense

  Subcommands:
  gpumode Use GPUMode format (functional) - RECOMMENDED
@@ -1863,7 +1871,9 @@ def _resolve_pool_query(pool: str, collector) -> tuple[str, object]:
  spec_targets = [t for t in matched_targets if t.spec_name]
  if not spec_targets:
  collector.set_error(
- "pool", "NoSpecTargets", pool=pool,
+ "pool",
+ "NoSpecTargets",
+ pool=pool,
  message="Matched targets have no spec binding — evaluator needs spec fields",
  )
  collector.finalize()
@@ -1963,7 +1973,9 @@ def kernelbench_evaluate( # noqa: PLR0913, PLR0915
  ),
  seed: int = typer.Option(42, "--seed", help="Random seed for weight initialization"),
  defensive: bool = typer.Option(
- False, "--defensive", help="Enable defensive timing to detect evaluation hacking"
+ True,
+ "--defense/--no-defense",
+ help="Run reward hack defense checks after benchmarking. Enabled by default.",
  ),
  backend: str | None = typer.Option(
  None,
@@ -2003,16 +2015,20 @@ def kernelbench_evaluate( # noqa: PLR0913, PLR0915
  The evaluation checks:
  1. Correctness: Does ModelNew.forward() produce same output as Model.forward()?
  2. Performance (--benchmark): How fast is it compared to the reference?
- 3. Defense (--defensive): Detects evaluation hacking
+ 3. Defense: Detects reward hacking (runs automatically with benchmark, disable with --no-defense)

  Examples:
  # Basic correctness check
  wafer evaluate kernelbench --impl my_kernel.py --reference problem.py

- # With benchmarking
+ # With benchmarking (defense checks run automatically)
  wafer evaluate kernelbench --impl my_kernel.py --reference problem.py \\
  --target vultr-b200 --benchmark

+ # Benchmarking without defense checks
+ wafer evaluate kernelbench --impl my_kernel.py --reference problem.py \\
+ --target vultr-b200 --benchmark --no-defense
+
  Subcommands:
  make-template Extract a KernelBench problem as template
  """
@@ -2072,12 +2088,15 @@ def kernelbench_evaluate( # noqa: PLR0913, PLR0915
  if stages == "all":
  resolved_stages = "compile,correctness,benchmark,defense"

- # Handle backward compat: --benchmark and --defensive flags add to stages
+ # Handle --benchmark and --defense/--no-defense flags
  stage_set = set(resolved_stages.split(","))
  if benchmark and "benchmark" not in stage_set:
  stage_set.add("benchmark")
- if defensive and "defense" not in stage_set:
+ # Defense runs automatically when benchmarking, unless --no-defense
+ if defensive and "benchmark" in stage_set and "defense" not in stage_set:
  stage_set.add("defense")
+ if not defensive:
+ stage_set.discard("defense")
  resolved_stages = ",".join(
  sorted(
  stage_set,
@@ -2411,7 +2430,9 @@ def gpumode_evaluate( # noqa: PLR0913, PLR0915
  benchmark: bool = typer.Option(False, "--benchmark", help="Run performance benchmarks"),
  profile: bool = typer.Option(False, "--profile", help="Enable profiling"),
  defensive: bool = typer.Option(
- False, "--defensive", help="Enable defensive timing to detect evaluation hacking"
+ True,
+ "--defense/--no-defense",
+ help="Run reward hack defense checks after benchmarking. Enabled by default.",
  ),
  sync_artifacts: bool = typer.Option(
  True, "--sync-artifacts/--no-sync-artifacts", help="Download artifacts"
@@ -2567,307 +2588,6 @@ def gpumode_evaluate( # noqa: PLR0913, PLR0915
  else:
  typer.echo(f"Error: {result.error_message}", err=True)
  raise typer.Exit(1)
-
-
- # =============================================================================
- # Push and Remote-Run commands
- # =============================================================================
-
-
- @app.command("push", hidden=True)
- def push(
- local_path: Path = typer.Argument(..., help="Local directory to upload"),
- workspace: str | None = typer.Option(None, "--workspace", "-w", help="Workspace name override"),
- direct: bool = typer.Option(False, "--direct", "-d", help="Use direct SSH instead of API"),
- target_name: str | None = typer.Option(
- None,
- "--target",
- "-t",
- help="Target for --direct mode. See 'wafer config targets list'.",
- autocompletion=complete_target_name,
- ),
- ) -> None:
- """Push directory to remote GPU.
-
- By default, uses wafer-api. Use --direct for direct SSH mode.
-
- Examples:
- wafer push ./my_project
- wafer push . --workspace my-kernel
- wafer push ./my_project --direct --target vultr-b200
- """
- # Validate path
- if not local_path.exists():
- typer.echo(f"Error: Path not found: {local_path}", err=True)
- raise typer.Exit(1)
-
- if not local_path.is_dir():
- typer.echo(f"Error: Not a directory: {local_path}", err=True)
- raise typer.Exit(1)
-
- # Resolve to absolute path
- local_path = local_path.resolve()
-
- if direct:
- # Direct SSH mode (requires target)
- if not target_name:
- typer.echo("Error: --target required for --direct mode", err=True)
- raise typer.Exit(1)
-
- from wafer_core.utils.kernel_utils.targets.config import ModalTarget
-
- from .gpu_run import push_directory as push_direct
- from .targets import load_target
-
- try:
- target = load_target(target_name)
- except FileNotFoundError:
- typer.echo(f"Error: Target not found: {target_name}", err=True)
- typer.echo("List targets with: wafer config targets list", err=True)
- raise typer.Exit(1) from None
-
- if isinstance(target, ModalTarget):
- typer.echo(
- f"Error: Target '{target_name}' is a Modal target. Direct push requires SSH.",
- err=True,
- )
- raise typer.Exit(1) from None
-
- typer.echo(f"Connecting to {target.ssh_target}...")
- try:
- result = push_direct(local_path, target)
- except Exception as e:
- typer.echo(f"Error: {e}", err=True)
- raise typer.Exit(1) from None
-
- typer.echo(f"Uploading {len(result.files_uploaded)} files to {result.workspace_path}")
- for f in result.files_uploaded:
- typer.echo(f" ✓ {f}")
- typer.echo(f"Pushed to: {result.workspace_path}")
- else:
- # API mode (default)
- from .api_client import push_directory as push_api
-
- workspace_name = workspace or local_path.name
- typer.echo(f"Pushing {local_path.name} to wafer-api...")
-
- try:
- result = push_api(local_path, workspace_name)
- except Exception as e:
- typer.echo(f"Error: {e}", err=True)
- raise typer.Exit(1) from None
-
- typer.echo(f"Uploaded {len(result.files_uploaded)} files")
- for f in result.files_uploaded:
- typer.echo(f" ✓ {f}")
- typer.echo(f"Workspace ID: {result.workspace_id}")
-
-
- def _run_direct_mode(
- cmd_str: str,
- target_name: str,
- upload_dir: Path | None,
- workspace_id: str | None,
- gpu_id: int | None,
- ) -> int:
- """Run command via direct SSH mode. Returns exit code."""
- from wafer_core.utils.kernel_utils.targets.config import ModalTarget
-
- from .gpu_run import push_directory as push_direct
- from .gpu_run import run_command as run_direct
- from .targets import load_target
-
- try:
- target = load_target(target_name)
- except FileNotFoundError:
- typer.echo(f"Error: Target not found: {target_name}", err=True)
- typer.echo("List targets with: wafer config targets list", err=True)
- raise typer.Exit(1) from None
-
- if isinstance(target, ModalTarget):
- typer.echo(
- f"Error: Target '{target_name}' is a Modal target. Direct mode requires SSH.", err=True
- )
- raise typer.Exit(1) from None
-
- if not target.docker_image:
- typer.echo(f"Error: Target '{target_name}' has no docker_image configured", err=True)
- raise typer.Exit(1)
-
- # If upload_dir provided, push first
- workspace_name = workspace_id
- if upload_dir:
- typer.echo(f"Uploading {upload_dir.name}...")
- try:
- push_result = push_direct(upload_dir, target)
- workspace_name = push_result.workspace_name
- typer.echo(f"Uploaded {len(push_result.files_uploaded)} files")
- except Exception as e:
- typer.echo(f"Error uploading: {e}", err=True)
- raise typer.Exit(1) from None
- elif not workspace_name:
- workspace_name = "tmp"
-
- effective_gpu = gpu_id if gpu_id is not None else target.gpu_ids[0]
- typer.echo(f"Target: {target_name} (docker: {target.docker_image})")
- typer.echo(f"Workspace: {workspace_name}")
- typer.echo(f"GPU: {effective_gpu}")
- typer.echo(f"Command: {cmd_str}")
- typer.echo("-" * 60)
-
- try:
- return run_direct(cmd_str, workspace_name, target, gpu_id)
- except KeyboardInterrupt:
- typer.echo("\nInterrupted by user", err=True)
- raise typer.Exit(130) from None
- except Exception as e:
- typer.echo(f"Error: {e}", err=True)
- raise typer.Exit(1) from None
-
-
- def _run_api_mode( # noqa: PLR0913
- cmd_str: str,
- upload_dir: Path | None,
- workspace_id: str | None,
- gpu_id: int | None,
- gpu_count: int,
- docker_image: str | None,
- docker_entrypoint: str | None,
- pull_image: bool,
- require_hwc: bool,
- ) -> int:
- """Run command via wafer-api. Returns exit code."""
- from .api_client import run_command_stream
-
- if upload_dir:
- typer.echo(f"Uploading: {upload_dir}")
- elif workspace_id:
- typer.echo(f"Workspace: {workspace_id}")
- if gpu_id is not None:
- typer.echo(f"GPU: {gpu_id}")
- if gpu_count > 1:
- typer.echo(f"GPU count: {gpu_count}")
- if docker_image:
- typer.echo(f"Image: {docker_image}")
- if docker_entrypoint:
- typer.echo(f"Entrypoint: {docker_entrypoint}")
- if pull_image:
- typer.echo("Pull image: yes")
- typer.echo(f"Command: {cmd_str}")
- if require_hwc:
- typer.echo("Hardware counters: required (baremetal)")
- typer.echo("-" * 60)
-
- try:
- return run_command_stream(
- command=cmd_str,
- upload_dir=upload_dir,
- workspace_id=workspace_id,
- gpu_id=gpu_id,
- gpu_count=gpu_count,
- docker_image=docker_image,
- docker_entrypoint=docker_entrypoint,
- pull_image=pull_image,
- require_hardware_counters=require_hwc,
- )
- except KeyboardInterrupt:
- typer.echo("\nInterrupted by user", err=True)
- raise typer.Exit(130) from None
- except Exception as e:
- typer.echo(f"Error: {e}", err=True)
- raise typer.Exit(1) from None
-
-
- @app.command("remote-run", hidden=True)
- def remote_run( # noqa: PLR0913
- command: list[str] = typer.Argument(..., help="Command to run"),
- upload_dir: Path | None = typer.Option(
- None, "--upload-dir", "-u", help="Directory to upload (stateless mode)"
- ),
- workspace_id: str | None = typer.Option(
- None, "--workspace-id", "-w", help="Workspace ID (from wafer push)"
- ),
- gpu_id: int | None = typer.Option(None, "--gpu", "-g", help="GPU ID"),
- gpu_count: int = typer.Option(1, "--gpu-count", "-n", help="Number of GPUs (1-8)"),
- docker_image: str | None = typer.Option(None, "--image", "-i", help="Docker image override"),
- docker_entrypoint: str | None = typer.Option(
- None, "--docker-entrypoint", help="Override Docker entrypoint (e.g., 'bash')"
- ),
- pull_image: bool = typer.Option(
- False, "--pull-image", help="Pull image if not available on target"
- ),
- require_hwc: bool = typer.Option(
- False, "--require-hwc", help="Require hardware counters (baremetal)"
- ),
- direct: bool = typer.Option(False, "--direct", "-d", help="Use direct SSH instead of API"),
- target_name: str | None = typer.Option(
- None,
- "--target",
- "-t",
- help="Target for --direct mode. See 'wafer config targets list'.",
- autocompletion=complete_target_name,
- ),
- ) -> None:
- """Run command on remote GPU in Docker.
-
- Two modes:
- - High-level (stateless): --upload-dir uploads files and runs command
- - Low-level: --workspace-id uses existing workspace from 'wafer push'
-
- By default, uses wafer-api. Use --direct for direct SSH mode.
-
- Examples:
- # Stateless: upload and run
- wafer remote-run --upload-dir ./my_project -- python train.py
-
- # Run without files
- wafer remote-run -- nvidia-smi
-
- # Low-level: use existing workspace
- wafer remote-run --workspace-id ws_abc123 -- python train.py
-
- # Direct SSH mode
- wafer remote-run --upload-dir ./my_project --direct --target vultr-b200 -- python train.py
- """
- cmd_str = " ".join(command)
- if not cmd_str.strip():
- typer.echo("Error: Empty command", err=True)
- raise typer.Exit(1)
-
- if upload_dir and workspace_id:
- typer.echo("Error: --upload-dir and --workspace-id are mutually exclusive", err=True)
- raise typer.Exit(1)
-
- if upload_dir:
- if not upload_dir.exists():
- typer.echo(f"Error: Directory not found: {upload_dir}", err=True)
- raise typer.Exit(1)
- if not upload_dir.is_dir():
- typer.echo(f"Error: Not a directory: {upload_dir}", err=True)
- raise typer.Exit(1)
- upload_dir = upload_dir.resolve()
-
- if direct:
- if not target_name:
- typer.echo("Error: --target required for --direct mode", err=True)
- raise typer.Exit(1)
- exit_code = _run_direct_mode(cmd_str, target_name, upload_dir, workspace_id, gpu_id)
- else:
- exit_code = _run_api_mode(
- cmd_str,
- upload_dir,
- workspace_id,
- gpu_id,
- gpu_count,
- docker_image,
- docker_entrypoint,
- pull_image,
- require_hwc,
- )
-
- raise typer.Exit(exit_code)
-
-
  # =============================================================================
  # Authentication commands
  # =============================================================================
@@ -6114,7 +5834,7 @@ def ncu_analyze(
  By default, uses local NCU if available, otherwise runs analysis
  remotely via wafer-api (requires authentication: wafer auth login).

- Use --target for direct SSH mode (like wafer remote-run --direct).
+ Use --target for direct SSH mode.
  Use --include-source to fetch SASS assembly with register/instruction data.

  Examples:
@@ -7988,7 +7708,7 @@ def compare_fusion_cmd(
  wafer compare fusion amd_trace.json nvidia_trace.json --format csv -o fusion.csv
  """
  from .trace_compare import compare_align
-
+
  compare_align(
  trace1=trace1,
  trace2=trace2,
@@ -8042,7 +7762,7 @@ def compare_align_cmd(
  wafer compare align amd_trace.json nvidia_trace.json --layer 5
  """
  from .trace_compare import compare_align
-
+
  compare_align(
  trace1=trace1,
  trace2=trace2,