wafer-cli 0.2.29__py3-none-any.whl → 0.2.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wafer/cli.py CHANGED
@@ -8,6 +8,7 @@
  Core commands:
  agent AI assistant for GPU kernel development
  evaluate Test kernel correctness and performance
+ baseline Discover what kernel PyTorch uses for an op
  corpus Download GPU documentation for local access
  workspaces Manage cloud GPU environments

@@ -279,19 +280,19 @@ from wafer.targets_cli import (
  targets_list as _targets_list_cmd,
  )
  from wafer.targets_cli import (
- targets_provision as _targets_provision_cmd,
+ targets_pools as _targets_pools_cmd,
  )
  from wafer.targets_cli import (
- targets_reconcile as _targets_reconcile_cmd,
+ targets_probe as _targets_probe_cmd,
  )
  from wafer.targets_cli import (
- targets_terminate as _targets_terminate_cmd,
+ targets_provision as _targets_provision_cmd,
  )
  from wafer.targets_cli import (
- targets_pools as _targets_pools_cmd,
+ targets_reconcile as _targets_reconcile_cmd,
  )
  from wafer.targets_cli import (
- targets_probe as _targets_probe_cmd,
+ targets_terminate as _targets_terminate_cmd,
  )

  # Billing management - nested under config
@@ -323,6 +324,11 @@ gpumode_app = typer.Typer(
  )
  evaluate_app.add_typer(gpumode_app, name="gpumode")

+ # Baseline discovery (what kernel does PyTorch use?)
+ from wafer.baseline import baseline_app
+
+ app.add_typer(baseline_app, name="baseline", rich_help_panel="Kernel Development")
+
  # =============================================================================
  # Dev commands (internal, used by web app proxy)
  # =============================================================================
@@ -1592,7 +1598,9 @@ def evaluate( # noqa: PLR0913
  benchmark: bool = typer.Option(False, "--benchmark", help="Run performance benchmarks"),
  profile: bool = typer.Option(False, "--profile", help="Enable profiling"),
  defensive: bool = typer.Option(
- False, "--defensive", help="Enable defensive timing to detect evaluation hacking"
+ True,
+ "--defense/--no-defense",
+ help="Run reward hack defense checks after benchmarking. Enabled by default.",
  ),
  sync_artifacts: bool = typer.Option(
  True, "--sync-artifacts/--no-sync-artifacts", help="Download artifacts"
@@ -1606,19 +1614,19 @@ def evaluate( # noqa: PLR0913
  The evaluation checks:
  1. Correctness: Does the kernel produce the same output as the reference?
  2. Performance (--benchmark): How fast is it compared to the reference?
- 3. Defense (--defensive): Detects evaluation hacking (stream injection, etc.)
+ 3. Defense: Detects reward hacking (runs automatically with benchmark, disable with --no-defense)

  Examples:
  # Basic correctness check
  wafer evaluate gpumode --impl kernel.py --reference ref.py --test-cases tests.json

- # With benchmarking on a specific target
+ # With benchmarking (defense checks run automatically)
  wafer evaluate gpumode --impl kernel.py --reference ref.py --test-cases tests.json \\
  --target vultr-b200 --benchmark

- # Full evaluation with defensive timing (detects cheating)
+ # Benchmarking without defense checks
  wafer evaluate gpumode --impl kernel.py --reference ref.py --test-cases tests.json \\
- --benchmark --defensive
+ --benchmark --no-defense

  Subcommands:
  gpumode Use GPUMode format (functional) - RECOMMENDED
@@ -1863,7 +1871,9 @@ def _resolve_pool_query(pool: str, collector) -> tuple[str, object]:
  spec_targets = [t for t in matched_targets if t.spec_name]
  if not spec_targets:
  collector.set_error(
- "pool", "NoSpecTargets", pool=pool,
+ "pool",
+ "NoSpecTargets",
+ pool=pool,
  message="Matched targets have no spec binding — evaluator needs spec fields",
  )
  collector.finalize()
@@ -1963,7 +1973,9 @@ def kernelbench_evaluate( # noqa: PLR0913, PLR0915
  ),
  seed: int = typer.Option(42, "--seed", help="Random seed for weight initialization"),
  defensive: bool = typer.Option(
- False, "--defensive", help="Enable defensive timing to detect evaluation hacking"
+ True,
+ "--defense/--no-defense",
+ help="Run reward hack defense checks after benchmarking. Enabled by default.",
  ),
  backend: str | None = typer.Option(
  None,
@@ -2003,16 +2015,20 @@ def kernelbench_evaluate( # noqa: PLR0913, PLR0915
  The evaluation checks:
  1. Correctness: Does ModelNew.forward() produce same output as Model.forward()?
  2. Performance (--benchmark): How fast is it compared to the reference?
- 3. Defense (--defensive): Detects evaluation hacking
+ 3. Defense: Detects reward hacking (runs automatically with benchmark, disable with --no-defense)

  Examples:
  # Basic correctness check
  wafer evaluate kernelbench --impl my_kernel.py --reference problem.py

- # With benchmarking
+ # With benchmarking (defense checks run automatically)
  wafer evaluate kernelbench --impl my_kernel.py --reference problem.py \\
  --target vultr-b200 --benchmark

+ # Benchmarking without defense checks
+ wafer evaluate kernelbench --impl my_kernel.py --reference problem.py \\
+ --target vultr-b200 --benchmark --no-defense
+
  Subcommands:
  make-template Extract a KernelBench problem as template
  """
@@ -2072,12 +2088,15 @@ def kernelbench_evaluate( # noqa: PLR0913, PLR0915
  if stages == "all":
  resolved_stages = "compile,correctness,benchmark,defense"

- # Handle backward compat: --benchmark and --defensive flags add to stages
+ # Handle --benchmark and --defense/--no-defense flags
  stage_set = set(resolved_stages.split(","))
  if benchmark and "benchmark" not in stage_set:
  stage_set.add("benchmark")
- if defensive and "defense" not in stage_set:
+ # Defense runs automatically when benchmarking, unless --no-defense
+ if defensive and "benchmark" in stage_set and "defense" not in stage_set:
  stage_set.add("defense")
+ if not defensive:
+ stage_set.discard("defense")
  resolved_stages = ",".join(
  sorted(
  stage_set,
@@ -2411,7 +2430,9 @@ def gpumode_evaluate( # noqa: PLR0913, PLR0915
  benchmark: bool = typer.Option(False, "--benchmark", help="Run performance benchmarks"),
  profile: bool = typer.Option(False, "--profile", help="Enable profiling"),
  defensive: bool = typer.Option(
- False, "--defensive", help="Enable defensive timing to detect evaluation hacking"
+ True,
+ "--defense/--no-defense",
+ help="Run reward hack defense checks after benchmarking. Enabled by default.",
  ),
  sync_artifacts: bool = typer.Option(
  True, "--sync-artifacts/--no-sync-artifacts", help="Download artifacts"
@@ -2567,307 +2588,6 @@ def gpumode_evaluate( # noqa: PLR0913, PLR0915
  else:
  typer.echo(f"Error: {result.error_message}", err=True)
  raise typer.Exit(1)
-
-
- # =============================================================================
- # Push and Remote-Run commands
- # =============================================================================
-
-
- @app.command("push", hidden=True)
- def push(
- local_path: Path = typer.Argument(..., help="Local directory to upload"),
- workspace: str | None = typer.Option(None, "--workspace", "-w", help="Workspace name override"),
- direct: bool = typer.Option(False, "--direct", "-d", help="Use direct SSH instead of API"),
- target_name: str | None = typer.Option(
- None,
- "--target",
- "-t",
- help="Target for --direct mode. See 'wafer config targets list'.",
- autocompletion=complete_target_name,
- ),
- ) -> None:
- """Push directory to remote GPU.
-
- By default, uses wafer-api. Use --direct for direct SSH mode.
-
- Examples:
- wafer push ./my_project
- wafer push . --workspace my-kernel
- wafer push ./my_project --direct --target vultr-b200
- """
- # Validate path
- if not local_path.exists():
- typer.echo(f"Error: Path not found: {local_path}", err=True)
- raise typer.Exit(1)
-
- if not local_path.is_dir():
- typer.echo(f"Error: Not a directory: {local_path}", err=True)
- raise typer.Exit(1)
-
- # Resolve to absolute path
- local_path = local_path.resolve()
-
- if direct:
- # Direct SSH mode (requires target)
- if not target_name:
- typer.echo("Error: --target required for --direct mode", err=True)
- raise typer.Exit(1)
-
- from wafer_core.utils.kernel_utils.targets.config import ModalTarget
-
- from .gpu_run import push_directory as push_direct
- from .targets import load_target
-
- try:
- target = load_target(target_name)
- except FileNotFoundError:
- typer.echo(f"Error: Target not found: {target_name}", err=True)
- typer.echo("List targets with: wafer config targets list", err=True)
- raise typer.Exit(1) from None
-
- if isinstance(target, ModalTarget):
- typer.echo(
- f"Error: Target '{target_name}' is a Modal target. Direct push requires SSH.",
- err=True,
- )
- raise typer.Exit(1) from None
-
- typer.echo(f"Connecting to {target.ssh_target}...")
- try:
- result = push_direct(local_path, target)
- except Exception as e:
- typer.echo(f"Error: {e}", err=True)
- raise typer.Exit(1) from None
-
- typer.echo(f"Uploading {len(result.files_uploaded)} files to {result.workspace_path}")
- for f in result.files_uploaded:
- typer.echo(f" ✓ {f}")
- typer.echo(f"Pushed to: {result.workspace_path}")
- else:
- # API mode (default)
- from .api_client import push_directory as push_api
-
- workspace_name = workspace or local_path.name
- typer.echo(f"Pushing {local_path.name} to wafer-api...")
-
- try:
- result = push_api(local_path, workspace_name)
- except Exception as e:
- typer.echo(f"Error: {e}", err=True)
- raise typer.Exit(1) from None
-
- typer.echo(f"Uploaded {len(result.files_uploaded)} files")
- for f in result.files_uploaded:
- typer.echo(f" ✓ {f}")
- typer.echo(f"Workspace ID: {result.workspace_id}")
-
-
- def _run_direct_mode(
- cmd_str: str,
- target_name: str,
- upload_dir: Path | None,
- workspace_id: str | None,
- gpu_id: int | None,
- ) -> int:
- """Run command via direct SSH mode. Returns exit code."""
- from wafer_core.utils.kernel_utils.targets.config import ModalTarget
-
- from .gpu_run import push_directory as push_direct
- from .gpu_run import run_command as run_direct
- from .targets import load_target
-
- try:
- target = load_target(target_name)
- except FileNotFoundError:
- typer.echo(f"Error: Target not found: {target_name}", err=True)
- typer.echo("List targets with: wafer config targets list", err=True)
- raise typer.Exit(1) from None
-
- if isinstance(target, ModalTarget):
- typer.echo(
- f"Error: Target '{target_name}' is a Modal target. Direct mode requires SSH.", err=True
- )
- raise typer.Exit(1) from None
-
- if not target.docker_image:
- typer.echo(f"Error: Target '{target_name}' has no docker_image configured", err=True)
- raise typer.Exit(1)
-
- # If upload_dir provided, push first
- workspace_name = workspace_id
- if upload_dir:
- typer.echo(f"Uploading {upload_dir.name}...")
- try:
- push_result = push_direct(upload_dir, target)
- workspace_name = push_result.workspace_name
- typer.echo(f"Uploaded {len(push_result.files_uploaded)} files")
- except Exception as e:
- typer.echo(f"Error uploading: {e}", err=True)
- raise typer.Exit(1) from None
- elif not workspace_name:
- workspace_name = "tmp"
-
- effective_gpu = gpu_id if gpu_id is not None else target.gpu_ids[0]
- typer.echo(f"Target: {target_name} (docker: {target.docker_image})")
- typer.echo(f"Workspace: {workspace_name}")
- typer.echo(f"GPU: {effective_gpu}")
- typer.echo(f"Command: {cmd_str}")
- typer.echo("-" * 60)
-
- try:
- return run_direct(cmd_str, workspace_name, target, gpu_id)
- except KeyboardInterrupt:
- typer.echo("\nInterrupted by user", err=True)
- raise typer.Exit(130) from None
- except Exception as e:
- typer.echo(f"Error: {e}", err=True)
- raise typer.Exit(1) from None
-
-
- def _run_api_mode( # noqa: PLR0913
- cmd_str: str,
- upload_dir: Path | None,
- workspace_id: str | None,
- gpu_id: int | None,
- gpu_count: int,
- docker_image: str | None,
- docker_entrypoint: str | None,
- pull_image: bool,
- require_hwc: bool,
- ) -> int:
- """Run command via wafer-api. Returns exit code."""
- from .api_client import run_command_stream
-
- if upload_dir:
- typer.echo(f"Uploading: {upload_dir}")
- elif workspace_id:
- typer.echo(f"Workspace: {workspace_id}")
- if gpu_id is not None:
- typer.echo(f"GPU: {gpu_id}")
- if gpu_count > 1:
- typer.echo(f"GPU count: {gpu_count}")
- if docker_image:
- typer.echo(f"Image: {docker_image}")
- if docker_entrypoint:
- typer.echo(f"Entrypoint: {docker_entrypoint}")
- if pull_image:
- typer.echo("Pull image: yes")
- typer.echo(f"Command: {cmd_str}")
- if require_hwc:
- typer.echo("Hardware counters: required (baremetal)")
- typer.echo("-" * 60)
-
- try:
- return run_command_stream(
- command=cmd_str,
- upload_dir=upload_dir,
- workspace_id=workspace_id,
- gpu_id=gpu_id,
- gpu_count=gpu_count,
- docker_image=docker_image,
- docker_entrypoint=docker_entrypoint,
- pull_image=pull_image,
- require_hardware_counters=require_hwc,
- )
- except KeyboardInterrupt:
- typer.echo("\nInterrupted by user", err=True)
- raise typer.Exit(130) from None
- except Exception as e:
- typer.echo(f"Error: {e}", err=True)
- raise typer.Exit(1) from None
-
-
- @app.command("remote-run", hidden=True)
- def remote_run( # noqa: PLR0913
- command: list[str] = typer.Argument(..., help="Command to run"),
- upload_dir: Path | None = typer.Option(
- None, "--upload-dir", "-u", help="Directory to upload (stateless mode)"
- ),
- workspace_id: str | None = typer.Option(
- None, "--workspace-id", "-w", help="Workspace ID (from wafer push)"
- ),
- gpu_id: int | None = typer.Option(None, "--gpu", "-g", help="GPU ID"),
- gpu_count: int = typer.Option(1, "--gpu-count", "-n", help="Number of GPUs (1-8)"),
- docker_image: str | None = typer.Option(None, "--image", "-i", help="Docker image override"),
- docker_entrypoint: str | None = typer.Option(
- None, "--docker-entrypoint", help="Override Docker entrypoint (e.g., 'bash')"
- ),
- pull_image: bool = typer.Option(
- False, "--pull-image", help="Pull image if not available on target"
- ),
- require_hwc: bool = typer.Option(
- False, "--require-hwc", help="Require hardware counters (baremetal)"
- ),
- direct: bool = typer.Option(False, "--direct", "-d", help="Use direct SSH instead of API"),
- target_name: str | None = typer.Option(
- None,
- "--target",
- "-t",
- help="Target for --direct mode. See 'wafer config targets list'.",
- autocompletion=complete_target_name,
- ),
- ) -> None:
- """Run command on remote GPU in Docker.
-
- Two modes:
- - High-level (stateless): --upload-dir uploads files and runs command
- - Low-level: --workspace-id uses existing workspace from 'wafer push'
-
- By default, uses wafer-api. Use --direct for direct SSH mode.
-
- Examples:
- # Stateless: upload and run
- wafer remote-run --upload-dir ./my_project -- python train.py
-
- # Run without files
- wafer remote-run -- nvidia-smi
-
- # Low-level: use existing workspace
- wafer remote-run --workspace-id ws_abc123 -- python train.py
-
- # Direct SSH mode
- wafer remote-run --upload-dir ./my_project --direct --target vultr-b200 -- python train.py
- """
- cmd_str = " ".join(command)
- if not cmd_str.strip():
- typer.echo("Error: Empty command", err=True)
- raise typer.Exit(1)
-
- if upload_dir and workspace_id:
- typer.echo("Error: --upload-dir and --workspace-id are mutually exclusive", err=True)
- raise typer.Exit(1)
-
- if upload_dir:
- if not upload_dir.exists():
- typer.echo(f"Error: Directory not found: {upload_dir}", err=True)
- raise typer.Exit(1)
- if not upload_dir.is_dir():
- typer.echo(f"Error: Not a directory: {upload_dir}", err=True)
- raise typer.Exit(1)
- upload_dir = upload_dir.resolve()
-
- if direct:
- if not target_name:
- typer.echo("Error: --target required for --direct mode", err=True)
- raise typer.Exit(1)
- exit_code = _run_direct_mode(cmd_str, target_name, upload_dir, workspace_id, gpu_id)
- else:
- exit_code = _run_api_mode(
- cmd_str,
- upload_dir,
- workspace_id,
- gpu_id,
- gpu_count,
- docker_image,
- docker_entrypoint,
- pull_image,
- require_hwc,
- )
-
- raise typer.Exit(exit_code)
-
-
  # =============================================================================
  # Authentication commands
  # =============================================================================
@@ -6114,7 +5834,7 @@ def ncu_analyze(
  By default, uses local NCU if available, otherwise runs analysis
  remotely via wafer-api (requires authentication: wafer auth login).

- Use --target for direct SSH mode (like wafer remote-run --direct).
+ Use --target for direct SSH mode.
  Use --include-source to fetch SASS assembly with register/instruction data.

  Examples:
@@ -7988,7 +7708,7 @@ def compare_fusion_cmd(
  wafer compare fusion amd_trace.json nvidia_trace.json --format csv -o fusion.csv
  """
  from .trace_compare import compare_align
-
+
  compare_align(
  trace1=trace1,
  trace2=trace2,
@@ -8042,7 +7762,7 @@ def compare_align_cmd(
  wafer compare align amd_trace.json nvidia_trace.json --layer 5
  """
  from .trace_compare import compare_align
-
+
  compare_align(
  trace1=trace1,
  trace2=trace2,