wafer-cli 0.2.38__tar.gz → 0.2.40__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/PKG-INFO +1 -1
  2. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/pyproject.toml +1 -1
  3. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/agent_defaults.py +48 -1
  4. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/cli.py +84 -27
  5. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/wevin_cli.py +36 -3
  6. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/workspaces.py +8 -2
  7. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer_cli.egg-info/PKG-INFO +1 -1
  8. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/README.md +0 -0
  9. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/setup.cfg +0 -0
  10. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_analytics.py +0 -0
  11. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_auth.py +0 -0
  12. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_billing.py +0 -0
  13. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_cli_coverage.py +0 -0
  14. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_cli_parity_integration.py +0 -0
  15. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_config_integration.py +0 -0
  16. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_file_operations_integration.py +0 -0
  17. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_kernel_scope_cli.py +0 -0
  18. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_nsys_analyze.py +0 -0
  19. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_nsys_profile.py +0 -0
  20. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_output.py +0 -0
  21. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_rocprof_compute_integration.py +0 -0
  22. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_skill_commands.py +0 -0
  23. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_ssh_integration.py +0 -0
  24. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_targets_ops.py +0 -0
  25. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_wevin_cli.py +0 -0
  26. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/tests/test_workflow_integration.py +0 -0
  27. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/GUIDE.md +0 -0
  28. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/__init__.py +0 -0
  29. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/analytics.py +0 -0
  30. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/api_client.py +0 -0
  31. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/auth.py +0 -0
  32. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/autotuner.py +0 -0
  33. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/baseline.py +0 -0
  34. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/billing.py +0 -0
  35. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/cli_instructions.py +0 -0
  36. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/config.py +0 -0
  37. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/corpus.py +0 -0
  38. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/evaluate.py +0 -0
  39. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/global_config.py +0 -0
  40. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/gpu_run.py +0 -0
  41. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/inference.py +0 -0
  42. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/kernel_scope.py +0 -0
  43. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/ncu_analyze.py +0 -0
  44. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/nsys_analyze.py +0 -0
  45. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/nsys_profile.py +0 -0
  46. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/output.py +0 -0
  47. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/problems.py +0 -0
  48. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/rocprof_compute.py +0 -0
  49. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/rocprof_sdk.py +0 -0
  50. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/rocprof_systems.py +0 -0
  51. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/skills/wafer-guide/SKILL.md +0 -0
  52. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/specs_cli.py +0 -0
  53. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/ssh_keys.py +0 -0
  54. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/target_lock.py +0 -0
  55. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/targets.py +0 -0
  56. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/targets_cli.py +0 -0
  57. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/targets_ops.py +0 -0
  58. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/templates/__init__.py +0 -0
  59. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/templates/aiter_optimize.py +0 -0
  60. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/templates/ask_docs.py +0 -0
  61. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/templates/optimize_kernel.py +0 -0
  62. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/templates/optimize_kernelbench.py +0 -0
  63. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/templates/optimize_vllm.py +0 -0
  64. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/templates/trace_analyze.py +0 -0
  65. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/tests/test_eval_cli_parity.py +0 -0
  66. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/trace_compare.py +0 -0
  67. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/tracelens.py +0 -0
  68. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer_cli.egg-info/SOURCES.txt +0 -0
  69. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer_cli.egg-info/dependency_links.txt +0 -0
  70. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer_cli.egg-info/entry_points.txt +0 -0
  71. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer_cli.egg-info/requires.txt +0 -0
  72. {wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer_cli.egg-info/top_level.txt +0 -0
{wafer_cli-0.2.38 → wafer_cli-0.2.40}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wafer-cli
-Version: 0.2.38
+Version: 0.2.40
 Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
{wafer_cli-0.2.38 → wafer_cli-0.2.40}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "wafer-cli"
-version = "0.2.38"
+version = "0.2.40"
 description = "CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels"
 readme = "README.md"
 requires-python = ">=3.11"
{wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/agent_defaults.py
@@ -1,4 +1,4 @@
-"""Shared agent defaults for kernel optimization tasks.
+"""Shared agent defaults for kernel tasks.
 
 Single source of truth for bash allowlists and enabled tools used by both:
 - CLI templates (apps/wafer-cli/wafer/templates/*.py)
@@ -195,3 +195,50 @@ VLLM_BASH_ALLOWLIST: list[str] = [
     "cd",
     "git",
 ]
+
+# Tools available to audit agents (read-only + bash for compilation/profiling)
+AUDIT_ENABLED_TOOLS: list[str] = ["read", "glob", "grep", "bash"]
+
+# Bash commands allowed for kernel audit agents.
+AUDIT_BASH_ALLOWLIST: list[str] = [
+    # Read-only
+    "ls",
+    "cat",
+    "head",
+    "tail",
+    "wc",
+    "find",
+    "grep",
+    "rg",
+    "pwd",
+    "tree",
+    "which",
+    "diff",
+    "sort",
+    # Filesystem
+    "mkdir",
+    # Compilation
+    "make",
+    "cmake",
+    "nvcc",
+    "hipcc",
+    "g++",
+    "gcc",
+    "clang",
+    "python",
+    "python3",
+    # Execution — allows running compiled binaries via ./path.
+    # Security note: the agent can already compile arbitrary code via hipcc/gcc/etc,
+    # so blocking ./ execution doesn't add meaningful protection.
+    "./",
+    # Profiling
+    "wafer evaluate",
+    "wafer nvidia ncu",
+    "wafer nvidia nsys",
+    "wafer amd rocprof-compute",
+    "wafer amd rocprof-sdk",
+    "wafer amd rocprof-systems",
+    "wafer compiler-analyze",
+    # Misc
+    "timeout",
+]
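
The two new constants are plain module-level lists, so downstream code can hand them straight to an agent environment. A minimal sketch of that wiring, assuming the CodingEnvironment constructor that appears in the wevin_cli.py hunks further down (other keyword arguments such as the denylist and sandbox mode are omitted here and may be required in practice):

    # Hedged sketch: feeding the new audit defaults into an agent environment.
    # The constructor and keyword names come from the wevin_cli.py hunks below;
    # anything not shown in this diff is an assumption.
    from pathlib import Path

    from wafer.agent_defaults import AUDIT_BASH_ALLOWLIST, AUDIT_ENABLED_TOOLS


    def build_audit_environment(working_dir: Path):
        from wafer_core.environments.coding import CodingEnvironment  # assumed import path

        return CodingEnvironment(
            working_dir=working_dir,
            enabled_tools=AUDIT_ENABLED_TOOLS,    # ["read", "glob", "grep", "bash"]
            bash_allowlist=AUDIT_BASH_ALLOWLIST,  # read-only + compile + profile commands
        )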
{wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/cli.py
@@ -220,16 +220,22 @@ workspaces_app = typer.Typer(
 
     Workspaces are on-demand cloud GPU environments. Requires authentication (wafer login).
 
+    Environment Types:
+      modal      Serverless GPU execution (fast startup, no SSH)
+      baremetal  Dedicated GPU server (SSH access, hardware counters)
+
     Available GPUs:
-      MI300X  AMD Instinct MI300X (192GB HBM3, ROCm)
+      MI300X  AMD Instinct MI300X (192GB HBM3, ROCm) - baremetal only
       B200    NVIDIA Blackwell B200 (180GB HBM3e, CUDA)
+      H100    NVIDIA Hopper H100 (80GB HBM3, CUDA)
 
     Commands:
-      wafer workspaces create dev --gpu B200            # Create workspace
-      wafer workspaces exec dev -- python x.py          # Run commands
-      wafer workspaces ssh dev                          # Interactive SSH
-      wafer workspaces sync dev ./project               # Sync files
-      wafer workspaces delete dev                       # Clean up"""
+      wafer workspaces create dev -g B200 -e modal      # Create Modal workspace
+      wafer workspaces create dev -g B200 -e baremetal  # Create baremetal workspace
+      wafer workspaces exec dev -- python x.py          # Run commands
+      wafer workspaces ssh dev                          # Interactive SSH (baremetal only)
+      wafer workspaces sync dev ./project               # Sync files
+      wafer workspaces delete dev                       # Clean up"""
 )
 app.add_typer(workspaces_app, name="workspaces")
 
@@ -1862,6 +1868,12 @@ def kernelbench_evaluate( # noqa: PLR0913, PLR0915
         help="Sync files and generate eval script but don't run. "
         "Prints the command to run manually (useful for wrapping with rocprof, etc.)",
     ),
+    pool_timeout: int = typer.Option(
+        600,
+        "--pool-timeout",
+        help="Seconds to wait for a target from the pool before failing (default: 600). "
+        "Set to 0 for immediate failure if all targets are busy.",
+    ),
     json_output: bool = typer.Option(
         False, "--json", help="Output as single JSON object (machine-readable)"
     ),
@@ -1953,14 +1965,24 @@ def kernelbench_evaluate( # noqa: PLR0913, PLR0915
         collector.finalize()
         raise typer.Exit(1) from None
 
-    collector.emit("pool_acquire", pool=pool, count=len(usable_targets))
-    pool_lock_context = acquire_from_pool(usable_targets)
+    effective_timeout = pool_timeout if pool_timeout > 0 else None
+    collector.emit("pool_acquire", pool=pool, count=len(usable_targets), timeout=pool_timeout)
+    pool_lock_context = acquire_from_pool(usable_targets, timeout=effective_timeout)
     acquired_target = pool_lock_context.__enter__()
 
     if acquired_target is None:
         # Exit context manager before raising to avoid resource leak
         pool_lock_context.__exit__(None, None, None)
-        collector.set_error("pool", "AllTargetsBusy", pool=pool, targets=usable_targets)
+        if pool_timeout > 0:
+            collector.set_error(
+                "pool",
+                "AllTargetsBusy",
+                pool=pool,
+                targets=usable_targets,
+                message=f"All targets busy after waiting {pool_timeout}s",
+            )
+        else:
+            collector.set_error("pool", "AllTargetsBusy", pool=pool, targets=usable_targets)
        collector.finalize()
         raise typer.Exit(1)
 
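For reference, the mapping above turns the CLI convention (--pool-timeout 0 means fail immediately) into timeout=None before calling acquire_from_pool. The real lock helper is not part of this diff; the stand-in below only illustrates the contract the CLI code relies on:

    # Hedged sketch of the pool-timeout contract used by the CLI code above.
    # The real acquire_from_pool lives in the package's target-locking module and
    # is not shown in this diff; this in-process stand-in just mirrors its shape.
    import contextlib
    import time
    from collections.abc import Iterator

    _BUSY: set[str] = set()  # stand-in for the real per-target locks


    @contextlib.contextmanager
    def acquire_from_pool(targets: list[str], timeout: float | None = None) -> Iterator[str | None]:
        deadline = None if timeout is None else time.monotonic() + timeout
        while True:
            free = next((t for t in targets if t not in _BUSY), None)
            if free is not None:
                _BUSY.add(free)
                try:
                    yield free
                finally:
                    _BUSY.discard(free)
                return
            if deadline is None or time.monotonic() >= deadline:
                yield None  # caller checks for None and reports "all targets busy"
                return
            time.sleep(5)  # poll until a target frees up or the deadline passes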
@@ -2315,6 +2337,12 @@ def gpumode_evaluate( # noqa: PLR0913, PLR0915
         True, "--sync-artifacts/--no-sync-artifacts", help="Download artifacts"
     ),
     gpu_id: int | None = typer.Option(None, "--gpu-id", help="Override GPU ID"),
+    pool_timeout: int = typer.Option(
+        600,
+        "--pool-timeout",
+        help="Seconds to wait for a target from the pool before failing (default: 600). "
+        "Set to 0 for immediate failure if all targets are busy.",
+    ),
 ) -> None:
     """Run kernel evaluation in GPUMode format (functional).
 
@@ -2394,14 +2422,21 @@ def gpumode_evaluate( # noqa: PLR0913, PLR0915
         typer.echo(" Run 'wafer auth status' to see which providers need setup.", err=True)
         raise typer.Exit(1) from None
 
+    effective_timeout = pool_timeout if pool_timeout > 0 else None
     typer.echo(f"Acquiring target from pool '{pool}' ({len(usable_targets)} targets)...")
-    pool_lock_context = acquire_from_pool(usable_targets)
+    pool_lock_context = acquire_from_pool(usable_targets, timeout=effective_timeout)
     acquired_target = pool_lock_context.__enter__()
 
     if acquired_target is None:
         # Exit context manager before raising to avoid resource leak
         pool_lock_context.__exit__(None, None, None)
-        typer.echo(f"Error: All targets in pool '{pool}' are busy", err=True)
+        if pool_timeout > 0:
+            typer.echo(
+                f"Error: All targets in pool '{pool}' are busy (waited {pool_timeout}s)",
+                err=True,
+            )
+        else:
+            typer.echo(f"Error: All targets in pool '{pool}' are busy", err=True)
         typer.echo(f" Targets: {', '.join(usable_targets)}", err=True)
         raise typer.Exit(1)
 
@@ -3243,7 +3278,7 @@ def demo_eval(
     # Step 1: Create workspace
     typer.echo(f"\n[1/4] Creating workspace '{workspace_name}'...")
     result = subprocess.run(
-        ["wafer", "workspaces", "create", workspace_name, "--gpu", "B200", "--json"],
+        ["wafer", "workspaces", "create", workspace_name, "--gpu", "B200", "--environment", "modal", "--json"],
        capture_output=True,
         text=True,
         check=True,
@@ -3330,7 +3365,7 @@ print(f"Performance: {(t1-t0)/100*1e6:.1f} us/iter")
     typer.echo("\n✓ Demo complete! To evaluate your own kernels:")
     typer.echo("")
     typer.echo(" # Using workspaces (no setup required):")
-    typer.echo(" wafer workspaces create dev --gpu B200")
+    typer.echo(" wafer workspaces create dev --gpu B200 --environment modal")
     typer.echo(" wafer workspaces exec --sync ./my-kernel dev -- python my_test.py")
     typer.echo("")
     typer.echo(" # Or using wafer evaluate with a configured target:")
@@ -4787,7 +4822,10 @@ def workspaces_list(
 def workspaces_create(
     name: str = typer.Argument(..., help="Workspace name"),
     gpu_type: str = typer.Option(
-        "B200", "--gpu", "-g", help="GPU type: MI300X (AMD) or B200 (NVIDIA, default)"
+        ..., "--gpu", "-g", help="GPU type: MI300X (AMD) or B200/H100 (NVIDIA)"
+    ),
+    environment: str = typer.Option(
+        ..., "--environment", "-e", help="Environment type: modal or baremetal"
     ),
     image: str | None = typer.Option(None, "--image", "-i", help="Docker image (optional)"),
     wait: bool = typer.Option(
@@ -4797,16 +4835,22 @@ def workspaces_create(
 ) -> None:
     """Create a new workspace.
 
+    Per-vendor architecture: each workspace has a single environment type.
+
+    Environment Types:
+      modal      Serverless GPU execution (fast startup, no SSH)
+      baremetal  Dedicated GPU server (SSH access, hardware counters)
+
     Available GPUs:
-      MI300X  AMD Instinct MI300X (192GB HBM3, ROCm)
-      B200    NVIDIA Blackwell B200 (180GB HBM3e, CUDA)
+      MI300X  AMD Instinct MI300X (192GB HBM3, ROCm) - baremetal only
+      B200    NVIDIA Blackwell B200 (180GB HBM3e, CUDA)
+      H100    NVIDIA Hopper H100 (80GB HBM3, CUDA)
 
     Example:
-      wafer workspaces create my-kernel                   # B200 (default)
-      wafer workspaces create my-kernel --gpu MI300X      # AMD MI300X
-      wafer workspaces create my-kernel --gpu B200        # NVIDIA B200
-      wafer workspaces create my-kernel --image pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel
-      wafer workspaces create my-kernel --wait
+      wafer workspaces create my-kernel --gpu B200 --environment modal
+      wafer workspaces create my-kernel --gpu MI300X --environment baremetal
+      wafer workspaces create my-kernel -g B200 -e baremetal    # SSH + ncu profiling
+      wafer workspaces create my-kernel -g B200 -e modal --wait
     """
     from .workspaces import create_workspace
 
@@ -4814,6 +4858,7 @@ def workspaces_create(
     result = create_workspace(
         name,
         gpu_type=gpu_type,
+        environment_type=environment,
         image=image,
         wait=wait,
         json_output=json_output,
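
Both new options use ... (Ellipsis) as the typer default, which marks them as required, so wafer workspaces create now errors out when --gpu or --environment is omitted instead of silently defaulting to B200. A toy, self-contained illustration of that typer behavior (not wafer code):

    # Toy example of typer's required-option behavior, mirroring the change above.
    # Omitting --gpu or --environment makes typer exit with a "Missing option" error.
    import typer

    app = typer.Typer()


    @app.command()
    def create(
        name: str = typer.Argument(..., help="Workspace name"),
        gpu_type: str = typer.Option(..., "--gpu", "-g", help="GPU type"),
        environment: str = typer.Option(..., "--environment", "-e", help="modal or baremetal"),
    ) -> None:
        typer.echo(f"Would create '{name}' on {gpu_type} ({environment})")


    if __name__ == "__main__":
        app()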
@@ -7751,16 +7796,24 @@ def compare_analyze(
         "-f",
         help="Output format: text, text-layers, csv, csv-layers, json",
     ),
-    output: Path | None = typer.Option(None, "--output", "-o", help="Output file (default: stdout)"),
+    output: Path | None = typer.Option(
+        None, "--output", "-o", help="Output file (default: stdout)"
+    ),
     phase: str = typer.Option(
         "all",
         "--phase",
         help="Filter by phase: all, prefill, decode",
     ),
     layers: bool = typer.Option(False, "--layers", help="Show layer-wise performance breakdown"),
-    all: bool = typer.Option(False, "--all", help="Show all items (no truncation for layers, operations, kernels)"),
-    stack_traces: bool = typer.Option(False, "--stack-traces", help="Show Python stack traces for operations"),
-    json: bool = typer.Option(False, "--json", hidden=True, help="Ignored (for compatibility with cliExecutor)"),
+    all: bool = typer.Option(
+        False, "--all", help="Show all items (no truncation for layers, operations, kernels)"
+    ),
+    stack_traces: bool = typer.Option(
+        False, "--stack-traces", help="Show Python stack traces for operations"
+    ),
+    json: bool = typer.Option(
+        False, "--json", hidden=True, help="Ignored (for compatibility with cliExecutor)"
+    ),
 ) -> None:
     """Compare GPU traces from two platforms platforms.
 
@@ -7824,13 +7877,17 @@ def compare_fusion_cmd(
         "-f",
         help="Output format: text, csv, json",
     ),
-    output: Path | None = typer.Option(None, "--output", "-o", help="Output file (default: stdout)"),
+    output: Path | None = typer.Option(
+        None, "--output", "-o", help="Output file (default: stdout)"
+    ),
     min_group_size: int = typer.Option(
         50,
         "--min-group-size",
         help="Minimum correlation group size to analyze",
     ),
-    json: bool = typer.Option(False, "--json", hidden=True, help="Ignored (for compatibility with cliExecutor)"),
+    json: bool = typer.Option(
+        False, "--json", hidden=True, help="Ignored (for compatibility with cliExecutor)"
+    ),
 ) -> None:
     """Analyze kernel fusion differences between AMD and NVIDIA traces.
 
{wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/wevin_cli.py
@@ -326,13 +326,28 @@ def _build_environment(
     tools_override: list[str] | None,
     corpus_path: str | None,
     no_sandbox: bool = False,
+    has_target: bool = False,
+    template_args: dict[str, str] | None = None,
 ) -> Environment:
-    """Build a CodingEnvironment from template config."""
+    """Build a CodingEnvironment from template config.
+
+    Working directory priority:
+    1. Template arg "dir" (--args dir=./my_project) — scopes agent to a directory
+    2. corpus_path (--corpus cuda) — for doc-browsing templates
+    3. Current working directory
+    """
     from wafer_core.environments.coding import CodingEnvironment
     from wafer_core.rollouts.templates import DANGEROUS_BASH_COMMANDS
     from wafer_core.sandbox import SandboxMode
 
-    working_dir = Path(corpus_path) if corpus_path else Path.cwd()
+    # Template arg "dir" takes priority over corpus_path
+    dir_arg = (template_args or {}).get("dir")
+    if dir_arg:
+        working_dir = Path(dir_arg).resolve()
+    elif corpus_path:
+        working_dir = Path(corpus_path)
+    else:
+        working_dir = Path.cwd()
     resolved_tools = list(tools_override or tpl.tools)
 
     # Add skill tool if skills are enabled
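
The same priority order, pulled out as a standalone sketch for clarity (resolve_working_dir is a hypothetical name; wevin_cli inlines this logic as shown above):

    # Hedged sketch of the working-directory resolution added above.
    from pathlib import Path


    def resolve_working_dir(
        template_args: dict[str, str] | None,
        corpus_path: str | None,
    ) -> Path:
        dir_arg = (template_args or {}).get("dir")
        if dir_arg:                      # 1. --args dir=./my_project
            return Path(dir_arg).resolve()
        if corpus_path:                  # 2. --corpus cuda
            return Path(corpus_path)
        return Path.cwd()                # 3. fall back to the current directory


    # e.g. resolve_working_dir({"dir": "./my_project"}, corpus_path=None)
    #      returns the absolute path to ./my_project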
@@ -340,12 +355,18 @@ def _build_environment(
         resolved_tools.append("skill")
 
     sandbox_mode = SandboxMode.DISABLED if no_sandbox else SandboxMode.ENABLED
+
+    # Enable network when a target is configured — the agent needs to reach
+    # remote GPUs via SSH/HTTPS. Filesystem sandbox stays enforced.
+    allow_network = has_target
+
     env: Environment = CodingEnvironment(
         working_dir=working_dir,
         enabled_tools=resolved_tools,
         bash_allowlist=tpl.bash_allowlist,
         bash_denylist=DANGEROUS_BASH_COMMANDS,
         sandbox_mode=sandbox_mode,
+        allow_network=allow_network,
     )  # type: ignore[assignment]
     return env
 
@@ -589,9 +610,21 @@ def main( # noqa: PLR0913, PLR0915
     # CLI args override template values
     resolved_single_turn = single_turn if single_turn is not None else tpl.single_turn
 
+    # Check if a default target is configured — if so, enable network access
+    # so the agent can reach remote GPUs via SSH/HTTPS.
+    has_target = False
+    try:
+        from wafer.targets import get_default_target
+
+        has_target = get_default_target() is not None
+    except Exception:
+        pass  # No target configured — network stays disabled
+
     # Build endpoint and environment
     endpoint = _build_endpoint(tpl, model, api_base, api_key, api_key_refresh)
-    environment = _build_environment(tpl, tools, corpus_path, no_sandbox)
+    environment = _build_environment(
+        tpl, tools, corpus_path, no_sandbox, has_target=has_target, template_args=template_args
+    )
 
     # Session store
     session_store = FileSessionStore()
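
Taken together, the two wevin_cli.py changes derive network access from whether a default target is configured and thread that flag into the environment. A condensed sketch of the flow, reusing names from the hunks above (_has_default_target is a hypothetical extraction of the inline try/except):

    # Condensed sketch of the target-based network gating introduced above.
    def _has_default_target() -> bool:
        try:
            from wafer.targets import get_default_target

            return get_default_target() is not None
        except Exception:
            return False  # no target configured: keep the network sandboxed


    # In main():
    #   environment = _build_environment(
    #       tpl, tools, corpus_path, no_sandbox,
    #       has_target=_has_default_target(),  # True -> allow_network=True in CodingEnvironment
    #       template_args=template_args,
    #   )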
{wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer/workspaces.py
@@ -249,16 +249,20 @@ def list_workspaces(json_output: bool = False) -> str:
 
 def create_workspace(
     name: str,
-    gpu_type: str = "B200",
+    gpu_type: str,
+    environment_type: str,
     image: str | None = None,
     wait: bool = False,
     json_output: bool = False,
 ) -> str:
     """Create a new workspace.
 
+    Per-vendor architecture: each workspace has a single environment type.
+
     Args:
         name: Workspace name (must be unique)
-        gpu_type: GPU type (default: B200)
+        gpu_type: GPU type (required: B200, H100, MI300X)
+        environment_type: Environment type (required: modal, baremetal)
         image: Docker image (optional, uses default if not specified)
         wait: If True, stream provisioning progress and return SSH credentials
         json_output: If True, return raw JSON; otherwise return formatted text
@@ -272,6 +276,7 @@ def create_workspace(
     # Validate inputs
     assert name, "Workspace name must be non-empty"
     assert gpu_type, "GPU type must be non-empty"
+    assert environment_type, "Environment type must be non-empty"
 
     api_url, headers = _get_client()
 
@@ -296,6 +301,7 @@ def create_workspace(
     request_body: dict = {
         "name": name,
         "gpu_type": gpu_type,
+        "environment_type": environment_type,
     }
     if image:
         request_body["image"] = image
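
With gpu_type and environment_type now required, callers of the Python API must pass both explicitly. A hedged example of a direct call (values are illustrative; per the CLI help above, MI300X is baremetal-only):

    # Hedged example of calling the updated create_workspace API directly.
    # gpu_type and environment_type are now required; authentication must
    # already be configured (wafer login).
    from wafer.workspaces import create_workspace

    output = create_workspace(
        "my-kernel",
        gpu_type="B200",
        environment_type="modal",   # or "baremetal" for SSH + hardware counters
        wait=True,                  # stream provisioning progress
    )
    print(output)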
{wafer_cli-0.2.38 → wafer_cli-0.2.40}/wafer_cli.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wafer-cli
-Version: 0.2.38
+Version: 0.2.40
 Summary: CLI for running GPU workloads, managing remote workspaces, and evaluating/optimizing kernels
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown