wafer-cli 0.2.4__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/PKG-INFO +1 -1
  2. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/pyproject.toml +1 -1
  3. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/cli.py +403 -106
  4. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/evaluate.py +871 -98
  5. wafer_cli-0.2.6/wafer/target_lock.py +198 -0
  6. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/targets.py +158 -0
  7. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer_cli.egg-info/PKG-INFO +1 -1
  8. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer_cli.egg-info/SOURCES.txt +1 -0
  9. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/README.md +0 -0
  10. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/setup.cfg +0 -0
  11. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/tests/test_analytics.py +0 -0
  12. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/tests/test_billing.py +0 -0
  13. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/tests/test_cli_coverage.py +0 -0
  14. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/tests/test_cli_parity_integration.py +0 -0
  15. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/tests/test_config_integration.py +0 -0
  16. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/tests/test_file_operations_integration.py +0 -0
  17. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/tests/test_isa_cli.py +0 -0
  18. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/tests/test_rocprof_compute_integration.py +0 -0
  19. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/tests/test_ssh_integration.py +0 -0
  20. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/tests/test_wevin_cli.py +0 -0
  21. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/tests/test_workflow_integration.py +0 -0
  22. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/GUIDE.md +0 -0
  23. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/__init__.py +0 -0
  24. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/analytics.py +0 -0
  25. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/api_client.py +0 -0
  26. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/auth.py +0 -0
  27. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/autotuner.py +0 -0
  28. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/billing.py +0 -0
  29. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/config.py +0 -0
  30. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/corpus.py +0 -0
  31. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/global_config.py +0 -0
  32. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/gpu_run.py +0 -0
  33. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/inference.py +0 -0
  34. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/ncu_analyze.py +0 -0
  35. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/nsys_analyze.py +0 -0
  36. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/problems.py +0 -0
  37. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/rocprof_compute.py +0 -0
  38. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/rocprof_sdk.py +0 -0
  39. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/rocprof_systems.py +0 -0
  40. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/skills/wafer-guide/SKILL.md +0 -0
  41. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/templates/__init__.py +0 -0
  42. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/templates/ask_docs.py +0 -0
  43. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/templates/optimize_kernel.py +0 -0
  44. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/templates/trace_analyze.py +0 -0
  45. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/tracelens.py +0 -0
  46. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/wevin_cli.py +0 -0
  47. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/workspaces.py +0 -0
  48. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer_cli.egg-info/dependency_links.txt +0 -0
  49. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer_cli.egg-info/entry_points.txt +0 -0
  50. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer_cli.egg-info/requires.txt +0 -0
  51. {wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer_cli.egg-info/top_level.txt +0 -0
{wafer_cli-0.2.4 → wafer_cli-0.2.6}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wafer-cli
-Version: 0.2.4
+Version: 0.2.6
 Summary: CLI tool for running commands on remote GPUs and GPU kernel optimization agent
 Requires-Python: >=3.11
 Requires-Dist: typer>=0.12.0
{wafer_cli-0.2.4 → wafer_cli-0.2.6}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "wafer-cli"
-version = "0.2.4"
+version = "0.2.6"
 description = "CLI tool for running commands on remote GPUs and GPU kernel optimization agent"
 requires-python = ">=3.11"
 dependencies = [
{wafer_cli-0.2.4 → wafer_cli-0.2.6}/wafer/cli.py
@@ -99,7 +99,11 @@ def main_callback(ctx: typer.Context) -> None:
     # Install exception hook to catch SystemExit and mark failures
     original_excepthook = sys.excepthook
 
-    def custom_excepthook(exc_type, exc_value, exc_traceback):
+    def custom_excepthook(
+        exc_type: type[BaseException],
+        exc_value: BaseException,
+        exc_traceback: object,
+    ) -> None:
         global _command_outcome
         # Mark as failure if SystemExit with non-zero code, or any other exception
         if exc_type is SystemExit:
@@ -467,7 +471,7 @@ def provider_auth_login(
     # Save the key
     save_api_key(provider, api_key)
     typer.echo(f"API key saved for {PROVIDERS[provider]['display_name']}")
-    typer.echo(f"Stored in: ~/.wafer/auth.json")
+    typer.echo("Stored in: ~/.wafer/auth.json")
 
 
 @provider_auth_app.command("logout")
@@ -517,9 +521,7 @@ def provider_auth_status() -> None:
     for status in statuses:
         if status.is_authenticated:
             source_str = f"({status.source})" if status.source else ""
-            typer.echo(
-                f"  {status.display_name}: ✓ {status.key_preview} {source_str}"
-            )
+            typer.echo(f"  {status.display_name}: ✓ {status.key_preview} {source_str}")
         else:
             typer.echo(f"  {status.display_name}: ✗ Not configured")
             typer.echo(f"    Run: wafer auth login {status.provider}")
@@ -1430,90 +1432,19 @@ def evaluate(  # noqa: PLR0913
     if ctx.invoked_subcommand is not None:
         return
 
-    # Deprecation warning for bare evaluate
-    typer.echo(
-        "⚠️ Deprecation warning: 'wafer evaluate' will be removed in a future version.",
-        err=True,
-    )
-    typer.echo(
-        "   Use 'wafer evaluate gpumode' instead for the functional format.",
-        err=True,
-    )
+    # Bare 'wafer evaluate' is no longer supported - must use subcommand
+    typer.echo("Error: 'wafer evaluate' requires a subcommand.", err=True)
     typer.echo("", err=True)
-
-    # Validate required args when running evaluation (not subcommands)
-    missing_args = []
-    if implementation is None:
-        missing_args.append("--impl/-i")
-    if reference is None:
-        missing_args.append("--reference")
-    if test_cases is None:
-        missing_args.append("--test-cases")
-
-    if missing_args:
-        typer.echo("Error: Missing required arguments", err=True)
-        typer.echo(f"  Required: {', '.join(missing_args)}", err=True)
-        typer.echo("", err=True)
-        typer.echo(
-            "Usage: wafer evaluate gpumode --impl KERNEL.py --reference REF.py --test-cases TESTS.json",
-            err=True,
-        )
-        typer.echo("", err=True)
-        typer.echo("Run 'wafer evaluate gpumode --help' for full options.", err=True)
-        typer.echo("Run 'wafer evaluate gpumode download' to download problem sets.", err=True)
-        raise typer.Exit(1)
-
-    from .evaluate import EvaluateArgs, run_evaluate
-
-    args = EvaluateArgs(
-        implementation=implementation,
-        reference=reference,
-        test_cases=test_cases,
-        target_name=target or "",
-        benchmark=benchmark,
-        profile=profile,
-        defensive=defensive,
-        sync_artifacts=sync_artifacts,
-        gpu_id=gpu_id,
-    )
-
-    try:
-        # Use trio_asyncio to run async code that uses both trio and asyncio
-        # (AsyncSSHClient uses asyncssh which is asyncio-based, bridged via trio_asyncio)
-        import trio_asyncio
-
-        result = trio_asyncio.run(run_evaluate, args)
-    except KeyboardInterrupt:
-        typer.echo("\nInterrupted by user", err=True)
-        raise typer.Exit(130) from None
-    except Exception as e:
-        # Unwrap ExceptionGroup (from Trio nurseries) to show actual error
-        if hasattr(e, "exceptions") and e.exceptions:
-            for exc in e.exceptions:
-                typer.echo(f"Error: {type(exc).__name__}: {exc}", err=True)
-        else:
-            typer.echo(f"Error: {e}", err=True)
-        raise typer.Exit(1) from None
-
-    # Print results
-    if result.success:
-        typer.echo("")
-        typer.echo("=" * 60)
-        status = "PASS" if result.all_correct else "FAIL"
-        typer.echo(f"Result: {status}")
-        score_pct = f"{result.correctness_score:.1%}"
-        typer.echo(f"Correctness: {result.passed_tests}/{result.total_tests} ({score_pct})")
-        if result.geomean_speedup > 0:
-            typer.echo(f"Speedup: {result.geomean_speedup:.2f}x")
-        if result.artifact_path:
-            typer.echo(f"Artifacts: {result.artifact_path}")
-        typer.echo("=" * 60)
-
-        if not result.all_correct:
-            raise typer.Exit(1)
-    else:
-        typer.echo(f"Error: {result.error_message}", err=True)
-        raise typer.Exit(1)
+    typer.echo("Available subcommands:", err=True)
+    typer.echo("  gpumode      Evaluate GPUMode format (custom_kernel/ref_kernel functions)", err=True)
+    typer.echo("  kernelbench  Evaluate KernelBench format (ModelNew class)", err=True)
+    typer.echo("", err=True)
+    typer.echo("Examples:", err=True)
+    typer.echo("  wafer evaluate gpumode --impl kernel.py --reference ref.py --test-cases tests.json", err=True)
+    typer.echo("  wafer evaluate kernelbench --impl impl.py --reference ref.py --benchmark", err=True)
+    typer.echo("", err=True)
+    typer.echo("Run 'wafer evaluate gpumode --help' or 'wafer evaluate kernelbench --help' for options.", err=True)
+    raise typer.Exit(1)
 
 
 TEMPLATE_KERNEL = '''\
@@ -1724,6 +1655,13 @@ def kernelbench_evaluate(  # noqa: PLR0913
         help="GPU target name. See 'wafer config targets list' for available targets.",
         autocompletion=complete_target_name,
     ),
+    pool: str | None = typer.Option(
+        None,
+        "--pool",
+        "-p",
+        help="Target pool name. Acquires first available target from the pool. "
+        "Define pools in ~/.wafer/config.toml under [pools.<name>].",
+    ),
     benchmark: bool = typer.Option(False, "--benchmark", help="Run performance benchmarks"),
     profile: bool = typer.Option(False, "--profile", help="Enable profiling"),
    inputs: Path | None = typer.Option(
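Editor's note: the new --pool help points at ~/.wafer/config.toml. The pool-list command added later in this diff prints the expected shape when no pools exist, so a definition looks like this (pool and target names are placeholders):

    # ~/.wafer/config.toml
    [pools.my-pool]
    targets = ["target-1", "target-2"]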
@@ -1786,12 +1724,43 @@ def kernelbench_evaluate(  # noqa: PLR0913
         )
         raise typer.Exit(1)
 
+    # Validate --target and --pool are mutually exclusive
+    if target and pool:
+        typer.echo("Error: Cannot specify both --target and --pool", err=True)
+        raise typer.Exit(1)
+
     from .evaluate import KernelBenchEvaluateArgs, run_evaluate_kernelbench
 
+    # If pool specified, acquire a target from the pool
+    resolved_target = target or ""
+    pool_lock_context = None
+
+    if pool:
+        from .target_lock import acquire_from_pool
+        from .targets import get_pool
+
+        try:
+            pool_targets = get_pool(pool)
+        except FileNotFoundError as e:
+            typer.echo(f"Error: {e}", err=True)
+            raise typer.Exit(1) from None
+
+        typer.echo(f"Acquiring target from pool '{pool}' ({len(pool_targets)} targets)...")
+        pool_lock_context = acquire_from_pool(pool_targets)
+        acquired_target = pool_lock_context.__enter__()
+
+        if acquired_target is None:
+            typer.echo(f"Error: All targets in pool '{pool}' are busy", err=True)
+            typer.echo(f"  Targets: {', '.join(pool_targets)}", err=True)
+            raise typer.Exit(1)
+
+        typer.echo(f"Acquired target: {acquired_target}")
+        resolved_target = acquired_target
+
     args = KernelBenchEvaluateArgs(
         implementation=implementation,
         reference=reference,
-        target_name=target or "",
+        target_name=resolved_target,
         benchmark=benchmark,
         profile=profile,
         inputs=inputs,
@@ -1811,6 +1780,10 @@ def kernelbench_evaluate(  # noqa: PLR0913
     except Exception as e:
         typer.echo(f"Error: {e}", err=True)
         raise typer.Exit(1) from None
+    finally:
+        # Release pool lock if we acquired one
+        if pool_lock_context is not None:
+            pool_lock_context.__exit__(None, None, None)
 
     # Print results
     if result.success:
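Editor's note: wafer/target_lock.py is new in 0.2.6 (+198 lines) but its body is not part of this section. The call sites pin down its contract: acquire_from_pool(targets) returns a context manager whose __enter__ yields the first free target name (or None when every target is busy), and is_target_locked / get_lock_holder (used by pool-status below) expose lock state and the holder's PID. A minimal sketch consistent with those call sites follows; the lock directory, file format, and absence of stale-lock cleanup are assumptions, not the package's actual implementation:

    # Hypothetical sketch of wafer/target_lock.py's contract; the real module
    # in 0.2.6 may differ in layout, naming, and stale-lock handling.
    import os
    from contextlib import contextmanager
    from pathlib import Path

    LOCK_DIR = Path.home() / ".wafer" / "locks"  # assumed location


    def _lock_path(target_name: str) -> Path:
        return LOCK_DIR / f"{target_name}.lock"


    def is_target_locked(target_name: str) -> bool:
        return _lock_path(target_name).exists()


    def get_lock_holder(target_name: str) -> int | None:
        """PID recorded in the lock file, if readable."""
        try:
            return int(_lock_path(target_name).read_text().strip())
        except (FileNotFoundError, ValueError):
            return None


    @contextmanager
    def acquire_from_pool(targets: list[str]):
        """Yield the first free target name, or None if every target is busy."""
        LOCK_DIR.mkdir(parents=True, exist_ok=True)
        acquired = None
        for name in targets:
            try:
                # O_CREAT | O_EXCL makes lock creation atomic: one process wins.
                fd = os.open(_lock_path(name), os.O_CREAT | os.O_EXCL | os.O_WRONLY)
            except FileExistsError:
                continue  # busy, try the next target
            os.write(fd, str(os.getpid()).encode())
            os.close(fd)
            acquired = name
            break
        try:
            yield acquired
        finally:
            if acquired is not None:
                _lock_path(acquired).unlink(missing_ok=True)

Calling __enter__/__exit__ by hand in cli.py (rather than a with block) is what lets the lock span the try/finally around the evaluation run.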
@@ -2066,7 +2039,7 @@ def gpumode_make_template(
 
 
 @gpumode_app.callback(invoke_without_command=True)
-def gpumode_evaluate(  # noqa: PLR0913
+def gpumode_evaluate(  # noqa: PLR0913, PLR0915
     ctx: typer.Context,
     implementation: Path | None = typer.Option(
         None, "--impl", "-i", help="Path to implementation kernel file"
@@ -2084,6 +2057,13 @@ def gpumode_evaluate(  # noqa: PLR0913
         help="GPU target name. See 'wafer config targets list' for available targets.",
         autocompletion=complete_target_name,
     ),
+    pool: str | None = typer.Option(
+        None,
+        "--pool",
+        "-p",
+        help="Target pool name. Acquires first available target from the pool. "
+        "Define pools in ~/.wafer/config.toml under [pools.<name>].",
+    ),
     benchmark: bool = typer.Option(False, "--benchmark", help="Run performance benchmarks"),
     profile: bool = typer.Option(False, "--profile", help="Enable profiling"),
     defensive: bool = typer.Option(
@@ -2140,14 +2120,44 @@ def gpumode_evaluate(  # noqa: PLR0913
         typer.echo("Run 'wafer evaluate gpumode download' to download problem sets.", err=True)
         raise typer.Exit(1)
 
-    # Reuse the existing evaluate logic (same format)
+    # Validate --target and --pool are mutually exclusive
+    if target and pool:
+        typer.echo("Error: Cannot specify both --target and --pool", err=True)
+        raise typer.Exit(1)
+
     from .evaluate import EvaluateArgs, run_evaluate
 
+    # If pool specified, acquire a target from the pool
+    resolved_target = target or ""
+    pool_lock_context = None
+
+    if pool:
+        from .target_lock import acquire_from_pool
+        from .targets import get_pool
+
+        try:
+            pool_targets = get_pool(pool)
+        except FileNotFoundError as e:
+            typer.echo(f"Error: {e}", err=True)
+            raise typer.Exit(1) from None
+
+        typer.echo(f"Acquiring target from pool '{pool}' ({len(pool_targets)} targets)...")
+        pool_lock_context = acquire_from_pool(pool_targets)
+        acquired_target = pool_lock_context.__enter__()
+
+        if acquired_target is None:
+            typer.echo(f"Error: All targets in pool '{pool}' are busy", err=True)
+            typer.echo(f"  Targets: {', '.join(pool_targets)}", err=True)
+            raise typer.Exit(1)
+
+        typer.echo(f"Acquired target: {acquired_target}")
+        resolved_target = acquired_target
+
     args = EvaluateArgs(
         implementation=implementation,
         reference=reference,
         test_cases=test_cases,
-        target_name=target or "",
+        target_name=resolved_target,
         benchmark=benchmark,
         profile=profile,
         defensive=defensive,
@@ -2169,6 +2179,10 @@ def gpumode_evaluate(  # noqa: PLR0913
         else:
             typer.echo(f"Error: {e}", err=True)
         raise typer.Exit(1) from None
+    finally:
+        # Release pool lock if we acquired one
+        if pool_lock_context is not None:
+            pool_lock_context.__exit__(None, None, None)
 
     # Print results
     if result.success:
@@ -3078,6 +3092,7 @@ init_app = typer.Typer(
 
 Choose based on your GPU access:
 
+  local         GPU on current machine (no SSH)
   ssh           Your own hardware via SSH
   runpod        RunPod cloud GPUs (needs WAFER_RUNPOD_API_KEY)
   digitalocean  DigitalOcean AMD MI300X (needs WAFER_AMD_DIGITALOCEAN_API_KEY)"""
@@ -3085,6 +3100,92 @@ Choose based on your GPU access:
 targets_app.add_typer(init_app, name="init")
 
 
+@init_app.command("local")
+def init_local(
+    name: str = typer.Option("local", "--name", "-n", help="Target name"),
+    gpu_ids: str = typer.Option("0", "--gpu-ids", "-g", help="Comma-separated GPU IDs"),
+) -> None:
+    """Initialize a local target for GPU on current machine.
+
+    Detects your local GPU and configures a target for direct execution
+    (no SSH). Use this when running wafer on the same machine as the GPU.
+
+    Examples:
+        wafer config targets init local
+        wafer config targets init local --name my-5090 --gpu-ids 0,1
+    """
+    from .targets import save_target
+
+    # Parse GPU IDs
+    try:
+        parsed_gpu_ids = [int(g.strip()) for g in gpu_ids.split(",")]
+    except ValueError:
+        typer.echo(f"Error: Invalid GPU IDs '{gpu_ids}'. Use comma-separated integers.", err=True)
+        raise typer.Exit(1) from None
+
+    typer.echo("Detecting local GPU...")
+
+    try:
+        from wafer_core.gpu_detect import (
+            detect_local_gpu,
+            get_compute_capability,
+            get_torch_requirements,
+        )
+
+        detected_gpu = detect_local_gpu()
+
+        if detected_gpu:
+            typer.echo(f"  Found: {detected_gpu.gpu_name}")
+            if detected_gpu.vendor == "nvidia":
+                typer.echo(f"  CUDA: {detected_gpu.driver_version}")
+            else:
+                typer.echo(f"  ROCm: {detected_gpu.driver_version}")
+            typer.echo(f"  GPU count: {detected_gpu.gpu_count}")
+
+            # Get torch requirements and compute capability
+            torch_reqs = get_torch_requirements(detected_gpu)
+            compute_capability = get_compute_capability(detected_gpu)
+            gpu_type = _extract_gpu_type(detected_gpu.gpu_name)
+
+            typer.echo(f"  PyTorch: {torch_reqs.packages[0]}")
+        else:
+            typer.echo("  No GPU detected (nvidia-smi/rocm-smi not found)", err=True)
+            raise typer.Exit(1)
+
+    except ImportError as e:
+        typer.echo(f"Error: Missing dependency: {e}", err=True)
+        raise typer.Exit(1) from None
+
+    # Build target data
+    target_data = {
+        "name": name,
+        "type": "local",
+        "gpu_ids": parsed_gpu_ids,
+        "gpu_type": gpu_type,
+        "compute_capability": compute_capability,
+        "torch_package": torch_reqs.packages[0],
+        "torch_index_url": torch_reqs.index_url,
+        "vendor": detected_gpu.vendor,
+        "driver_version": detected_gpu.driver_version,
+    }
+
+    try:
+        target = save_target(target_data)
+        typer.echo(f"✓ Created target: {target.name}")
+        typer.echo("  Type: Local (no SSH)")
+        typer.echo(f"  GPU IDs: {parsed_gpu_ids}")
+        typer.echo(f"  GPU Type: {gpu_type}")
+        typer.echo(f"  Compute: {compute_capability}")
+        typer.echo(f"  Torch: {torch_reqs.packages[0]}")
+        typer.echo("")
+        typer.echo(
+            f"Usage: wafer evaluate --target {name} --impl kernel.py --reference ref.py --test-cases tests.json"
+        )
+    except (ValueError, AssertionError) as e:
+        typer.echo(f"Error: {e}", err=True)
+        raise typer.Exit(1) from None
+
+
 @init_app.command("runpod")
 def init_runpod(
     name: str = typer.Option("runpod-mi300x", "--name", "-n", help="Target name"),
@@ -3248,23 +3349,29 @@ def init_ssh(
     host: str = typer.Option(..., "--host", "-H", help="SSH host (user@hostname:port)"),
     ssh_key: str = typer.Option("~/.ssh/id_ed25519", "--ssh-key", "-k", help="Path to SSH key"),
     gpu_ids: str = typer.Option("0", "--gpu-ids", "-g", help="Comma-separated GPU IDs"),
-    gpu_type: str = typer.Option(
-        "H100", "--gpu-type", help="GPU type (H100, A100, B200, MI300X, etc.)"
+    gpu_type: str | None = typer.Option(
+        None, "--gpu-type", help="GPU type (auto-detected if not specified)"
     ),
     docker_image: str | None = typer.Option(
         None, "--docker-image", "-d", help="Docker image (optional)"
     ),
     ncu: bool = typer.Option(False, "--ncu/--no-ncu", help="NCU profiling available"),
+    no_detect: bool = typer.Option(False, "--no-detect", help="Skip GPU auto-detection"),
 ) -> None:
     """Initialize an SSH target for your own GPU hardware.
 
     Creates a target config for direct SSH access to a GPU machine.
-    Use for baremetal servers, VMs, or any machine you have SSH access to.
+    Automatically detects GPU type and selects compatible PyTorch version.
 
     Examples:
+        # Auto-detect GPU (recommended)
         wafer config targets init ssh --name my-gpu --host user@192.168.1.100:22
+
+        # Multiple GPUs with NCU profiling
         wafer config targets init ssh --name lab-h100 --host ubuntu@gpu.lab.com:22 --gpu-ids 0,1 --ncu
-        wafer config targets init ssh --name docker-gpu --host user@host:22 --docker-image nvcr.io/nvidia/pytorch:24.01-py3
+
+        # Skip detection, specify manually
+        wafer config targets init ssh --name my-gpu --host user@host:22 --gpu-type H100 --no-detect
     """
     from .targets import save_target
 
@@ -3281,17 +3388,87 @@ def init_ssh(
         typer.echo("Example: user@192.168.1.100:22", err=True)
         raise typer.Exit(1)
 
+    # Auto-detect GPU if not specified
+    detected_gpu = None
+    torch_package = None
+    torch_index_url = None
+
+    if not no_detect:
+        typer.echo(f"Connecting to {host}...")
+        try:
+            import trio
+            import trio_asyncio
+
+            from wafer_core.async_ssh import AsyncSSHClient
+            from wafer_core.gpu_detect import (
+                detect_remote_gpu,
+                get_compute_capability,
+                get_torch_requirements,
+            )
+
+            expanded_key = str(Path(ssh_key).expanduser())
+
+            async def _detect() -> None:
+                nonlocal detected_gpu, torch_package, torch_index_url
+                # Need trio_asyncio.open_loop() for asyncssh bridge
+                async with trio_asyncio.open_loop():
+                    async with AsyncSSHClient(host, expanded_key) as client:
+                        detected_gpu = await detect_remote_gpu(client)
+
+            trio.run(_detect)
+
+            if detected_gpu:
+                typer.echo(f"  Found: {detected_gpu.gpu_name}")
+                if detected_gpu.vendor == "nvidia":
+                    typer.echo(f"  CUDA: {detected_gpu.driver_version}")
+                else:
+                    typer.echo(f"  ROCm: {detected_gpu.driver_version}")
+
+                # Get torch requirements
+                torch_reqs = get_torch_requirements(detected_gpu)
+                torch_package = torch_reqs.packages[0]  # Just torch, not all packages
+                torch_index_url = torch_reqs.index_url
+                typer.echo(f"  PyTorch: {torch_package}")
+
+                # Use detected GPU type if not specified
+                if not gpu_type:
+                    # Extract GPU name (e.g., "H100" from "NVIDIA H100 80GB HBM3")
+                    gpu_type = _extract_gpu_type(detected_gpu.gpu_name)
+            else:
+                typer.echo("  No GPU detected (nvidia-smi/rocm-smi not found)")
+                if not gpu_type:
+                    gpu_type = "H100"  # Default fallback
+                    typer.echo(f"  Using default: {gpu_type}")
+
+        except Exception as e:
+            typer.echo(f"  Detection failed: {e}", err=True)
+            if not gpu_type:
+                gpu_type = "H100"
+                typer.echo(f"  Using default: {gpu_type}")
+
+    # Fallback if no detection
+    if not gpu_type:
+        gpu_type = "H100"
+
     # Compute capability mappings
-    compute_caps = {
-        "B200": "10.0",
-        "H100": "9.0",
-        "A100": "8.0",
-        "A10": "8.6",
-        "V100": "7.0",
-        "MI300X": "9.4",
-        "MI250X": "9.0",
-    }
-    compute_capability = compute_caps.get(gpu_type, "8.0")
+    if detected_gpu:
+        from wafer_core.gpu_detect import get_compute_capability
+
+        compute_capability = get_compute_capability(detected_gpu)
+    else:
+        compute_caps = {
+            "B200": "10.0",
+            "H100": "9.0",
+            "A100": "8.0",
+            "A10": "8.6",
+            "V100": "7.0",
+            "MI300X": "9.4",
+            "MI250X": "9.0",
+            "RTX 5090": "10.0",
+            "RTX 4090": "8.9",
+            "RTX 3090": "8.6",
+        }
+        compute_capability = compute_caps.get(gpu_type, "8.0")
 
     # Build target data
     target_data = {
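Editor's note: the _detect helper above is the same trio/asyncio bridge the evaluate path uses; asyncssh is asyncio-based, so it has to run inside trio_asyncio.open_loop(). A self-contained illustration of the pattern, for readers unfamiliar with it (some_asyncio_task is made up; open_loop and aio_as_trio are real trio_asyncio entry points):

    # Standalone illustration of hosting asyncio code inside trio
    import asyncio

    import trio
    import trio_asyncio


    async def some_asyncio_task() -> str:
        await asyncio.sleep(0.1)  # plain asyncio code
        return "done"


    async def main() -> None:
        # open_loop() runs an asyncio event loop inside the trio run
        async with trio_asyncio.open_loop():
            # aio_as_trio adapts an asyncio coroutine function for trio
            result = await trio_asyncio.aio_as_trio(some_asyncio_task)()
            print(result)


    trio.run(main)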
@@ -3308,6 +3485,12 @@ def init_ssh(
     if docker_image:
         target_data["docker_image"] = docker_image
 
+    # Add torch requirements if detected
+    if torch_package:
+        target_data["torch_package"] = torch_package
+    if torch_index_url:
+        target_data["torch_index_url"] = torch_index_url
+
     try:
         target = save_target(target_data)
         typer.echo(f"✓ Created target: {target.name}")
@@ -3315,9 +3498,12 @@ def init_ssh(
         typer.echo(f"  Host: {host}")
         typer.echo(f"  GPU IDs: {parsed_gpu_ids}")
         typer.echo(f"  GPU Type: {gpu_type}")
+        typer.echo(f"  Compute: {compute_capability}")
         typer.echo(f"  NCU: {'Yes' if ncu else 'No'}")
         if docker_image:
             typer.echo(f"  Docker: {docker_image}")
+        if torch_package:
+            typer.echo(f"  Torch: {torch_package}")
         typer.echo("")
         typer.echo(
             f"Usage: wafer evaluate --target {name} --impl kernel.py --reference ref.py --test-cases tests.json"
@@ -3327,6 +3513,31 @@ def init_ssh(
         raise typer.Exit(1) from None
 
 
+def _extract_gpu_type(gpu_name: str) -> str:
+    """Extract GPU type from full GPU name.
+
+    Examples:
+        "NVIDIA H100 80GB HBM3" -> "H100"
+        "NVIDIA GeForce RTX 4090" -> "RTX 4090"
+        "AMD Instinct MI300X OAM" -> "MI300X"
+    """
+    gpu_name_upper = gpu_name.upper()
+
+    # Check for known GPU types
+    known_types = [
+        "B200", "B100", "H200", "H100", "A100", "A10", "V100",
+        "RTX 5090", "RTX 5080", "RTX 4090", "RTX 4080", "RTX 3090", "RTX 3080",
+        "MI300X", "MI250X", "MI100",
+    ]
+
+    for gpu_type in known_types:
+        if gpu_type in gpu_name_upper:
+            return gpu_type
+
+    # Fallback: return cleaned name
+    return gpu_name.replace("NVIDIA ", "").replace("AMD ", "").strip()
+
+
 @targets_app.command("add")
 def targets_add(
     file_path: Path = typer.Argument(..., help="Path to target TOML file"),
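Editor's note: _extract_gpu_type matches by substring against the upper-cased name in list order, which is why "A100" is listed before "A10". A quick sanity check of the documented behavior (the first three cases come from the docstring; the TITAN V case is derived from the fallback branch):

    assert _extract_gpu_type("NVIDIA H100 80GB HBM3") == "H100"
    assert _extract_gpu_type("NVIDIA GeForce RTX 4090") == "RTX 4090"
    assert _extract_gpu_type("AMD Instinct MI300X OAM") == "MI300X"
    # Unrecognized names fall back to the vendor-stripped string
    assert _extract_gpu_type("NVIDIA TITAN V") == "TITAN V"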
@@ -3539,6 +3750,92 @@ def targets_pods() -> None:
     typer.echo()
 
 
+# ── Pool commands ───────────────────────────────────────────────────────────
+
+
+@targets_app.command("pool-list")
+def targets_pool_list() -> None:
+    """List all configured target pools.
+
+    Example:
+        wafer config targets pool-list
+    """
+    from .targets import get_pool, list_pools
+
+    pools = list_pools()
+
+    if not pools:
+        typer.echo("No pools configured")
+        typer.echo("")
+        typer.echo("Define pools in ~/.wafer/config.toml:")
+        typer.echo("  [pools.my-pool]")
+        typer.echo('  targets = ["target-1", "target-2"]')
+        return
+
+    typer.echo("Configured pools:\n")
+    for pool_name in pools:
+        try:
+            targets = get_pool(pool_name)
+            typer.echo(f"  {pool_name}: {', '.join(targets)}")
+        except Exception as e:
+            typer.echo(f"  {pool_name}: (error: {e})")
+
+
+@targets_app.command("pool-create")
+def targets_pool_create(
+    name: str = typer.Argument(..., help="Pool name"),
+    targets: list[str] = typer.Argument(..., help="Target names to include in pool"),
+) -> None:
+    """Create or update a target pool.
+
+    Example:
+        wafer config targets pool-create mi300x-pool mi300x-1 mi300x-2 mi300x-3
+    """
+    from .targets import save_pool
+
+    try:
+        save_pool(name, targets)
+        typer.echo(f"Pool '{name}' created with {len(targets)} targets")
+    except FileNotFoundError as e:
+        typer.echo(f"Error: {e}", err=True)
+        raise typer.Exit(1) from None
+
+
+@targets_app.command("pool-status")
+def targets_pool_status(
+    name: str = typer.Argument(..., help="Pool name"),
+) -> None:
+    """Show status of targets in a pool (locked/available).
+
+    Example:
+        wafer config targets pool-status mi300x-pool
+    """
+    from .target_lock import get_lock_holder, is_target_locked
+    from .targets import get_pool
+
+    try:
+        targets = get_pool(name)
+    except FileNotFoundError as e:
+        typer.echo(f"Error: {e}", err=True)
+        raise typer.Exit(1) from None
+
+    typer.echo(f"Pool '{name}' ({len(targets)} targets):\n")
+
+    available = 0
+    for target_name in targets:
+        locked = is_target_locked(target_name)
+        if locked:
+            pid = get_lock_holder(target_name)
+            pid_str = f" (pid {pid})" if pid else ""
+            typer.echo(f"  [busy] {target_name}{pid_str}")
+        else:
+            typer.echo(f"  [free] {target_name}")
+            available += 1
+
+    typer.echo("")
+    typer.echo(f"Available: {available}/{len(targets)}")
+
+
 # =============================================================================
 # Billing commands
 # =============================================================================
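Editor's note: putting the 0.2.6 pool feature together, a typical workflow built from the commands added in this release would be (target names are placeholders):

    # Define a pool from existing targets
    wafer config targets pool-create mi300x-pool mi300x-1 mi300x-2

    # Inspect lock state
    wafer config targets pool-status mi300x-pool

    # Let the evaluator grab the first free target
    wafer evaluate gpumode --pool mi300x-pool --impl kernel.py --reference ref.py --test-cases tests.json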