PyPI - wafer-cli - Versions diffs - 0.2.14__py3-none-any.whl - Mend

wafer-cli 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

wafer/GUIDE.md +118 -0
wafer/__init__.py +3 -0
wafer/analytics.py +306 -0
wafer/api_client.py +195 -0
wafer/auth.py +432 -0
wafer/autotuner.py +1080 -0
wafer/billing.py +233 -0
wafer/cli.py +7289 -0
wafer/config.py +105 -0
wafer/corpus.py +366 -0
wafer/evaluate.py +4593 -0
wafer/global_config.py +350 -0
wafer/gpu_run.py +307 -0
wafer/inference.py +148 -0
wafer/kernel_scope.py +552 -0
wafer/ncu_analyze.py +651 -0
wafer/nsys_analyze.py +1042 -0
wafer/nsys_profile.py +510 -0
wafer/output.py +248 -0
wafer/problems.py +357 -0
wafer/rocprof_compute.py +490 -0
wafer/rocprof_sdk.py +274 -0
wafer/rocprof_systems.py +520 -0
wafer/skills/wafer-guide/SKILL.md +129 -0
wafer/ssh_keys.py +261 -0
wafer/target_lock.py +270 -0
wafer/targets.py +842 -0
wafer/targets_ops.py +717 -0
wafer/templates/__init__.py +0 -0
wafer/templates/ask_docs.py +61 -0
wafer/templates/optimize_kernel.py +71 -0
wafer/templates/optimize_kernelbench.py +137 -0
wafer/templates/trace_analyze.py +74 -0
wafer/tracelens.py +218 -0
wafer/wevin_cli.py +577 -0
wafer/workspaces.py +852 -0
wafer_cli-0.2.14.dist-info/METADATA +16 -0
wafer_cli-0.2.14.dist-info/RECORD +41 -0
wafer_cli-0.2.14.dist-info/WHEEL +5 -0
wafer_cli-0.2.14.dist-info/entry_points.txt +2 -0
wafer_cli-0.2.14.dist-info/top_level.txt +1 -0

wafer/ssh_keys.py ADDED Viewed

@@ -0,0 +1,261 @@
+"""SSH Keys CLI - Manage SSH public keys for workspace access.
+This module provides the implementation for the `wafer ssh-keys` subcommand.
+Users register their SSH public keys here, which are then installed in all
+workspaces they attach to (BYOK - Bring Your Own Key model).
+"""
+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from pathlib import Path
+import httpx
+from .api_client import get_api_url
+from .auth import get_auth_headers
+@dataclass(frozen=True)
+class SshKey:
+    """Registered SSH key info.
+    Immutable (frozen dataclass) record with the fields of one registered key.
+    """
+    # Identifier of the key; remove_ssh_key() documents its key_id as a UUID.
+    id: str
+    # Full public key text (e.g. "ssh-ed25519 AAAAC3... user@host").
+    public_key: str
+    # Optional friendly name; None when the key has no name.
+    name: str | None
+    # Creation timestamp as a string - exact format not visible here, TODO confirm.
+    created_at: str
+def _get_client() -> tuple[str, dict[str, str]]:
+    """Get API URL and auth headers."""
+    api_url = get_api_url()
+    headers = get_auth_headers()
+    assert api_url, "API URL must be configured"
+    assert api_url.startswith("http"), "API URL must be a valid HTTP(S) URL"
+    return api_url, headers
+def _get_key_fingerprint(public_key: str) -> str:
+    """Extract a short fingerprint from a public key for display.
+    Returns the first 12 characters of the base64 data portion.
+    """
+    parts = public_key.strip().split()
+    if len(parts) >= 2:
+        return parts[1][:12] + "..."
+    return public_key[:12] + "..."
+def _get_key_type(public_key: str) -> str:
+    """Extract the key type from a public key."""
+    parts = public_key.strip().split()
+    if parts:
+        return parts[0]
+    return "unknown"
+def _detect_ssh_keys() -> list[Path]:
+    """Detect existing SSH public keys on disk.
+    Returns list of paths to found public key files, in preference order.
+    """
+    ssh_dir = Path.home() / ".ssh"
+    candidates = [
+        "id_ed25519.pub",  # Preferred (modern, secure, fast)
+        "id_rsa.pub",  # Legacy but common
+        "id_ecdsa.pub",  # Less common
+        "id_dsa.pub",  # Deprecated
+    ]
+    found = []
+    for filename in candidates:
+        key_path = ssh_dir / filename
+        if key_path.exists():
+            found.append(key_path)
+    return found
+def list_ssh_keys(json_output: bool = False) -> str:
+    """List all registered SSH keys.
+    Returns:
+        Formatted output string
+    """
+    api_url, headers = _get_client()
+    try:
+        with httpx.Client(timeout=30.0, headers=headers) as client:
+            response = client.get(f"{api_url}/v1/user/ssh-keys")
+            response.raise_for_status()
+            keys = response.json()
+    except httpx.HTTPStatusError as e:
+        if e.response.status_code == 401:
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
+        raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
+    except httpx.RequestError as e:
+        raise RuntimeError(f"Could not reach API: {e}") from e
+    if json_output:
+        return json.dumps(keys, indent=2)
+    if not keys:
+        return (
+            "No SSH keys registered.\n"
+            "\n"
+            "Add your SSH key:\n"
+            "  wafer ssh-keys add\n"
+            "\n"
+            "This will auto-detect your key from ~/.ssh/"
+        )
+    lines = ["SSH Keys:"]
+    for key in keys:
+        key_type = _get_key_type(key["public_key"])
+        fingerprint = _get_key_fingerprint(key["public_key"])
+        name = key.get("name") or "(no name)"
+        lines.append(f"  • {name}: {key_type} {fingerprint}")
+        lines.append(f"    ID: {key['id']}")
+    return "\n".join(lines)
+def add_ssh_key(
+    pubkey_path: Path | None = None,
+    name: str | None = None,
+    json_output: bool = False,
+) -> str:
+    """Add an SSH public key.
+    Args:
+        pubkey_path: Path to public key file (auto-detects if None)
+        name: Optional friendly name for the key
+        json_output: Return JSON instead of formatted output
+    Returns:
+        Formatted output string
+    """
+    # Auto-detect if no path provided
+    if pubkey_path is None:
+        detected = _detect_ssh_keys()
+        if not detected:
+            raise RuntimeError(
+                "No SSH key found in ~/.ssh/\n"
+                "\n"
+                "Generate one with:\n"
+                "  ssh-keygen -t ed25519\n"
+                "\n"
+                "Or specify a path:\n"
+                "  wafer ssh-keys add /path/to/key.pub"
+            )
+        pubkey_path = detected[0]
+    # Validate path
+    if not pubkey_path.exists():
+        raise RuntimeError(f"File not found: {pubkey_path}")
+    if not pubkey_path.suffix == ".pub" and "pub" not in pubkey_path.name:
+        raise RuntimeError(
+            f"Expected a public key file (.pub), got: {pubkey_path}\n"
+            "\n"
+            "Make sure you're adding the PUBLIC key, not the private key."
+        )
+    # Read key content
+    try:
+        public_key = pubkey_path.read_text().strip()
+    except Exception as e:
+        raise RuntimeError(f"Could not read key file: {e}") from e
+    # Validate basic format
+    if not public_key.startswith(("ssh-", "ecdsa-", "sk-")):
+        raise RuntimeError(
+            f"Invalid SSH public key format in {pubkey_path}\n"
+            "\n"
+            "Expected OpenSSH format (e.g., 'ssh-ed25519 AAAAC3... user@host')"
+        )
+    # Auto-generate name from key type and filename if not provided
+    if name is None:
+        key_type = _get_key_type(public_key)
+        # Use key type without prefix (e.g., "ed25519" instead of "ssh-ed25519")
+        short_type = key_type.replace("ssh-", "").replace("ecdsa-sha2-", "")
+        name = short_type
+    # Call API
+    api_url, headers = _get_client()
+    request_body = {
+        "public_key": public_key,
+        "name": name,
+    }
+    try:
+        with httpx.Client(timeout=30.0, headers=headers) as client:
+            response = client.post(
+                f"{api_url}/v1/user/ssh-keys",
+                json=request_body,
+            )
+            response.raise_for_status()
+            key_data = response.json()
+    except httpx.HTTPStatusError as e:
+        if e.response.status_code == 401:
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
+        if e.response.status_code == 400:
+            # Parse error detail
+            try:
+                detail = e.response.json().get("detail", e.response.text)
+            except Exception:
+                detail = e.response.text
+            raise RuntimeError(f"Invalid key: {detail}") from e
+        raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
+    except httpx.RequestError as e:
+        raise RuntimeError(f"Could not reach API: {e}") from e
+    if json_output:
+        return json.dumps(key_data, indent=2)
+    key_type = _get_key_type(public_key)
+    fingerprint = _get_key_fingerprint(public_key)
+    return (
+        f"✓ SSH key registered: {name}\n"
+        f"  Type: {key_type}\n"
+        f"  Fingerprint: {fingerprint}\n"
+        f"  Source: {pubkey_path}\n"
+        f"\n"
+        f"Your key will be installed in all workspaces you attach to."
+    )
+def remove_ssh_key(key_id: str, json_output: bool = False) -> str:
+    """Remove an SSH key.
+    Args:
+        key_id: UUID of the key to remove
+        json_output: Return JSON instead of formatted output
+    Returns:
+        Formatted output string
+    """
+    api_url, headers = _get_client()
+    try:
+        with httpx.Client(timeout=30.0, headers=headers) as client:
+            response = client.delete(f"{api_url}/v1/user/ssh-keys/{key_id}")
+            response.raise_for_status()
+    except httpx.HTTPStatusError as e:
+        if e.response.status_code == 401:
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
+        if e.response.status_code == 404:
+            raise RuntimeError(f"SSH key not found: {key_id}") from e
+        raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
+    except httpx.RequestError as e:
+        raise RuntimeError(f"Could not reach API: {e}") from e
+    if json_output:
+        return json.dumps({"status": "deleted", "key_id": key_id}, indent=2)
+    return f"✓ SSH key removed: {key_id}"

wafer/target_lock.py ADDED Viewed

@@ -0,0 +1,270 @@
+"""Target locking for concurrent access control.
+Uses file locks (fcntl.flock) to ensure only one process uses a target at a time.
+Locks are automatically released when the process exits or crashes.
+Usage:
+    # Try to acquire a single target
+    with try_acquire_target("mi300x-1") as acquired:
+        if acquired:
+            # Got the lock, run eval
+            ...
+        else:
+            # Target busy
+            ...
+    # Acquire first available from a pool
+    with acquire_from_pool(["mi300x-1", "mi300x-2", "mi300x-3"]) as target:
+        if target:
+            # Got a target, run eval
+            ...
+        else:
+            # All targets busy
+            ...
+"""
+from __future__ import annotations
+import fcntl
+import json
+import os
+import sys
+import time
+from collections.abc import Iterator
+from contextlib import contextmanager
+from datetime import UTC
+from pathlib import Path
+def _emit_gpu_event(event_type: str, **data: dict) -> None:
+    """Emit structured GPU event to stderr as JSON.
+    Events are written to stderr (not stdout) to avoid interfering with
+    command output parsing. Format: JSON with newline.
+    These events can be:
+    1. Parsed from bash output in eval events.jsonl
+    2. Piped to observability systems
+    3. Aggregated for GPU utilization metrics
+    """
+    from datetime import datetime
+    event = {
+        "type": event_type,
+        "timestamp": datetime.now(UTC).isoformat(),
+        "pid": os.getpid(),
+        **data,
+    }
+    # Write to stderr so it doesn't interfere with stdout capture
+    print(f"[GPU_EVENT] {json.dumps(event)}", file=sys.stderr, flush=True)
+# Lock directory: holds one "<target_name>.lock" file per target,
+# resolved under the current user's home directory at import time.
+LOCKS_DIR = Path.home() / ".wafer" / "locks"
+def _ensure_locks_dir() -> None:
+    """Ensure locks directory exists."""
+    LOCKS_DIR.mkdir(parents=True, exist_ok=True)
+def _lock_path(target_name: str) -> Path:
+    """Get path to lock file for a target."""
+    return LOCKS_DIR / f"{target_name}.lock"
+@contextmanager
+def try_acquire_target(target_name: str) -> Iterator[bool]:
+    """Try to acquire exclusive lock on a target.
+    Args:
+        target_name: Name of the target to lock
+    Yields:
+        True if lock was acquired, False if target is busy
+    The lock is automatically released when the context exits,
+    or if the process crashes.
+    """
+    _ensure_locks_dir()
+    lock_file = _lock_path(target_name)
+    # Open or create lock file
+    fd = os.open(str(lock_file), os.O_CREAT | os.O_RDWR)
+    try:
+        # Try non-blocking exclusive lock
+        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        # Write PID to lock file for debugging
+        os.ftruncate(fd, 0)
+        os.write(fd, f"{os.getpid()}\n".encode())
+        acquire_time = time.time()
+        _emit_gpu_event("gpu_acquire", target=target_name)
+        try:
+            yield True
+        finally:
+            # Release lock
+            hold_duration_ms = (time.time() - acquire_time) * 1000
+            _emit_gpu_event(
+                "gpu_release",
+                target=target_name,
+                hold_duration_ms=round(hold_duration_ms, 1),
+            )
+            fcntl.flock(fd, fcntl.LOCK_UN)
+    except BlockingIOError:
+        # Lock is held by another process
+        yield False
+    finally:
+        os.close(fd)
+@contextmanager
+def acquire_from_pool(
+    target_names: list[str],
+    timeout: float | None = None,
+    poll_interval: float = 1.0,
+) -> Iterator[str | None]:
+    """Acquire first available target from a list.
+    Tries each target in order, returns the first one that's available.
+    If all targets are busy and timeout is set, waits and retries.
+    Args:
+        target_names: List of target names to try
+        timeout: Max seconds to wait for a target. None = no waiting (fail immediately).
+                 Use float('inf') to wait forever.
+        poll_interval: Seconds between retries when waiting
+    Yields:
+        Name of acquired target, or None if all are busy (and timeout expired)
+    Example:
+        # Wait up to 5 minutes for a target
+        with acquire_from_pool(["gpu-1", "gpu-2", "gpu-3"], timeout=300) as target:
+            if target:
+                print(f"Got {target}")
+                run_eval(target)
+            else:
+                print("All targets busy after timeout")
+    """
+    _ensure_locks_dir()
+    start_time = time.monotonic()
+    while True:
+        # Try each target in order
+        for target_name in target_names:
+            lock_file = _lock_path(target_name)
+            fd = os.open(str(lock_file), os.O_CREAT | os.O_RDWR)
+            try:
+                fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+                # Got the lock - write PID and yield
+                os.ftruncate(fd, 0)
+                os.write(fd, f"{os.getpid()}\n".encode())
+                acquire_time = time.time()
+                _emit_gpu_event("gpu_acquire", target=target_name, pool=target_names)
+                try:
+                    yield target_name
+                    return  # Success - exit after context
+                finally:
+                    hold_duration_ms = (time.time() - acquire_time) * 1000
+                    _emit_gpu_event(
+                        "gpu_release",
+                        target=target_name,
+                        pool=target_names,
+                        hold_duration_ms=round(hold_duration_ms, 1),
+                    )
+                    fcntl.flock(fd, fcntl.LOCK_UN)
+                    os.close(fd)
+            except BlockingIOError:
+                # This target is busy, try next
+                os.close(fd)
+                continue
+        # All targets busy - check if we should wait
+        if timeout is None:
+            # No waiting, fail immediately
+            break
+        elapsed = time.monotonic() - start_time
+        if elapsed >= timeout:
+            # Timeout expired
+            break
+        # Wait and retry
+        remaining = timeout - elapsed
+        print(f"  All targets busy, waiting... ({int(remaining)}s remaining)", file=sys.stderr)
+        time.sleep(poll_interval)
+    # All targets busy (timeout expired or no waiting)
+    yield None
+def is_target_locked(target_name: str) -> bool:
+    """Check if a target is currently locked.
+    Note: This is a point-in-time check - the lock status can change
+    immediately after this returns.
+    Args:
+        target_name: Name of the target to check
+    Returns:
+        True if target is locked, False if available
+    """
+    _ensure_locks_dir()
+    lock_file = _lock_path(target_name)
+    if not lock_file.exists():
+        return False
+    fd = os.open(str(lock_file), os.O_RDONLY)
+    try:
+        # Try non-blocking lock
+        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+        # Got it - so it wasn't locked
+        fcntl.flock(fd, fcntl.LOCK_UN)
+        return False
+    except BlockingIOError:
+        return True
+    finally:
+        os.close(fd)
+def get_lock_holder(target_name: str) -> int | None:
+    """Get PID of process holding lock on a target.
+    Args:
+        target_name: Name of the target
+    Returns:
+        PID of lock holder, or None if not locked or unknown
+    """
+    lock_file = _lock_path(target_name)
+    if not lock_file.exists():
+        return None
+    try:
+        content = lock_file.read_text().strip()
+        return int(content)
+    except (ValueError, OSError):
+        return None
+def list_locked_targets() -> list[str]:
+    """List all currently locked targets.
+    Returns:
+        List of target names that are currently locked
+    """
+    _ensure_locks_dir()
+    locked = []
+    for lock_file in LOCKS_DIR.glob("*.lock"):
+        target_name = lock_file.stem
+        if is_target_locked(target_name):
+            locked.append(target_name)
+    return sorted(locked)