wafer-cli 0.2.31__py3-none-any.whl → 0.2.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wafer/GUIDE.md +1 -1
- wafer/agent_defaults.py +157 -2
- wafer/billing.py +6 -6
- wafer/cli.py +432 -346
- wafer/corpus.py +6 -72
- wafer/evaluate.py +143 -81
- wafer/global_config.py +0 -13
- wafer/kernel_scope.py +1 -1
- wafer/ncu_analyze.py +1 -1
- wafer/nsys_analyze.py +1 -1
- wafer/skills/wafer-guide/SKILL.md +6 -22
- wafer/ssh_keys.py +6 -6
- wafer/targets_ops.py +2 -29
- wafer/templates/aiter_optimize.py +59 -0
- wafer/templates/optimize_kernel.py +2 -4
- wafer/templates/optimize_kernelbench.py +62 -17
- wafer/templates/optimize_vllm.py +156 -0
- wafer/trace_compare.py +48 -139
- wafer/wevin_cli.py +1 -12
- wafer/workspaces.py +8 -8
- wafer_cli-0.2.33.dist-info/METADATA +260 -0
- {wafer_cli-0.2.31.dist-info → wafer_cli-0.2.33.dist-info}/RECORD +25 -23
- wafer_cli-0.2.31.dist-info/METADATA +0 -107
- {wafer_cli-0.2.31.dist-info → wafer_cli-0.2.33.dist-info}/WHEEL +0 -0
- {wafer_cli-0.2.31.dist-info → wafer_cli-0.2.33.dist-info}/entry_points.txt +0 -0
- {wafer_cli-0.2.31.dist-info → wafer_cli-0.2.33.dist-info}/top_level.txt +0 -0
wafer/GUIDE.md
CHANGED
@@ -7,7 +7,7 @@ GPU development primitives for LLM agents.
 Run code on cloud GPUs instantly with workspaces:
 
 ```bash
-wafer
+wafer login                              # One-time auth
 wafer workspaces create dev --gpu B200   # Create workspace (NVIDIA B200)
 wafer workspaces exec dev -- python -c "import torch; print(torch.cuda.get_device_name(0))"
 wafer workspaces sync dev ./my-project   # Sync files
wafer/agent_defaults.py
CHANGED
@@ -1,8 +1,8 @@
 """Shared agent defaults for kernel optimization tasks.
 
 Single source of truth for bash allowlists and enabled tools used by both:
-- CLI templates (apps/wafer-cli/wafer/templates
-- Eval configs (research/evals
+- CLI templates (apps/wafer-cli/wafer/templates/*.py)
+- Eval configs (research/evals/*_eval/*.py)
 
 Import from here instead of defining your own copy.
 """
@@ -12,6 +12,9 @@ from __future__ import annotations
 # Tools available to the agent (coding environment tools)
 ENABLED_TOOLS: list[str] = ["read", "write", "edit", "glob", "grep", "bash"]
 
+# vLLM-specific tools (same as ENABLED_TOOLS for now)
+VLLM_ENABLED_TOOLS: list[str] = ["read", "write", "edit", "glob", "grep", "bash"]
+
 # Bash commands allowed for kernel optimization agents.
 # Uses prefix matching — "wafer evaluate" also allows "wafer evaluate kernelbench".
 KERNELBENCH_BASH_ALLOWLIST: list[str] = [
@@ -40,3 +43,155 @@ KERNELBENCH_BASH_ALLOWLIST: list[str] = [
     "pwd",
     "which",
 ]
+
+# Tools available to aiter optimization agents (full coding environment)
+AITER_ENABLED_TOOLS: list[str] = ["read", "write", "edit", "glob", "grep", "bash"]
+
+# System prompt for aiter optimization (shared between eval and template)
+# Uses {op_name}, {test_file}, {target_flag} placeholders
+AITER_SYSTEM_PROMPT = """\
+You are a GPU kernel optimization expert specializing in AMD MI300X and the aiter library.
+
+## Context
+
+aiter (ROCm/aiter) is AMD's centralized repository for high-performance AI operators.
+Operators are implemented using Triton kernels, Composable Kernel (CK), or HIP/ROCm.
+
+Each operator has a test in `op_tests/test_{{op}}.py` that validates correctness and
+measures performance against a reference implementation.
+
+## Your Task
+
+1. **Understand the operator**: Read the test file and trace imports to find implementation
+2. **Establish baseline**: Run the evaluation to measure current performance
+   ```bash
+   # Quick check with one shape (fast iteration)
+   wafer evaluate aiter --aiter-dir . --cmd "python op_tests/test_{{op}}.py --mnk 128,32,8192" {target_flag}
+
+   # Full test suite (final validation)
+   wafer evaluate aiter --aiter-dir . --cmd "python op_tests/test_{{op}}.py" {target_flag}
+   ```
+3. **Identify optimizations**: Look for memory access patterns, occupancy, instruction selection
+4. **Implement changes**: Modify the operator to improve performance
+5. **Validate**: Re-run evaluation to verify correctness and measure speedup
+6. **Iterate**: Use quick checks during development, full suite for final validation
+
+## Finding Source Files
+
+The aiter codebase structure varies by operator. To find implementation files:
+
+1. **Start with the test file**: `op_tests/test_{{op}}.py`
+   - Read imports to see what modules are used
+   - Look for the main function being tested
+
+2. **Check common locations** (not all ops have all of these):
+   - `aiter/ops/{{op}}.py` — High-level Python API (some ops)
+   - `aiter/triton_kernels/` — Triton kernel implementations
+   - `csrc/kernels/` — CUDA/HIP kernel implementations
+   - `csrc/py_itfs_cu/` — Python interface CUDA files
+   - `csrc/cktile_*/` — Composable Kernel tile implementations
+
+3. **Search for the op name**:
+   ```bash
+   find . -name "*{{op}}*" -type f | grep -v __pycache__
+   grep -r "def {{function_name}}" aiter/ csrc/ --include="*.py" --include="*.cu"
+   ```
+
+## Key Directories
+
+- `aiter/` — Main package with operator implementations
+- `aiter/ops/` — High-level operator APIs (some ops)
+- `aiter/triton_kernels/` — Triton kernel implementations
+- `csrc/` — C++/CUDA/HIP implementations
+- `op_tests/` — Tests for each operator
+- `aiter/configs/` — Tuned configurations (CSV files)
+
+## Output
+
+Your goal is to produce:
+1. Modified operator code with optimizations
+2. Benchmark results showing correctness and speedup
+3. A summary of what you changed and why
+
+The optimization should be correct (pass the op_test) and faster than baseline."""
+
+# Bash commands allowed for aiter optimization agents.
+AITER_BASH_ALLOWLIST: list[str] = [
+    # Read-only
+    "ls",
+    "cat",
+    "head",
+    "tail",
+    "wc",
+    "find",
+    "grep",
+    "rg",
+    "pwd",
+    "tree",
+    "which",
+    "diff",
+    "sort",
+    # Filesystem
+    "mkdir",
+    "cp",
+    "mv",
+    # Git
+    "git diff",
+    "git status",
+    "git log",
+    # Compilation
+    "hipcc",
+    "g++",
+    "gcc",
+    "clang",
+    "python",
+    "python3",
+    "pip",
+    "pytest",
+    # Execution — allows running compiled binaries and python scripts
+    "./",
+    # Kernel evaluation
+    "wafer evaluate aiter",
+    # Profiling — AMD
+    "wafer amd rocprof-compute",
+    "wafer amd rocprof-sdk",
+    "wafer amd rocprof-systems",
+    "wafer amd isa",
+    # Sub-agents
+    "wafer agent -t ask-docs",
+    # Misc
+    "timeout",
+]
+
+# Bash commands allowed for vLLM kernel optimization agents.
+VLLM_BASH_ALLOWLIST: list[str] = [
+    # vLLM evaluation
+    "wafer evaluate vllm",
+    # vLLM's own test and benchmark commands (run inside vllm dir)
+    "pytest",
+    # Profiling — AMD
+    "wafer amd rocprof-compute",
+    "wafer amd rocprof-sdk",
+    "wafer amd rocprof-systems",
+    # Profiling — NVIDIA
+    "wafer nvidia ncu",
+    "wafer nvidia nsys",
+    # Analysis
+    "wafer compiler-analyze",
+    # Sub-agents
+    "wafer agent -t ask-docs",
+    # General utilities
+    "python",
+    "python3",
+    "pip",
+    "timeout",
+    "ls",
+    "cat",
+    "head",
+    "tail",
+    "wc",
+    "pwd",
+    "which",
+    "cd",
+    "git",
+]
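The allowlists above are consumed by prefix matching (per the comment on KERNELBENCH_BASH_ALLOWLIST), and AITER_SYSTEM_PROMPT doubles its braces (`{{op}}`) so that only `{target_flag}` is substituted at format time. A minimal sketch of both behaviors, assuming only `{target_flag}` appears as a single-brace placeholder in the prompt body; the `is_allowed` helper and the flag value are illustrative, not code shipped in this wheel:

```python
from wafer.agent_defaults import AITER_BASH_ALLOWLIST, AITER_SYSTEM_PROMPT


def is_allowed(command: str, allowlist: list[str]) -> bool:
    """Hypothetical prefix-match check: "wafer evaluate aiter" also permits longer invocations."""
    return any(command.startswith(prefix) for prefix in allowlist)


# Only {target_flag} is filled in; {{op}} collapses to the literal {op} in the rendered prompt.
prompt = AITER_SYSTEM_PROMPT.format(target_flag="--target <gpu>")  # placeholder value, supplied by the template/eval config

assert is_allowed("wafer evaluate aiter --aiter-dir .", AITER_BASH_ALLOWLIST)
assert not is_allowed("rm -rf build/", AITER_BASH_ALLOWLIST)
assert "test_{op}.py" in prompt
```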
wafer/billing.py
CHANGED
@@ -1,6 +1,6 @@
 """Billing CLI - Manage credits and subscription.
 
-This module provides the implementation for the `wafer
+This module provides the implementation for the `wafer billing` subcommand.
 """
 
 import json
@@ -126,7 +126,7 @@ def format_usage_text(usage: dict) -> str:
     lines.extend([
         "",
         "Upgrade to Pro for hardware counters and credit topups:",
-        " wafer
+        " wafer billing portal",
     ])
 
     return "\n".join(lines)
@@ -153,7 +153,7 @@ def get_usage(json_output: bool = False) -> str:
         usage = response.json()
     except httpx.HTTPStatusError as e:
         if e.response.status_code == 401:
-            raise RuntimeError("Not authenticated. Run: wafer
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
         raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
     except httpx.RequestError as e:
         raise RuntimeError(f"Could not reach API: {e}") from e
@@ -188,7 +188,7 @@ def create_topup(amount_cents: int) -> dict:
         return response.json()
     except httpx.HTTPStatusError as e:
         if e.response.status_code == 401:
-            raise RuntimeError("Not authenticated. Run: wafer
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
         if e.response.status_code == 400:
             # Invalid amount
             try:
@@ -200,7 +200,7 @@ def create_topup(amount_cents: int) -> dict:
             # Start tier or other restriction
             raise RuntimeError(
                 "Topup not available for your subscription tier.\n"
-                "Upgrade your subscription first: wafer
+                "Upgrade your subscription first: wafer billing portal"
             ) from e
         if e.response.status_code == 503:
             raise RuntimeError("Billing service temporarily unavailable. Please try again later.") from e
@@ -227,7 +227,7 @@ def get_portal_url() -> dict:
         return response.json()
     except httpx.HTTPStatusError as e:
         if e.response.status_code == 401:
-            raise RuntimeError("Not authenticated. Run: wafer
+            raise RuntimeError("Not authenticated. Run: wafer login") from e
         raise RuntimeError(f"API error: {e.response.status_code} - {e.response.text}") from e
     except httpx.RequestError as e:
         raise RuntimeError(f"Could not reach API: {e}") from e