wafer-cli 0.2.14__py3-none-any.whl → 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wafer/GUIDE.md +1 -1
- wafer/agent_defaults.py +42 -0
- wafer/auth.py +7 -0
- wafer/billing.py +6 -6
- wafer/cli.py +905 -131
- wafer/cli_instructions.py +143 -0
- wafer/corpus.py +313 -15
- wafer/evaluate.py +480 -146
- wafer/global_config.py +13 -0
- wafer/kernel_scope.py +1 -1
- wafer/ncu_analyze.py +1 -1
- wafer/nsys_analyze.py +1 -1
- wafer/skills/wafer-guide/SKILL.md +22 -6
- wafer/specs_cli.py +157 -0
- wafer/ssh_keys.py +6 -6
- wafer/targets_cli.py +472 -0
- wafer/targets_ops.py +29 -2
- wafer/templates/ask_docs.py +1 -1
- wafer/templates/optimize_kernel.py +3 -1
- wafer/templates/optimize_kernelbench.py +17 -62
- wafer/templates/trace_analyze.py +1 -1
- wafer/tests/test_eval_cli_parity.py +199 -0
- wafer/trace_compare.py +274 -0
- wafer/wevin_cli.py +125 -26
- wafer/workspaces.py +163 -16
- wafer_cli-0.2.30.dist-info/METADATA +107 -0
- wafer_cli-0.2.30.dist-info/RECORD +47 -0
- wafer_cli-0.2.14.dist-info/METADATA +0 -16
- wafer_cli-0.2.14.dist-info/RECORD +0 -41
- {wafer_cli-0.2.14.dist-info → wafer_cli-0.2.30.dist-info}/WHEEL +0 -0
- {wafer_cli-0.2.14.dist-info → wafer_cli-0.2.30.dist-info}/entry_points.txt +0 -0
- {wafer_cli-0.2.14.dist-info → wafer_cli-0.2.30.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""Generate agent system prompt instructions from the wafer CLI's own --help text.
|
|
2
|
+
|
|
3
|
+
Walks the typer/click command tree and extracts help text for commands
|
|
4
|
+
matching the bash_allowlist. This ensures agent instructions stay in sync
|
|
5
|
+
with the CLI — the --help text is the single source of truth for both
|
|
6
|
+
human users and AI agents.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
from wafer.cli_instructions import build_cli_instructions
|
|
10
|
+
|
|
11
|
+
instructions = build_cli_instructions([
|
|
12
|
+
"wafer evaluate",
|
|
13
|
+
"wafer nvidia ncu",
|
|
14
|
+
"wafer rocprof profile",
|
|
15
|
+
"python", # non-wafer commands are skipped
|
|
16
|
+
])
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import click
|
|
22
|
+
import typer.main
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _resolve_command(root: click.BaseCommand, parts: list[str]) -> click.BaseCommand | None:
    """Walk the click command tree to find a (sub)command by name parts.

    Args:
        root: The root click command (from typer.main.get_command)
        parts: Command path segments, e.g. ["evaluate", "kernelbench"]

    Returns:
        The click command at that path, or None if not found.
    """
    current: click.BaseCommand | None = root
    for segment in parts:
        # Only group-like commands can have children to descend into.
        if not isinstance(current, click.MultiCommand):
            return None
        ctx = click.Context(current, info_name=segment)
        current = current.get_command(ctx, segment)
        if current is None:
            return None
    return current
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _format_command_help(cmd_path: str, cmd: click.BaseCommand) -> str:
    """Format a single command's help text for inclusion in a system prompt.

    Extracts the description and option help text (skipping --help itself).
    """
    out = [f"### `{cmd_path}`"]

    if cmd.help:
        out.append(cmd.help.strip())

    # One line per user-facing option; --help itself is omitted.
    opt_rows: list[str] = []
    for param in getattr(cmd, "params", []):
        if not isinstance(param, click.Option) or param.name == "help":
            continue
        label = "/".join(param.opts)
        kind = param.type.name.upper() if hasattr(param.type, "name") else ""
        desc = param.help or ""
        # Boolean flags read better without the redundant type tag.
        flag_like = kind in ("BOOL", "BOOLEAN") or param.is_flag
        if kind and not flag_like:
            opt_rows.append(f"  {label} {kind} {desc}")
        else:
            opt_rows.append(f"  {label} {desc}")

    if opt_rows:
        out += ["", "Options:", *opt_rows]

    # Groups also list each immediate subcommand with a one-line summary.
    if isinstance(cmd, click.MultiCommand):
        ctx = click.Context(cmd, info_name=cmd_path.split()[-1])
        sub_rows: list[str] = []
        for sub_name in cmd.list_commands(ctx):
            sub = cmd.get_command(ctx, sub_name)
            if sub:
                summary = (sub.help or sub.short_help or "").strip().split("\n")[0]
                sub_rows.append(f"  {cmd_path} {sub_name} {summary}")
        if sub_rows:
            out += ["", "Subcommands:", *sub_rows]

    return "\n".join(out)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def build_cli_instructions(bash_allowlist: list[str]) -> str:
    """Generate CLI instruction text from --help for allowed wafer commands.

    Walks the typer/click command tree and extracts help text for each
    wafer command in the bash_allowlist. Non-wafer commands (python, ls, etc.)
    are skipped.

    Args:
        bash_allowlist: List of allowed bash command prefixes.
            Example: ["wafer evaluate", "wafer nvidia ncu", "python"]

    Returns:
        Markdown-formatted CLI instructions, or empty string if no wafer
        commands are in the allowlist.
    """
    if not bash_allowlist:
        return ""

    # Only wafer-prefixed entries are documented; everything else is skipped.
    wafer_entries = [entry for entry in bash_allowlist if entry.startswith("wafer ")]
    if not wafer_entries:
        return ""

    # Imported lazily: wafer.cli imports this module, so a top-level
    # import would be circular.
    from wafer.cli import app

    root = typer.main.get_command(app)

    sections: list[str] = []
    for entry in wafer_entries:
        # "wafer evaluate kernelbench" -> ["evaluate", "kernelbench"]
        target = _resolve_command(root, entry.split()[1:])
        if target is None:
            # Command not found in tree — skip silently
            continue
        sections.append(_format_command_help(entry, target))

    if not sections:
        return ""

    header = (
        "## Wafer CLI Commands\n\n"
        "You do not have a local GPU. Use the wafer CLI to run on remote GPU hardware.\n"
    )
    return header + "\n\n".join(sections)
|
wafer/corpus.py
CHANGED
|
@@ -3,10 +3,12 @@
|
|
|
3
3
|
Download and manage documentation corpora for agent filesystem access.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
import re
|
|
6
7
|
import shutil
|
|
7
8
|
import tarfile
|
|
8
9
|
import tempfile
|
|
9
10
|
from dataclasses import dataclass
|
|
11
|
+
from html.parser import HTMLParser
|
|
10
12
|
from pathlib import Path
|
|
11
13
|
from typing import Literal
|
|
12
14
|
from urllib.parse import urlparse
|
|
@@ -33,7 +35,7 @@ class CorpusConfig:
|
|
|
33
35
|
|
|
34
36
|
name: CorpusName
|
|
35
37
|
description: str
|
|
36
|
-
source_type: Literal["nvidia_md", "github_repo", "github_multi_repo"]
|
|
38
|
+
source_type: Literal["nvidia_md", "github_repo", "github_multi_repo", "mixed"]
|
|
37
39
|
urls: list[str] | None = None
|
|
38
40
|
repo: str | None = None
|
|
39
41
|
repo_paths: list[str] | None = None
|
|
@@ -67,21 +69,74 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
|
|
|
67
69
|
),
|
|
68
70
|
"cutlass": CorpusConfig(
|
|
69
71
|
name="cutlass",
|
|
70
|
-
description="CUTLASS
|
|
71
|
-
source_type="
|
|
72
|
-
|
|
73
|
-
|
|
72
|
+
description="CUTLASS C++ documentation, examples, and tutorials",
|
|
73
|
+
source_type="mixed",
|
|
74
|
+
# Official NVIDIA CUTLASS documentation (scraped as markdown)
|
|
75
|
+
urls=[
|
|
76
|
+
"https://docs.nvidia.com/cutlass/latest/overview.html",
|
|
77
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/functionality.html",
|
|
78
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/terminology.html",
|
|
79
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/fundamental_types.html",
|
|
80
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/programming_guidelines.html",
|
|
81
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/heuristics.html",
|
|
82
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/efficient_gemm.html",
|
|
83
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/pipeline.html",
|
|
84
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/profiler.html",
|
|
85
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/dependent_kernel_launch.html",
|
|
86
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/blackwell_functionality.html",
|
|
87
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/blackwell_cluster_launch_control.html",
|
|
88
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/cute/00_quickstart.html",
|
|
89
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/cute/01_layout.html",
|
|
90
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/cute/02_layout_algebra.html",
|
|
91
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/cute/03_tensor.html",
|
|
92
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/cute/04_algorithms.html",
|
|
93
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/cute/0t_mma_atom.html",
|
|
94
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/cute/0x_gemm_tutorial.html",
|
|
95
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/cute/0y_predication.html",
|
|
96
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/cute/0z_tma_tensors.html",
|
|
97
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/cutlass_3x_design.html",
|
|
98
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/cutlass_3x_backwards_compatibility.html",
|
|
99
|
+
"https://docs.nvidia.com/cutlass/latest/media/docs/cpp/gemm_api_3x.html",
|
|
100
|
+
],
|
|
101
|
+
# NVIDIA/cutlass GitHub examples (excluding python/)
|
|
102
|
+
repos=[
|
|
103
|
+
RepoSource(
|
|
104
|
+
repo="NVIDIA/cutlass",
|
|
105
|
+
paths=["examples"],
|
|
106
|
+
branch="main",
|
|
107
|
+
),
|
|
108
|
+
],
|
|
74
109
|
),
|
|
75
110
|
"hip": CorpusConfig(
|
|
76
111
|
name="hip",
|
|
77
|
-
description="HIP programming guide
|
|
78
|
-
source_type="
|
|
79
|
-
|
|
80
|
-
|
|
112
|
+
description="HIP programming guide, API reference, and examples",
|
|
113
|
+
source_type="github_multi_repo",
|
|
114
|
+
repos=[
|
|
115
|
+
# HIP - main documentation and API
|
|
116
|
+
RepoSource(
|
|
117
|
+
repo="ROCm/HIP",
|
|
118
|
+
paths=["docs"],
|
|
119
|
+
),
|
|
120
|
+
# HIP examples - code samples
|
|
121
|
+
RepoSource(
|
|
122
|
+
repo="ROCm/HIP-Examples",
|
|
123
|
+
paths=["HIP-Examples-Applications", "mini-nbody"],
|
|
124
|
+
),
|
|
125
|
+
# clr - HIP/OpenCL runtime (low-level)
|
|
126
|
+
RepoSource(
|
|
127
|
+
repo="ROCm/clr",
|
|
128
|
+
paths=["hipamd/include", "rocclr/device/gpu"],
|
|
129
|
+
),
|
|
130
|
+
# ROCm docs - official documentation
|
|
131
|
+
RepoSource(
|
|
132
|
+
repo="ROCm/ROCm",
|
|
133
|
+
paths=["docs"],
|
|
134
|
+
),
|
|
135
|
+
],
|
|
81
136
|
),
|
|
82
137
|
"amd": CorpusConfig(
|
|
83
138
|
name="amd",
|
|
84
|
-
description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM)",
|
|
139
|
+
description="AMD GPU kernel development (rocWMMA, CK, AITER, rocBLAS, HipKittens, vLLM, FlashAttention)",
|
|
85
140
|
source_type="github_multi_repo",
|
|
86
141
|
repos=[
|
|
87
142
|
# rocWMMA - wave matrix multiply-accumulate (WMMA) intrinsics
|
|
@@ -125,11 +180,17 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
|
|
|
125
180
|
paths=["docs"],
|
|
126
181
|
branch="develop_deprecated",
|
|
127
182
|
),
|
|
128
|
-
# HipKittens - high-performance AMD kernels
|
|
183
|
+
# HipKittens - high-performance AMD kernels (main branch: MI350X/CDNA4+)
|
|
129
184
|
RepoSource(
|
|
130
185
|
repo="HazyResearch/HipKittens",
|
|
131
186
|
paths=["docs", "kernels", "include"],
|
|
132
187
|
),
|
|
188
|
+
# HipKittens cdna3 branch - MI300X/MI325X (gfx942)
|
|
189
|
+
RepoSource(
|
|
190
|
+
repo="HazyResearch/HipKittens",
|
|
191
|
+
paths=["kernels", "include", "tests"],
|
|
192
|
+
branch="cdna3",
|
|
193
|
+
),
|
|
133
194
|
# vLLM AMD kernels
|
|
134
195
|
RepoSource(
|
|
135
196
|
repo="vllm-project/vllm",
|
|
@@ -145,6 +206,46 @@ CORPORA: dict[CorpusName, CorpusConfig] = {
|
|
|
145
206
|
repo="huggingface/hf-rocm-kernels",
|
|
146
207
|
paths=["csrc", "hf_rocm_kernels", "docs"],
|
|
147
208
|
),
|
|
209
|
+
# ROCm/flash-attention - FlashAttention for AMD GPUs
|
|
210
|
+
RepoSource(
|
|
211
|
+
repo="ROCm/flash-attention",
|
|
212
|
+
paths=["csrc", "docs"],
|
|
213
|
+
),
|
|
214
|
+
# ROCm/triton - Triton compiler for AMD GPUs
|
|
215
|
+
RepoSource(
|
|
216
|
+
repo="ROCm/triton",
|
|
217
|
+
paths=["python/tutorials", "third_party/amd"],
|
|
218
|
+
),
|
|
219
|
+
# ROCm/rccl - ROCm Communication Collectives Library (multi-GPU)
|
|
220
|
+
RepoSource(
|
|
221
|
+
repo="ROCm/rccl",
|
|
222
|
+
paths=["docs"],
|
|
223
|
+
),
|
|
224
|
+
# ROCm/rocprofiler-sdk - AMD GPU profiling SDK
|
|
225
|
+
RepoSource(
|
|
226
|
+
repo="ROCm/rocprofiler-sdk",
|
|
227
|
+
paths=["docs", "samples"],
|
|
228
|
+
),
|
|
229
|
+
# ROCm/omniperf - AMD GPU profiling tool
|
|
230
|
+
RepoSource(
|
|
231
|
+
repo="ROCm/omniperf",
|
|
232
|
+
paths=["docs", "src/omniperf_analyze"],
|
|
233
|
+
),
|
|
234
|
+
# ROCm/omnitrace - Application tracing for AMD
|
|
235
|
+
RepoSource(
|
|
236
|
+
repo="ROCm/omnitrace",
|
|
237
|
+
paths=["docs"],
|
|
238
|
+
),
|
|
239
|
+
# AMD GPUOpen Performance Guides
|
|
240
|
+
RepoSource(
|
|
241
|
+
repo="GPUOpen-Tools/gpu_performance_api",
|
|
242
|
+
paths=["docs"],
|
|
243
|
+
),
|
|
244
|
+
# AMD LLVM - AMD GPU compiler backend
|
|
245
|
+
RepoSource(
|
|
246
|
+
repo="ROCm/llvm-project",
|
|
247
|
+
paths=["amd/device-libs/README.md", "llvm/docs/AMDGPUUsage.rst"],
|
|
248
|
+
),
|
|
148
249
|
],
|
|
149
250
|
),
|
|
150
251
|
}
|
|
@@ -169,19 +270,195 @@ def _url_to_filepath(url: str, base_dir: Path) -> Path:
|
|
|
169
270
|
return base_dir / "/".join(path_parts)
|
|
170
271
|
|
|
171
272
|
|
|
273
|
+
class _HTMLToMarkdown(HTMLParser):
|
|
274
|
+
"""HTML to Markdown converter for NVIDIA documentation pages.
|
|
275
|
+
|
|
276
|
+
Uses stdlib HTMLParser - requires subclassing due to callback-based API.
|
|
277
|
+
The public interface is the functional `_html_to_markdown()` below.
|
|
278
|
+
"""
|
|
279
|
+
|
|
280
|
+
def __init__(self) -> None:
|
|
281
|
+
super().__init__()
|
|
282
|
+
self.output: list[str] = []
|
|
283
|
+
self.current_tag: str = ""
|
|
284
|
+
self.in_code_block = False
|
|
285
|
+
self.in_pre = False
|
|
286
|
+
self.list_depth = 0
|
|
287
|
+
self.ordered_list_counters: list[int] = []
|
|
288
|
+
self.skip_content = False
|
|
289
|
+
self.link_href: str | None = None
|
|
290
|
+
|
|
291
|
+
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
|
|
292
|
+
self.current_tag = tag
|
|
293
|
+
attrs_dict = dict(attrs)
|
|
294
|
+
|
|
295
|
+
# Skip script, style, nav, footer, header
|
|
296
|
+
if tag in ("script", "style", "nav", "footer", "header", "aside"):
|
|
297
|
+
self.skip_content = True
|
|
298
|
+
return
|
|
299
|
+
|
|
300
|
+
if tag == "h1":
|
|
301
|
+
self.output.append("\n# ")
|
|
302
|
+
elif tag == "h2":
|
|
303
|
+
self.output.append("\n## ")
|
|
304
|
+
elif tag == "h3":
|
|
305
|
+
self.output.append("\n### ")
|
|
306
|
+
elif tag == "h4":
|
|
307
|
+
self.output.append("\n#### ")
|
|
308
|
+
elif tag == "h5":
|
|
309
|
+
self.output.append("\n##### ")
|
|
310
|
+
elif tag == "h6":
|
|
311
|
+
self.output.append("\n###### ")
|
|
312
|
+
elif tag == "p":
|
|
313
|
+
self.output.append("\n\n")
|
|
314
|
+
elif tag == "br":
|
|
315
|
+
self.output.append("\n")
|
|
316
|
+
elif tag == "strong" or tag == "b":
|
|
317
|
+
self.output.append("**")
|
|
318
|
+
elif tag == "em" or tag == "i":
|
|
319
|
+
self.output.append("*")
|
|
320
|
+
elif tag == "code" and not self.in_pre:
|
|
321
|
+
self.output.append("`")
|
|
322
|
+
self.in_code_block = True
|
|
323
|
+
elif tag == "pre":
|
|
324
|
+
self.in_pre = True
|
|
325
|
+
# Check for language hint in class
|
|
326
|
+
lang = ""
|
|
327
|
+
if class_attr := attrs_dict.get("class"):
|
|
328
|
+
if "python" in class_attr.lower():
|
|
329
|
+
lang = "python"
|
|
330
|
+
elif "cpp" in class_attr.lower() or "c++" in class_attr.lower():
|
|
331
|
+
lang = "cpp"
|
|
332
|
+
elif "cuda" in class_attr.lower():
|
|
333
|
+
lang = "cuda"
|
|
334
|
+
self.output.append(f"\n```{lang}\n")
|
|
335
|
+
elif tag == "ul":
|
|
336
|
+
self.list_depth += 1
|
|
337
|
+
self.output.append("\n")
|
|
338
|
+
elif tag == "ol":
|
|
339
|
+
self.list_depth += 1
|
|
340
|
+
self.ordered_list_counters.append(1)
|
|
341
|
+
self.output.append("\n")
|
|
342
|
+
elif tag == "li":
|
|
343
|
+
indent = " " * (self.list_depth - 1)
|
|
344
|
+
if self.ordered_list_counters:
|
|
345
|
+
num = self.ordered_list_counters[-1]
|
|
346
|
+
self.output.append(f"{indent}{num}. ")
|
|
347
|
+
self.ordered_list_counters[-1] += 1
|
|
348
|
+
else:
|
|
349
|
+
self.output.append(f"{indent}- ")
|
|
350
|
+
elif tag == "a":
|
|
351
|
+
self.link_href = attrs_dict.get("href")
|
|
352
|
+
self.output.append("[")
|
|
353
|
+
elif tag == "img":
|
|
354
|
+
alt = attrs_dict.get("alt", "image")
|
|
355
|
+
src = attrs_dict.get("src", "")
|
|
356
|
+
self.output.append(f"")
|
|
357
|
+
elif tag == "blockquote":
|
|
358
|
+
self.output.append("\n> ")
|
|
359
|
+
elif tag == "hr":
|
|
360
|
+
self.output.append("\n---\n")
|
|
361
|
+
elif tag == "table":
|
|
362
|
+
self.output.append("\n")
|
|
363
|
+
elif tag == "th":
|
|
364
|
+
self.output.append("| ")
|
|
365
|
+
elif tag == "td":
|
|
366
|
+
self.output.append("| ")
|
|
367
|
+
elif tag == "tr":
|
|
368
|
+
pass # Handled in endtag
|
|
369
|
+
|
|
370
|
+
def handle_endtag(self, tag: str) -> None:
|
|
371
|
+
if tag in ("script", "style", "nav", "footer", "header", "aside"):
|
|
372
|
+
self.skip_content = False
|
|
373
|
+
return
|
|
374
|
+
|
|
375
|
+
if tag in ("h1", "h2", "h3", "h4", "h5", "h6"):
|
|
376
|
+
self.output.append("\n")
|
|
377
|
+
elif tag == "strong" or tag == "b":
|
|
378
|
+
self.output.append("**")
|
|
379
|
+
elif tag == "em" or tag == "i":
|
|
380
|
+
self.output.append("*")
|
|
381
|
+
elif tag == "code" and not self.in_pre:
|
|
382
|
+
self.output.append("`")
|
|
383
|
+
self.in_code_block = False
|
|
384
|
+
elif tag == "pre":
|
|
385
|
+
self.in_pre = False
|
|
386
|
+
self.output.append("\n```\n")
|
|
387
|
+
elif tag == "ul":
|
|
388
|
+
self.list_depth = max(0, self.list_depth - 1)
|
|
389
|
+
elif tag == "ol":
|
|
390
|
+
self.list_depth = max(0, self.list_depth - 1)
|
|
391
|
+
if self.ordered_list_counters:
|
|
392
|
+
self.ordered_list_counters.pop()
|
|
393
|
+
elif tag == "li":
|
|
394
|
+
self.output.append("\n")
|
|
395
|
+
elif tag == "a":
|
|
396
|
+
if self.link_href:
|
|
397
|
+
self.output.append(f"]({self.link_href})")
|
|
398
|
+
else:
|
|
399
|
+
self.output.append("]")
|
|
400
|
+
self.link_href = None
|
|
401
|
+
elif tag == "p":
|
|
402
|
+
self.output.append("\n")
|
|
403
|
+
elif tag == "blockquote":
|
|
404
|
+
self.output.append("\n")
|
|
405
|
+
elif tag == "tr":
|
|
406
|
+
self.output.append("|\n")
|
|
407
|
+
elif tag == "thead":
|
|
408
|
+
# Add markdown table separator after header row
|
|
409
|
+
self.output.append("|---" * 10 + "|\n")
|
|
410
|
+
|
|
411
|
+
def handle_data(self, data: str) -> None:
|
|
412
|
+
if self.skip_content:
|
|
413
|
+
return
|
|
414
|
+
# Preserve whitespace in code blocks
|
|
415
|
+
if self.in_pre:
|
|
416
|
+
self.output.append(data)
|
|
417
|
+
else:
|
|
418
|
+
# Collapse whitespace outside code
|
|
419
|
+
text = re.sub(r"\s+", " ", data)
|
|
420
|
+
if text.strip():
|
|
421
|
+
self.output.append(text)
|
|
422
|
+
|
|
423
|
+
def get_markdown(self) -> str:
|
|
424
|
+
"""Get the converted markdown, cleaned up."""
|
|
425
|
+
md = "".join(self.output)
|
|
426
|
+
# Clean up excessive newlines
|
|
427
|
+
md = re.sub(r"\n{3,}", "\n\n", md)
|
|
428
|
+
# Clean up empty table separators
|
|
429
|
+
md = re.sub(r"\|---\|---.*\|\n(?!\|)", "", md)
|
|
430
|
+
return md.strip()
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def _html_to_markdown(html: str) -> str:
    """Convert HTML to Markdown."""
    converter = _HTMLToMarkdown()
    converter.feed(html)
    return converter.get_markdown()
|
|
438
|
+
|
|
439
|
+
|
|
172
440
|
def _download_nvidia_md(config: CorpusConfig, dest: Path, verbose: bool = True) -> int:
|
|
173
|
-
"""Download NVIDIA docs
|
|
441
|
+
"""Download NVIDIA docs and convert HTML to Markdown.
|
|
442
|
+
|
|
443
|
+
NVIDIA's .md endpoint no longer works, so we scrape HTML and convert to markdown.
|
|
444
|
+
"""
|
|
174
445
|
assert config.urls is not None
|
|
175
446
|
downloaded = 0
|
|
176
447
|
with httpx.Client(timeout=30.0, follow_redirects=True) as client:
|
|
177
448
|
for url in config.urls:
|
|
178
|
-
md_url = f"{url}.md"
|
|
179
449
|
filepath = _url_to_filepath(url, dest)
|
|
180
450
|
filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
181
451
|
try:
|
|
182
|
-
|
|
452
|
+
# Fetch HTML page directly
|
|
453
|
+
resp = client.get(url)
|
|
183
454
|
resp.raise_for_status()
|
|
184
|
-
|
|
455
|
+
|
|
456
|
+
# Convert HTML to Markdown
|
|
457
|
+
markdown = _html_to_markdown(resp.text)
|
|
458
|
+
|
|
459
|
+
# Add source URL as header
|
|
460
|
+
content = f"<!-- Source: {url} -->\n\n{markdown}"
|
|
461
|
+
filepath.write_text(content)
|
|
185
462
|
downloaded += 1
|
|
186
463
|
if verbose:
|
|
187
464
|
print(f" ✓ {filepath.relative_to(dest)}")
|
|
@@ -275,6 +552,25 @@ def _download_github_multi_repo(config: CorpusConfig, dest: Path, verbose: bool
|
|
|
275
552
|
return downloaded
|
|
276
553
|
|
|
277
554
|
|
|
555
|
+
def _download_mixed(config: CorpusConfig, dest: Path, verbose: bool = True) -> int:
    """Download from mixed sources (NVIDIA docs + GitHub repos)."""
    downloaded = 0

    # NVIDIA documentation pages, scraped and converted to markdown.
    if config.urls:
        if verbose:
            print("  [NVIDIA docs]")
        downloaded += _download_nvidia_md(config, dest, verbose)

    # GitHub repository sources.
    if config.repos:
        if verbose:
            print("  [GitHub repos]")
        downloaded += _download_github_multi_repo(config, dest, verbose)

    return downloaded
|
|
572
|
+
|
|
573
|
+
|
|
278
574
|
def download_corpus(name: CorpusName, force: bool = False, verbose: bool = True) -> Path:
|
|
279
575
|
"""Download a corpus to local cache.
|
|
280
576
|
|
|
@@ -311,6 +607,8 @@ def download_corpus(name: CorpusName, force: bool = False, verbose: bool = True)
|
|
|
311
607
|
count = _download_github_repo(config, dest, verbose)
|
|
312
608
|
elif config.source_type == "github_multi_repo":
|
|
313
609
|
count = _download_github_multi_repo(config, dest, verbose)
|
|
610
|
+
elif config.source_type == "mixed":
|
|
611
|
+
count = _download_mixed(config, dest, verbose)
|
|
314
612
|
else:
|
|
315
613
|
raise ValueError(f"Unknown source type: {config.source_type}")
|
|
316
614
|
if verbose:
|