wafer-core 0.1.21__py3-none-any.whl → 0.1.23__py3-none-any.whl

@@ -14,7 +14,6 @@ from __future__ import annotations
 
 import json
 import logging
-import os
 import time
 from contextlib import asynccontextmanager
 from dataclasses import dataclass
@@ -250,10 +249,17 @@ async def provision_pod(target: RunPodTarget) -> tuple[str, str, int, str]:
         "ports": "22/tcp",
         "startSsh": True,
         "startJupyter": False,
-        "imageName": target.image,
         "env": [],
     }
 
+    if target.template_id:
+        # Template defines image, dockerArgs (sshd setup), and ports.
+        # Required for non-RunPod images (e.g. rocm/pytorch) that don't
+        # have RunPod's built-in SSH handler.
+        pod_input["templateId"] = target.template_id
+    else:
+        pod_input["imageName"] = target.image
+
     variables = {"input": pod_input}
 
     logger.info(f"Provisioning RunPod pod: {pod_name}")
@@ -334,7 +340,8 @@ async def _wait_for_ssh(pod_id: str, timeout_seconds: int) -> tuple[str, int, st
         # Check for SSH port
         runtime = pod.get("runtime")
         if runtime and status == "running":
-            for port in runtime.get("ports", []):
+            # ports can be null in JSON response, so use 'or []' instead of default
+            for port in runtime.get("ports") or []:
                 if (
                     port.get("privatePort") == 22
                     and port.get("isIpPublic")
@@ -378,6 +385,55 @@ async def terminate_pod(pod_id: str) -> bool:
     return False
 
 
+# =============================================================================
+# Template Management (not yet implemented)
+# =============================================================================
+#
+# The saveTemplate mutation allows creating reusable pod templates with custom
+# configurations. Templates can specify docker images, environment setup,
+# container disk size, and other pod settings.
+#
+# Example mutation:
+#
+#     mutation saveTemplate($input: SaveTemplateInput) {
+#         saveTemplate(input: $input) {
+#             id
+#             name
+#             imageName
+#             containerDiskInGb
+#             ports
+#             dockerArgs
+#             startSsh
+#             startJupyter
+#         }
+#     }
+#
+# Example variables:
+#
+#     {
+#         "input": {
+#             "containerDiskInGb": 50,
+#             "dockerArgs": "bash -c \"apt-get update && apt-get install -y openssh-server && ...\"",
+#             "env": [],
+#             "isPublic": false,
+#             "isServerless": false,
+#             "name": "template-name",
+#             "ports": "22/tcp",
+#             "portsConfig": [{"name": "SSH", "port": "22"}],
+#             "readme": "",
+#             "volumeInGb": 0,
+#             "volumeMountPath": "",
+#             "config": {},
+#             "category": "AMD",
+#             "imageName": "rocm/pytorch:rocm7.0.2_ubuntu24.04_py3.12_pytorch_release_2.7.1"
+#         }
+#     }
+#
+# Note: Template creation is not currently implemented in this module.
+# If needed, implement a save_template() function following the pattern of
+# provision_pod() and terminate_pod() above.
+
+
 # =============================================================================
 # Context Manager
 # =============================================================================
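
If template creation is ever needed, a minimal sketch of that suggested save_template() follows. It assumes _graphql_request_async accepts a variables argument alongside the query string (the diff only shows it called with a query, so the exact signature is an assumption):

    async def save_template(name: str, image_name: str, docker_args: str) -> str:
        # Sketch only: create a reusable pod template, return its id.
        mutation = """
        mutation saveTemplate($input: SaveTemplateInput) {
            saveTemplate(input: $input) { id }
        }
        """
        variables = {
            "input": {
                "name": name,
                "imageName": image_name,
                "dockerArgs": docker_args,
                "containerDiskInGb": 50,
                "ports": "22/tcp",
                "env": [],
                "isPublic": False,
                "isServerless": False,
            }
        }
        data = await _graphql_request_async(mutation, variables)  # assumed signature
        return data["saveTemplate"]["id"]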
@@ -482,20 +538,103 @@ async def cleanup_target(target_name: str) -> bool:
     return success
 
 
+async def sync_pods_from_api() -> list[PodState]:
+    """Query RunPod API for all running pods and update local state.
+
+    This discovers pods that exist on the account but aren't in our state file
+    (e.g., created manually or by another machine). Updates the state file with
+    any wafer-created pods found.
+
+    Returns list of all running pods with SSH info.
+    """
+    query = """
+    query {
+        myself {
+            pods {
+                id
+                name
+                desiredStatus
+                runtime {
+                    ports {
+                        ip
+                        isIpPublic
+                        privatePort
+                        publicPort
+                    }
+                }
+            }
+        }
+    }
+    """
+
+    try:
+        data = await _graphql_request_async(query)
+    except Exception as e:
+        logger.warning(f"Failed to query pods from API: {e}")
+        return []
+
+    pods = data.get("myself", {}).get("pods", [])
+    running_pods = []
+
+    for pod in pods:
+        status = pod.get("desiredStatus", "").lower()
+        if status != "running":
+            continue
+
+        pod_id = pod["id"]
+        pod_name = pod.get("name", "")
+
+        # Extract SSH info
+        runtime = pod.get("runtime")
+        if not runtime:
+            continue
+
+        public_ip = None
+        ssh_port = None
+        for port in runtime.get("ports") or []:
+            if port.get("privatePort") == 22 and port.get("isIpPublic"):
+                public_ip = port.get("ip")
+                ssh_port = port.get("publicPort")
+                break
+
+        if not public_ip or not ssh_port:
+            continue
+
+        # Extract target name from pod name (wafer-{target_name}-{timestamp})
+        target_name = None
+        if pod_name.startswith("wafer-"):
+            parts = pod_name.split("-")
+            if len(parts) >= 3:
+                # Handle target names with hyphens: wafer-runpod-mi300x-1234567
+                target_name = "-".join(parts[1:-1])
+
+        pod_state = PodState(
+            pod_id=pod_id,
+            target_name=target_name or pod_name,
+            public_ip=public_ip,
+            ssh_port=ssh_port,
+            ssh_username="root",
+            created_at=datetime.now(timezone.utc).isoformat(),
+        )
+        running_pods.append(pod_state)
+
+        # Update state file if this is a wafer-created pod
+        if target_name:
+            existing = get_pod_state(target_name)
+            if not existing or existing.pod_id != pod_id:
+                logger.info(f"Syncing pod {pod_id} to state for target {target_name}")
+                _add_pod_to_state(target_name, pod_id, public_ip, ssh_port, "root")
+
+    return running_pods
+
+
 async def list_running_pods() -> list[PodState]:
-    """List all pods in state file that are still running."""
-    state = _load_state()
-    running = []
+    """List all running pods by querying the RunPod API.
 
-    for name, pod_state in state.items():
-        if await check_pod_running(pod_state.pod_id):
-            running.append(pod_state)
-        else:
-            # Clean up stale entry
-            logger.info(f"Removing stale state for {name} (pod {pod_state.pod_id})")
-            _remove_pod_from_state(name)
-
-    return running
+    Syncs state file with API to discover pods not in local state.
+    Returns list of running pods with SSH info.
+    """
+    return await sync_pods_from_api()
 
 
 async def cleanup_all_pods() -> int:
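
The wafer-{target_name}-{timestamp} parsing keeps everything between the prefix and the trailing timestamp, so hyphenated target names survive the round trip:

    pod_name = "wafer-runpod-mi300x-1234567"
    parts = pod_name.split("-")            # ["wafer", "runpod", "mi300x", "1234567"]
    target_name = "-".join(parts[1:-1])    # "runpod-mi300x"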
@@ -49,6 +49,10 @@ from wafer_core.tools.rocprof_systems_tools import (
     exec_rocprof_systems_query,
     exec_rocprof_systems_sample,
 )
+from wafer_core.tools.skill_tool import (
+    SKILL_TOOL,
+    exec_skill,
+)
 from wafer_core.tools.tracelens_tools import (
     TRACELENS_COLLECTIVE_TOOL,
     TRACELENS_COMPARE_TOOL,
@@ -68,6 +72,10 @@ from wafer_core.tools.write_kernel_tool import (
     KernelSubmission,
     exec_write_kernel,
 )
+from wafer_core.tools.search_docs_tool import (
+    SEARCH_DOCS_TOOL,
+    exec_search_docs,
+)
 
 __all__ = [
     # File tools
@@ -88,6 +96,9 @@ __all__ = [
     "BashPermissionResult",
     "check_bash_permissions",
     "exec_bash",
+    # Skill tool
+    "SKILL_TOOL",
+    "exec_skill",
     # Wafer tool
     "WAFER_TOOL",
     "WAFER_SUBCOMMANDS",
@@ -126,4 +137,7 @@ __all__ = [
     "exec_tracelens_report",
     "exec_tracelens_compare",
     "exec_tracelens_collective",
+    # Search docs tool
+    "SEARCH_DOCS_TOOL",
+    "exec_search_docs",
 ]
@@ -1,4 +1,4 @@
-"""Grep tool using ripgrep for fast content search."""
+"""Grep tool using ripgrep (with fallback to standard grep)."""
 
 from pathlib import Path
 
@@ -15,7 +15,7 @@ GREP_TOOL = Tool(
     function=ToolFunction(
         name="grep",
         description=(
-            "Search for a pattern in files using ripgrep. "
+            "Search for a pattern in files. "
             "Returns matching lines with file paths and line numbers. "
             "Supports regex patterns by default."
         ),
@@ -54,7 +54,7 @@ GREP_TOOL = Tool(
 
 
 async def exec_grep(tool_call: ToolCall, working_dir: Path) -> ToolResult:
-    """Execute grep using ripgrep."""
+    """Execute grep using ripgrep (preferred) or standard grep (fallback)."""
     import shutil
     import subprocess
 
@@ -74,35 +74,55 @@ async def exec_grep(tool_call: ToolCall, working_dir: Path) -> ToolResult:
             error="'pattern' is required",
         )
 
-    # Find ripgrep
+    # Try ripgrep first, fall back to standard grep
     rg_path = shutil.which("rg")
-    if not rg_path:
+    grep_path = shutil.which("grep")
+
+    if rg_path:
+        # Use ripgrep (faster, better defaults)
+        cmd = [rg_path, "--line-number", "--no-heading", "--color=never"]
+
+        if case_insensitive:
+            cmd.append("--ignore-case")
+
+        if context_lines:
+            cmd.extend(["--context", str(context_lines)])
+
+        if glob_pattern:
+            cmd.extend(["--glob", glob_pattern])
+
+        # Limit results
+        cmd.extend(["--max-count", str(max_results)])
+
+        cmd.append(pattern)
+        cmd.append(search_path)
+        use_ripgrep = True
+    elif grep_path:
+        # Fallback to standard grep
+        cmd = [grep_path, "-r", "-n", "--color=never"]
+
+        if case_insensitive:
+            cmd.append("-i")
+
+        if context_lines:
+            cmd.extend(["-C", str(context_lines)])
+
+        if glob_pattern:
+            # Standard grep uses --include for glob patterns
+            cmd.extend(["--include", glob_pattern])
+
+        cmd.append(pattern)
+        cmd.append(search_path)
+        use_ripgrep = False
+    else:
         return ToolResult(
             tool_call_id=tool_call.id,
             is_error=True,
             content="",
-            error="ripgrep (rg) not found. Please install it: brew install ripgrep",
+            error="Neither ripgrep (rg) nor grep found. Please install one.",
         )
 
-    # Build command
-    cmd = [rg_path, "--line-number", "--no-heading", "--color=never"]
-
-    if case_insensitive:
-        cmd.append("--ignore-case")
-
-    if context_lines:
-        cmd.extend(["--context", str(context_lines)])
-
-    if glob_pattern:
-        cmd.extend(["--glob", glob_pattern])
-
-    # Limit results
-    cmd.extend(["--max-count", str(max_results)])
-
-    cmd.append(pattern)
-    cmd.append(search_path)
-
-    # Run ripgrep
+    # Run the search
     try:
         result = subprocess.run(
             cmd,
@@ -126,13 +146,14 @@ async def exec_grep(tool_call: ToolCall, working_dir: Path) -> ToolResult:
             error=f"Search failed: {e}",
         )
 
-    # ripgrep returns exit code 1 for no matches (not an error)
+    # Both ripgrep and grep return exit code 1 for no matches (not an error)
     if result.returncode not in (0, 1):
+        tool_name = "ripgrep" if use_ripgrep else "grep"
         return ToolResult(
             tool_call_id=tool_call.id,
             is_error=True,
             content="",
-            error=result.stderr or f"ripgrep exited with code {result.returncode}",
+            error=result.stderr or f"{tool_name} exited with code {result.returncode}",
         )
 
     output = result.stdout.strip()
@@ -143,8 +164,14 @@ async def exec_grep(tool_call: ToolCall, working_dir: Path) -> ToolResult:
             content=f"No matches found for pattern: {pattern}",
         )
 
-    # Count matches
-    match_count = len(output.split("\n"))
+    # Count matches and limit output for standard grep
+    lines = output.split("\n")
+    if not use_ripgrep and len(lines) > max_results:
+        lines = lines[:max_results]
+        output = "\n".join(lines)
+        output += f"\n... (truncated to {max_results} results)"
+
+    match_count = min(len(lines), max_results)
     header = f"Found {match_count} matches:\n\n"
 
     return ToolResult(
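
For illustration, the same tool call (pattern "TODO", glob "*.py", max_results 3, no context lines) builds roughly these commands in each branch:

    # ripgrep branch (--max-count limits matches per file)
    ["rg", "--line-number", "--no-heading", "--color=never",
     "--glob", "*.py", "--max-count", "3", "TODO", "."]

    # grep fallback (no result flag; output is truncated in Python afterwards)
    ["grep", "-r", "-n", "--color=never", "--include", "*.py", "TODO", "."]

Note the asymmetry: ripgrep caps matches per file at search time, while the fallback trims the combined output to max_results lines after the fact.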
@@ -0,0 +1,196 @@
+"""Search documentation tool for GPU programming corpora.
+
+Provides semantic and keyword search over documentation for CuTeDSL, CUDA, etc.
+
+Corpora are downloaded via `wafer corpus download <name>` and stored in ~/.cache/wafer/corpora/.
+"""
+
+import re
+from pathlib import Path
+
+from wafer_core.rollouts.dtypes import Tool, ToolCall, ToolFunction, ToolFunctionParameter, ToolResult
+
+# Cache directory where wafer corpus download stores files
+CACHE_DIR = Path.home() / ".cache" / "wafer" / "corpora"
+
+# Available corpora (names match wafer corpus download)
+AVAILABLE_CORPORA = ["cutlass", "cutedsl", "cuda", "hip", "amd"]
+
+SEARCH_DOCS_TOOL = Tool(
+    type="function",
+    function=ToolFunction(
+        name="search_docs",
+        description="""Search GPU programming documentation for relevant information.
+
+Use this tool to find documentation about:
+- CUTLASS C++ (cute:: namespace, gemm tutorials, tensor cores, TMA, Blackwell)
+- CuTeDSL Python API (@cute.kernel, @cute.jit, cute.arch functions)
+- CUDA programming concepts
+- GPU kernel optimization techniques
+- Code examples and patterns
+
+Available corpora:
+- 'cutlass' - NVIDIA CUTLASS C++ docs + GitHub examples (gemm, hopper, blackwell)
+- 'cutedsl' - CuTeDSL Python documentation
+- 'cuda' - General CUDA programming docs
+- 'hip' - AMD HIP programming docs
+- 'amd' - AMD GPU kernel development (rocWMMA, CK, etc.)
+
+Note: Corpora must be downloaded first with `wafer corpus download <name>`.
+Returns relevant documentation snippets with file paths.""",
+        parameters=ToolFunctionParameter(
+            type="object",
+            properties={
+                "query": {
+                    "type": "string",
+                    "description": "Search query - describe what you're looking for",
+                },
+                "corpus": {
+                    "type": "string",
+                    "description": "Which docs to search: 'cutlass', 'cutedsl', 'cuda', 'hip', 'amd' (default: cutlass)",
+                },
+                "max_results": {
+                    "type": "integer",
+                    "description": "Maximum number of results to return (default: 5)",
+                },
+            },
+        ),
+        required=["query"],
+    )
+)
+
+
+def _get_corpus_path(corpus_name: str) -> Path | None:
+    """Get the path to a corpus in the cache directory.
+
+    Corpora are stored at ~/.cache/wafer/corpora/<corpus_name>/
+    """
+    if corpus_name not in AVAILABLE_CORPORA:
+        return None
+
+    corpus_path = CACHE_DIR / corpus_name
+    if corpus_path.exists():
+        return corpus_path
+
+    return None
+
+
+def _search_files(corpus_path: Path, query: str, max_results: int = 5) -> list[dict]:
+    """Simple keyword search through documentation files."""
+    results = []
+    query_terms = query.lower().split()
+
+    # Search .md, .py, .cu, .hpp, and .h files (for CUTLASS examples)
+    for pattern in ["**/*.md", "**/*.py", "**/*.cu", "**/*.hpp", "**/*.h", "**/*.cuh"]:
+        for file_path in corpus_path.glob(pattern):
+            if file_path.is_file():
+                try:
+                    content = file_path.read_text(encoding="utf-8", errors="ignore")
+                    content_lower = content.lower()
+
+                    # Score based on term matches
+                    score = sum(content_lower.count(term) for term in query_terms)
+
+                    if score > 0:
+                        # Extract relevant snippets
+                        snippets = _extract_snippets(content, query_terms)
+                        results.append({
+                            "file": str(file_path),  # Return absolute path so read tool can access it
+                            "score": score,
+                            "snippets": snippets[:3],  # Top 3 snippets
+                        })
+                except Exception:
+                    continue
+
+    # Sort by score and return top results
+    results.sort(key=lambda x: x["score"], reverse=True)
+    return results[:max_results]
+
+
+def _extract_snippets(content: str, terms: list[str], context_lines: int = 5) -> list[str]:
+    """Extract snippets containing search terms."""
+    snippets = []
+    lines = content.split("\n")
+
+    for i, line in enumerate(lines):
+        line_lower = line.lower()
+        if any(term in line_lower for term in terms):
+            # Get context around the match
+            start = max(0, i - context_lines)
+            end = min(len(lines), i + context_lines + 1)
+            snippet = "\n".join(lines[start:end])
+
+            # Skip very short snippets
+            if len(snippet.strip()) > 50:
+                snippets.append(snippet)
+
+    return snippets
+
+
+async def exec_search_docs(
+    tool_call: ToolCall,
+    corpus_override: str | None = None,
+) -> ToolResult:
+    """Execute search_docs tool.
+
+    Args:
+        tool_call: The tool call with query and optional corpus
+        corpus_override: Override corpus path (for testing)
+    """
+    query = tool_call.args.get("query", "")
+    corpus_name = tool_call.args.get("corpus", "cutlass")
+    max_results = tool_call.args.get("max_results", 5)
+
+    if not query:
+        return ToolResult(
+            tool_call_id=tool_call.id,
+            content="",
+            error="query parameter is required",
+        )
+
+    # Find corpus path
+    if corpus_override:
+        corpus_path = Path(corpus_override)
+    else:
+        corpus_path = _get_corpus_path(corpus_name)
+        if corpus_path is None:
+            return ToolResult(
+                tool_call_id=tool_call.id,
+                content="",
+                error=f"Unknown corpus: {corpus_name}. Available: {AVAILABLE_CORPORA}",
+            )
+
+    if not corpus_path.exists():
+        return ToolResult(
+            tool_call_id=tool_call.id,
+            content="",
+            error=f"Corpus '{corpus_name}' not downloaded. Run: wafer corpus download {corpus_name}",
+        )
+
+    # Search
+    results = _search_files(corpus_path, query, max_results)
+
+    if not results:
+        return ToolResult(
+            tool_call_id=tool_call.id,
+            content=f"No results found for query: {query}",
+            error=None,
+        )
+
+    # Format output
+    output_parts = [f"Found {len(results)} results for: {query}\n"]
+
+    for i, result in enumerate(results, 1):
+        output_parts.append(f"\n{'='*60}")
+        output_parts.append(f"[{i}] {result['file']} (score: {result['score']})")
+        output_parts.append("=" * 60)
+
+        for snippet in result["snippets"]:
+            output_parts.append(snippet)
+            output_parts.append("-" * 40)
+
+    return ToolResult(
+        tool_call_id=tool_call.id,
+        content="\n".join(output_parts),
+        error=None,
+    )
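
A quick usage sketch (the direct ToolCall construction is illustrative; its constructor is not shown in this diff):

    # Hypothetical invocation inside an async context.
    call = ToolCall(id="tc-1", args={"query": "hopper gemm TMA", "corpus": "cutlass"})
    result = await exec_search_docs(call)
    print(result.content)  # formatted snippets with absolute file paths and scores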
@@ -0,0 +1,64 @@
+"""Skill tool.
+
+Loads skill content on demand from ~/.wafer/skills/ or bundled locations.
+"""
+
+from wafer_core.rollouts.dtypes import (
+    Tool,
+    ToolCall,
+    ToolFunction,
+    ToolFunctionParameter,
+    ToolResult,
+)
+
+# ── Tool Definition ──────────────────────────────────────────────────────────
+
+SKILL_TOOL = Tool(
+    type="function",
+    function=ToolFunction(
+        name="skill",
+        description="Load a skill's full instructions. Skills provide domain-specific knowledge and workflows. Use this when you need detailed guidance for a task mentioned in your available skills.",
+        parameters=ToolFunctionParameter(
+            type="object",
+            properties={
+                "name": {
+                    "type": "string",
+                    "description": "Name of the skill to load (e.g., 'wafer-guide')",
+                },
+            },
+        ),
+        required=["name"],
+    ),
+)
+
+
+# ── Pure Function Executor ───────────────────────────────────────────────────
+
+
+async def exec_skill(tool_call: ToolCall) -> ToolResult:
+    """Load a skill's full instructions.
+
+    Args:
+        tool_call: The tool call with skill name.
+    """
+    from wafer_core.rollouts.skills import discover_skills, load_skill
+
+    skill_name = tool_call.args["name"]
+    skill = load_skill(skill_name)
+
+    if skill is None:
+        available = discover_skills()
+        available_names = [s.name for s in available]
+        return ToolResult(
+            tool_call_id=tool_call.id,
+            is_error=True,
+            content="",
+            error=f"Skill not found: {skill_name}. Available skills: {', '.join(available_names) or 'none'}",
+        )
+
+    header = f"# Skill: {skill.name}\n\n"
+    return ToolResult(
+        tool_call_id=tool_call.id,
+        is_error=False,
+        content=header + skill.content,
+    )
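
Usage mirrors the other tool executors (ToolCall construction again illustrative):

    call = ToolCall(id="tc-2", args={"name": "wafer-guide"})
    result = await exec_skill(call)
    # Success: result.content starts with "# Skill: wafer-guide"
    # Unknown name: result.is_error is True and result.error lists available skills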
@@ -33,6 +33,9 @@ def get_auth_token() -> str | None:
     Note:
         In local dev mode (localhost), no token is required.
         The API will use LOCAL_DEV_MODE to bypass auth.
+
+        Callers (like wevin-extension) should pass WAFER_AUTH_TOKEN
+        as an environment variable when spawning Python processes.
     """
     return os.environ.get("WAFER_AUTH_TOKEN")
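
A sketch of what that note means for a caller (the spawned script name is hypothetical):

    import os
    import subprocess

    token = "..."  # obtained through the caller's own auth flow (e.g. extension login)
    env = {**os.environ, "WAFER_AUTH_TOKEN": token}
    subprocess.run(["python", "wafer_job.py"], env=env)  # wafer_job.py is hypothetical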