PyPI - wafer-cli - Versions diffs - 0.2.19__tar.gz → 0.2.21__tar.gz - Mend

wafer-cli 0.2.19.tar.gz → 0.2.21.tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (63) hide show

{wafer_cli-0.2.19 → wafer_cli-0.2.21}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wafer-cli
-Version: 0.2.19
+Version: 0.2.21
 Summary: CLI tool for running commands on remote GPUs and GPU kernel optimization agent
 Requires-Python: >=3.11
 Requires-Dist: typer>=0.12.0

{wafer_cli-0.2.19 → wafer_cli-0.2.21}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "wafer-cli"
-version = "0.2.19"
+version = "0.2.21"
 description = "CLI tool for running commands on remote GPUs and GPU kernel optimization agent"
 requires-python = ">=3.11"
 dependencies = [

{wafer_cli-0.2.19 → wafer_cli-0.2.21}/tests/test_cli_coverage.py RENAMED Viewed

@@ -719,3 +719,17 @@ class TestWorkspacesExecFlagPassthrough:
             "workspaces", "exec", "test-ws", "--", "cmd", "--output=/tmp/out"
         ])
         assert "no such option" not in result.output.lower()
+class TestAgentNoSandboxOption:
+    """Test --no-sandbox option in wafer agent command."""
+    def test_agent_no_sandbox_option_exists(self) -> None:
+        """Test that --no-sandbox option is accepted by wafer agent command."""
+        result = runner.invoke(app, ["agent", "--help"])
+        assert result.exit_code == 0
+        # Strip ANSI escape codes before checking (help output may contain color codes)
+        ansi_escape = re.compile(r'\x1b\[[0-9;]*m')
+        clean_output = ansi_escape.sub('', result.stdout)
+        assert "--no-sandbox" in clean_output
+        assert "liability" in clean_output.lower()  # Warning text should be in help

{wafer_cli-0.2.19 → wafer_cli-0.2.21}/tests/test_wevin_cli.py RENAMED Viewed

@@ -634,35 +634,114 @@ def test_streaming_frontend_session_start_state_without_session_id():
 def test_streaming_frontend_session_start_resumed_then_new():
     """Test session_start emission when resuming but states have different session_id.
     Edge case: --resume used but states return different session_id (should use states one).
     """
     import trio
     from wafer.wevin_cli import StreamingChunkFrontend
     async def run_test() -> None:
         # Start with resumed session_id
         frontend = StreamingChunkFrontend(
             session_id="resumed-session-123",
             model="claude-sonnet-4.5"
         )
         emitted_events = []
         def mock_emit(obj) -> None:
             emitted_events.append(obj)
         frontend._emit = mock_emit
         # start() emits session_start for resumed session
         await frontend.start()
         assert len(emitted_events) == 1
         assert emitted_events[0]["session_id"] == "resumed-session-123"
         # If states have different session_id (shouldn't happen, but handle gracefully)
         # The logic in main() checks `if first_session_id and not session_id`
         # So if session_id was set, it won't emit again
         # This is correct behavior - use the one from --resume
     trio.run(run_test)
+# =============================================================================
+# --no-sandbox flag tests
+# =============================================================================
+def test_no_sandbox_parameter_accepted():
+    """Test that no_sandbox parameter exists in wevin_main signature."""
+    import inspect
+    from wafer.wevin_cli import main as wevin_main
+    sig = inspect.signature(wevin_main)
+    params = sig.parameters
+    # Verify parameter exists
+    assert 'no_sandbox' in params
+    # Verify type and default
+    assert str(params['no_sandbox'].annotation) in ('bool', "<class 'bool'>")
+    assert params['no_sandbox'].default is False
+def test_build_environment_accepts_no_sandbox():
+    """Test that _build_environment accepts no_sandbox parameter."""
+    import inspect
+    from wafer.wevin_cli import _build_environment
+    sig = inspect.signature(_build_environment)
+    params = sig.parameters
+    assert 'no_sandbox' in params
+    assert params['no_sandbox'].default is False
+def test_build_environment_with_no_sandbox_false():
+    """Test _build_environment creates env with sandbox ENABLED when no_sandbox=False."""
+    from wafer_core.rollouts.templates import TemplateConfig
+    from wafer_core.sandbox import SandboxMode
+    from wafer.wevin_cli import _build_environment
+    tpl = TemplateConfig(
+        name="test",
+        description="Test template",
+        system_prompt="Test",
+        tools=["read"],
+    )
+    # This will raise RuntimeError if sandbox is unavailable on this system
+    # That's expected - we're testing that sandbox is ENABLED by default
+    try:
+        env = _build_environment(tpl, None, None, no_sandbox=False)
+        # If we get here, sandbox is available - verify it's enabled
+        assert env.sandbox_mode == SandboxMode.ENABLED
+    except RuntimeError as e:
+        # Sandbox unavailable - that's OK, the error proves ENABLED is set
+        assert "sandboxing is not available" in str(e)
+def test_build_environment_with_no_sandbox_true():
+    """Test _build_environment creates env with sandbox DISABLED when no_sandbox=True."""
+    from wafer_core.rollouts.templates import TemplateConfig
+    from wafer_core.sandbox import SandboxMode
+    from wafer.wevin_cli import _build_environment
+    tpl = TemplateConfig(
+        name="test",
+        description="Test template",
+        system_prompt="Test",
+        tools=["read"],
+    )
+    # This should NOT raise - sandbox is disabled
+    env = _build_environment(tpl, None, None, no_sandbox=True)
+    assert env.sandbox_mode == SandboxMode.DISABLED

{wafer_cli-0.2.19 → wafer_cli-0.2.21}/wafer/cli.py RENAMED Viewed

@@ -1,6 +1,8 @@
-# ruff: noqa: PLR0913
+# ruff: noqa: PLR0913, E402
 # PLR0913 (too many arguments) is suppressed because Typer CLI commands
 # naturally have many parameters - each --flag becomes a function argument.
+# E402 (module level import not at top) is suppressed because we intentionally
+# load .env files before importing other modules that may read env vars.
 """Wafer CLI - GPU development toolkit for LLM coding agents.
 Core commands:
@@ -27,6 +29,12 @@ from pathlib import Path
 import trio
 import typer
+from dotenv import load_dotenv
+# Auto-load .env from current directory and ~/.wafer/.env
+# This runs at import time so env vars are available before any config is accessed
+load_dotenv()  # cwd/.env
+load_dotenv(Path.home() / ".wafer" / ".env")  # ~/.wafer/.env
 from .config import WaferConfig, WaferEnvironment
 from .inference import infer_upload_files, resolve_environment
@@ -42,6 +50,7 @@ from .problems import (
 app = typer.Typer(
     help="GPU development toolkit for LLM coding agents",
     no_args_is_help=True,
+    pretty_exceptions_show_locals=False,  # Don't dump local vars (makes tracebacks huge)
 )
 # =============================================================================
@@ -58,11 +67,11 @@ def _show_version() -> None:
     """Show CLI version and environment, then exit."""
     from .analytics import _get_cli_version
     from .global_config import load_global_config
     version = _get_cli_version()
     config = load_global_config()
     environment = config.environment
     typer.echo(f"wafer-cli {version} ({environment})")
     raise typer.Exit()
@@ -110,7 +119,7 @@ def main_callback(
     if version:
         _show_version()
         return
     global _command_start_time, _command_outcome
     _command_start_time = time.time()
     _command_outcome = "success"  # Default to success, mark failure on exceptions
@@ -121,6 +130,7 @@ def main_callback(
     analytics.init_analytics()
     # Install exception hook to catch SystemExit and mark failures
+    # Also prints error message FIRST so it's visible even when traceback is truncated
     original_excepthook = sys.excepthook
     def custom_excepthook(
@@ -136,7 +146,9 @@ def main_callback(
                 _command_outcome = "failure"
         else:
             _command_outcome = "failure"
-        # Call original excepthook
+            # Print error summary FIRST (before traceback) so it's visible even if truncated
+            print(f"\n\033[1;31m>>> ERROR: {exc_type.__name__}: {exc_value}\033[0m\n", file=sys.stderr)
+        # Call original excepthook (prints the full traceback)
         original_excepthook(exc_type, exc_value, exc_traceback)
     sys.excepthook = custom_excepthook
@@ -591,7 +603,7 @@ app.add_typer(provider_auth_app, name="auth")
 def provider_auth_login(
     provider: str = typer.Argument(
         ...,
-        help="Provider name: runpod, digitalocean, or modal",
+        help="Provider name: runpod, digitalocean, modal, anthropic, or openai",
     ),
     api_key: str | None = typer.Option(
         None,
@@ -600,15 +612,16 @@ def provider_auth_login(
         help="API key (if not provided, reads from stdin)",
     ),
 ) -> None:
-    """Save API key for a cloud GPU provider.
+    """Save API key for a provider.
     Stores the key in ~/.wafer/auth.json. Environment variables
-    (e.g., WAFER_RUNPOD_API_KEY) take precedence over stored keys.
+    (e.g., ANTHROPIC_API_KEY) take precedence over stored keys.
     Examples:
+        wafer auth login anthropic --api-key sk-ant-xxx
         wafer auth login runpod --api-key rp_xxx
-        wafer auth login digitalocean --api-key dop_v1_xxx
-        echo $API_KEY | wafer auth login runpod
+        wafer auth login openai --api-key sk-xxx
+        echo $API_KEY | wafer auth login anthropic
     """
     import sys
@@ -642,7 +655,7 @@ def provider_auth_login(
 def provider_auth_logout(
     provider: str = typer.Argument(
         ...,
-        help="Provider name: runpod, digitalocean, or modal",
+        help="Provider name: runpod, digitalocean, modal, anthropic, or openai",
     ),
 ) -> None:
     """Remove stored API key for a cloud GPU provider.
@@ -1327,6 +1340,11 @@ def agent(  # noqa: PLR0913
         "-c",
         help="Documentation corpus to use (cuda, cutlass, hip, amd). Must be downloaded first.",
     ),
+    no_sandbox: bool = typer.Option(
+        False,
+        "--no-sandbox",
+        help="Disable OS-level sandboxing (YOU accept liability for any damage caused by the agent)",
+    ),
 ) -> None:
     """AI assistant for GPU kernel development.
@@ -1408,6 +1426,13 @@ def agent(  # noqa: PLR0913
                 raise typer.Exit(1) from None
         corpus_path = str(path)
+    # Warn user about sandbox disabled
+    if no_sandbox:
+        print(
+            "Warning: Sandbox disabled. You accept liability for any damage caused by the agent.",
+            file=sys.stderr,
+        )
     wevin_main(
         prompt=actual_prompt,
         interactive=use_tui,
@@ -1425,6 +1450,7 @@ def agent(  # noqa: PLR0913
         template=template,
         template_args=parsed_template_args,
         corpus_path=corpus_path,
+        no_sandbox=no_sandbox,
     )
@@ -1455,6 +1481,7 @@ def _make_agent_alias(name: str, doc: str) -> None:
         template: str | None = typer.Option(None, "--template", "-t"),
         template_args: list[str] | None = typer.Option(None, "--args"),
         corpus: str | None = typer.Option(None, "--corpus"),
+        no_sandbox: bool = typer.Option(False, "--no-sandbox"),
     ) -> None:
         agent(
             prompt=prompt,
@@ -1474,6 +1501,7 @@ def _make_agent_alias(name: str, doc: str) -> None:
             template=template,
             template_args=template_args,
             corpus=corpus,
+            no_sandbox=no_sandbox,
         )
     alias_cmd.__doc__ = doc
@@ -4391,9 +4419,13 @@ def workspaces_list(
 @workspaces_app.command("create")
 def workspaces_create(
     name: str = typer.Argument(..., help="Workspace name"),
-    gpu_type: str = typer.Option("B200", "--gpu", "-g", help="GPU type: MI300X (AMD) or B200 (NVIDIA, default)"),
+    gpu_type: str = typer.Option(
+        "B200", "--gpu", "-g", help="GPU type: MI300X (AMD) or B200 (NVIDIA, default)"
+    ),
     image: str | None = typer.Option(None, "--image", "-i", help="Docker image (optional)"),
-    wait: bool = typer.Option(False, "--wait", "-w", help="Wait for provisioning and show SSH credentials"),
+    wait: bool = typer.Option(
+        False, "--wait", "-w", help="Wait for provisioning and show SSH credentials"
+    ),
     json_output: bool = typer.Option(False, "--json", "-j", help="Output as JSON"),
 ) -> None:
     """Create a new workspace.
@@ -4702,19 +4734,25 @@ def workspaces_ssh(
     ssh_host = ws.get("ssh_host")
     ssh_port = ws.get("ssh_port")
     ssh_user = ws.get("ssh_user")
     if not ssh_host or not ssh_port or not ssh_user:
         typer.echo("Error: Workspace not ready. Wait a few seconds and retry.", err=True)
         raise typer.Exit(1)
     # Connect via SSH
-    os.execvp("ssh", [
+    os.execvp(
         "ssh",
-        "-p", str(ssh_port),
-        "-o", "StrictHostKeyChecking=no",
-        "-o", "UserKnownHostsFile=/dev/null",
-        f"{ssh_user}@{ssh_host}",
-    ])
+        [
+            "ssh",
+            "-p",
+            str(ssh_port),
+            "-o",
+            "StrictHostKeyChecking=no",
+            "-o",
+            "UserKnownHostsFile=/dev/null",
+            f"{ssh_user}@{ssh_host}",
+        ],
+    )
 @workspaces_app.command("sync")

{wafer_cli-0.2.19 → wafer_cli-0.2.21}/wafer/templates/optimize_kernel.py RENAMED Viewed

@@ -68,4 +68,6 @@ IMPORTANT: Always verify correctness with wafer evaluate before claiming success
         "kernel": "./kernel.cu",
         "target": "H100",
     },
+    # Enable skill discovery (agent can load wafer-guide, etc.)
+    include_skills=True,
 )

{wafer_cli-0.2.19 → wafer_cli-0.2.21}/wafer/wevin_cli.py RENAMED Viewed

@@ -266,18 +266,27 @@ def _build_environment(
     tpl: TemplateConfig,
     tools_override: list[str] | None,
     corpus_path: str | None,
+    no_sandbox: bool = False,
 ) -> Environment:
     """Build a CodingEnvironment from template config."""
     from wafer_core.environments.coding import CodingEnvironment
     from wafer_core.rollouts.templates import DANGEROUS_BASH_COMMANDS
+    from wafer_core.sandbox import SandboxMode
     working_dir = Path(corpus_path) if corpus_path else Path.cwd()
-    resolved_tools = tools_override or tpl.tools
+    resolved_tools = list(tools_override or tpl.tools)
+    # Add skill tool if skills are enabled
+    if tpl.include_skills and "skill" not in resolved_tools:
+        resolved_tools.append("skill")
+    sandbox_mode = SandboxMode.DISABLED if no_sandbox else SandboxMode.ENABLED
     env: Environment = CodingEnvironment(
         working_dir=working_dir,
         enabled_tools=resolved_tools,
         bash_allowlist=tpl.bash_allowlist,
         bash_denylist=DANGEROUS_BASH_COMMANDS,
+        sandbox_mode=sandbox_mode,
     )  # type: ignore[assignment]
     return env
@@ -362,6 +371,7 @@ def main(  # noqa: PLR0913, PLR0915
     list_sessions: bool = False,
     get_session: str | None = None,
     json_output: bool = False,
+    no_sandbox: bool = False,
 ) -> None:
     """Run wevin agent in-process via rollouts."""
     from dataclasses import asdict
@@ -373,6 +383,7 @@ def main(  # noqa: PLR0913, PLR0915
     # Handle --get-session: load session by ID and print
     if get_session:
         async def _get_session() -> None:
             try:
                 session, err = await session_store.get(get_session)
@@ -393,16 +404,18 @@ def main(  # noqa: PLR0913, PLR0915
                         error_msg = f"Failed to serialize messages: {e}"
                         print(json.dumps({"error": error_msg}))
                         sys.exit(1)
-                    print(json.dumps({
-                        "session_id": session.session_id,
-                        "status": session.status.value,
-                        "model": session.endpoint.model if session.endpoint else None,
-                        "created_at": session.created_at,
-                        "updated_at": session.updated_at,
-                        "messages": messages_data,
-                        "tags": session.tags,
-                    }))
+                    print(
+                        json.dumps({
+                            "session_id": session.session_id,
+                            "status": session.status.value,
+                            "model": session.endpoint.model if session.endpoint else None,
+                            "created_at": session.created_at,
+                            "updated_at": session.updated_at,
+                            "messages": messages_data,
+                            "tags": session.tags,
+                        })
+                    )
                 else:
                     print(f"Session: {session.session_id}")
                     print(f"Status: {session.status.value}")
@@ -490,7 +503,7 @@ def main(  # noqa: PLR0913, PLR0915
             print(f"Error loading template: {err}", file=sys.stderr)
             sys.exit(1)
         tpl = loaded_template
-        system_prompt = tpl.interpolate_prompt(template_args or {})
+        base_system_prompt = tpl.interpolate_prompt(template_args or {})
         # Show template info when starting without a prompt
         if not prompt and tpl.description:
             print(f"Template: {tpl.name}", file=sys.stderr)
@@ -498,14 +511,27 @@ def main(  # noqa: PLR0913, PLR0915
             print(file=sys.stderr)
     else:
         tpl = _get_default_template()
-        system_prompt = tpl.system_prompt
+        base_system_prompt = tpl.system_prompt
+    # Append skill metadata if skills are enabled
+    if tpl.include_skills:
+        from wafer_core.rollouts.skills import discover_skills, format_skill_metadata_for_prompt
+        skill_metadata = discover_skills()
+        if skill_metadata:
+            skill_section = format_skill_metadata_for_prompt(skill_metadata)
+            system_prompt = base_system_prompt + "\n\n" + skill_section
+        else:
+            system_prompt = base_system_prompt
+    else:
+        system_prompt = base_system_prompt
     # CLI args override template values
     resolved_single_turn = single_turn if single_turn is not None else tpl.single_turn
     # Build endpoint and environment
     endpoint = _build_endpoint(tpl, model, api_base, api_key)
-    environment = _build_environment(tpl, tools, corpus_path)
+    environment = _build_environment(tpl, tools, corpus_path, no_sandbox)
     # Session store
     session_store = FileSessionStore()
@@ -545,7 +571,7 @@ def main(  # noqa: PLR0913, PLR0915
             else:
                 if json_output:
                     # Emit session_start if we have a session_id (from --resume)
-                    model_name = endpoint.model if hasattr(endpoint, 'model') else None
+                    model_name = endpoint.model if hasattr(endpoint, "model") else None
                     frontend = StreamingChunkFrontend(session_id=session_id, model=model_name)
                 else:
                     frontend = NoneFrontend(show_tool_calls=True, show_thinking=False)
@@ -560,9 +586,11 @@ def main(  # noqa: PLR0913, PLR0915
                 # Emit session_start for new sessions (if session_id was None and we got one)
                 # Check first state to emit as early as possible
                 if json_output and isinstance(frontend, StreamingChunkFrontend):
-                    first_session_id = states[0].session_id if states and states[0].session_id else None
+                    first_session_id = (
+                        states[0].session_id if states and states[0].session_id else None
+                    )
                     if first_session_id and not session_id:  # New session created
-                        model_name = endpoint.model if hasattr(endpoint, 'model') else None
+                        model_name = endpoint.model if hasattr(endpoint, "model") else None
                         frontend.emit_session_start(first_session_id, model_name)
                 # Print resume command with full wafer agent prefix
                 if states and states[-1].session_id:

{wafer_cli-0.2.19 → wafer_cli-0.2.21}/wafer_cli.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wafer-cli
-Version: 0.2.19
+Version: 0.2.21
 Summary: CLI tool for running commands on remote GPUs and GPU kernel optimization agent
 Requires-Python: >=3.11
 Requires-Dist: typer>=0.12.0