PyPI - toolrails - Versions diffs - 0.1.0__py3-none-any.whl - Mend

toolrails 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

toolrails/__init__.py +9 -0
toolrails/__main__.py +4 -0
toolrails/app.py +155 -0
toolrails/cli.py +56 -0
toolrails/pipeline.py +206 -0
toolrails/schemas.py +218 -0
toolrails/upstream.py +91 -0
toolrails-0.1.0.dist-info/METADATA +208 -0
toolrails-0.1.0.dist-info/RECORD +12 -0
toolrails-0.1.0.dist-info/WHEEL +4 -0
toolrails-0.1.0.dist-info/entry_points.txt +2 -0
toolrails-0.1.0.dist-info/licenses/LICENSE +21 -0

toolrails/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+"""toolrails — valid tool calls from any local model.
+A drop-in OpenAI-compatible proxy in front of Ollama that guarantees the tool
+calls your agent receives are well-formed: real tool name, arguments that match
+the tool's JSON schema. It also restores `tool_choice`, which Ollama's
+OpenAI-compatible endpoint silently ignores.
+"""
+__version__ = "0.1.0"

toolrails/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .cli import main
+# Start the CLI only when this module is executed as a script/package entry
+# (e.g. ``python -m toolrails``), never as a side effect of importing it.
+if __name__ == "__main__":
+    main()

toolrails/app.py ADDED Viewed

@@ -0,0 +1,155 @@
+"""The ASGI proxy: an OpenAI-compatible front door that repairs tool calls.
+Point any agent that speaks the OpenAI API at this instead of at Ollama and
+nothing else changes. Endpoints:
+    POST /v1/chat/completions   the pipeline (or a plain pass-through)
+    GET  /v1/models             forwarded to Ollama verbatim
+    GET  /health                liveness
+Fail-open is the whole safety story: if the repair pipeline raises for any
+reason, we forward the original request unchanged rather than return an error.
+The worst toolrails can do is nothing.
+"""
+from __future__ import annotations
+import contextlib
+import json
+import time
+from typing import Any
+from starlette.applications import Starlette
+from starlette.requests import Request
+from starlette.responses import JSONResponse, Response, StreamingResponse
+from starlette.routing import Route
+from .pipeline import handle
+from .upstream import Upstream
+def create_app(ollama_url: str) -> Starlette:
+    async def chat_completions(request: Request) -> Response:
+        up: Upstream = request.app.state.upstream
+        try:
+            body: dict[str, Any] = await request.json()
+        except (json.JSONDecodeError, ValueError):
+            return JSONResponse({"error": "invalid JSON body"}, status_code=400)
+        wants_stream = bool(body.get("stream"))
+        has_tools = bool(body.get("tools"))
+        # With tools we run the pipeline on a non-streamed response, then
+        # re-emit as a stream if the client asked for one. Guarantee first,
+        # streaming second. Without tools there is nothing to repair, so we
+        # forward untouched (including native streaming).
+        if not has_tools:
+            return await _forward(up, body, wants_stream)
+        try:
+            result = await handle({**body, "stream": False}, up)
+        except Exception:
+            # Fail open: give the client Ollama's own answer.
+            return await _forward(up, body, wants_stream)
+        if wants_stream:
+            return StreamingResponse(
+                _as_sse(result), media_type="text/event-stream"
+            )
+        return JSONResponse(result)
+    async def models(request: Request) -> Response:
+        up: Upstream = request.app.state.upstream
+        r = await up.passthrough("/v1/models")
+        return Response(
+            r.content, status_code=r.status_code,
+            media_type=r.headers.get("content-type", "application/json"),
+        )
+    async def health(request: Request) -> Response:
+        return JSONResponse({"status": "ok", "ollama": ollama_url})
+    @contextlib.asynccontextmanager
+    async def lifespan(app: Starlette):
+        app.state.upstream = Upstream(ollama_url)
+        try:
+            yield
+        finally:
+            await app.state.upstream.aclose()
+    app = Starlette(
+        routes=[
+            Route("/v1/chat/completions", chat_completions, methods=["POST"]),
+            Route("/v1/models", models, methods=["GET"]),
+            Route("/health", health, methods=["GET"]),
+        ],
+        lifespan=lifespan,
+    )
+    return app
+async def _forward(up: Upstream, body: dict[str, Any], wants_stream: bool) -> Response:
+    """Pass a request straight to Ollama. Used when there's nothing to repair
+    and as the fail-open path."""
+    if wants_stream:
+        async def gen():
+            async with up._client.stream(  # noqa: SLF001 - deliberate reuse
+                "POST", f"{up.base_url}/v1/chat/completions", json=body
+            ) as r:
+                async for chunk in r.aiter_raw():
+                    yield chunk
+        return StreamingResponse(gen(), media_type="text/event-stream")
+    r = await up.chat_raw(body)
+    return Response(
+        r.content,
+        status_code=r.status_code,
+        media_type=r.headers.get("content-type", "application/json"),
+    )
+def _as_sse(result: dict[str, Any]):
+    """Re-emit a completed response as an OpenAI streaming sequence.
+    The repaired response is already whole, so we replay it in the shape clients
+    actually accumulate: a role delta, then content, then for each tool call an
+    opener delta (index, id, type, name) followed by its arguments, then a final
+    delta carrying finish_reason, then [DONE]. Each tool call carries its
+    `index`, which is what strict clients key on — the piece the old single-chunk
+    form omitted. Real token-by-token streaming under the grammar is still a
+    later refinement; this makes the buffered form protocol-correct.
+    """
+    try:
+        choice = result["choices"][0]
+        message = choice.get("message", {})
+    except (KeyError, IndexError):
+        yield b"data: [DONE]\n\n"
+        return
+    base = {
+        "id": result.get("id", f"chatcmpl-{int(time.time())}"),
+        "object": "chat.completion.chunk",
+        "created": result.get("created", int(time.time())),
+        "model": result.get("model", ""),
+    }
+    def emit(delta: dict[str, Any], finish: str | None = None) -> bytes:
+        chunk = {**base, "choices": [{"index": 0, "delta": delta, "finish_reason": finish}]}
+        return f"data: {json.dumps(chunk)}\n\n".encode()
+    yield emit({"role": "assistant"})
+    if message.get("content"):
+        yield emit({"content": message["content"]})
+    for i, call in enumerate(message.get("tool_calls") or []):
+        fn = call.get("function", {})
+        yield emit({"tool_calls": [{
+            "index": i, "id": call.get("id", f"call_{i}"), "type": "function",
+            "function": {"name": fn.get("name", ""), "arguments": ""},
+        }]})
+        yield emit({"tool_calls": [{
+            "index": i, "function": {"arguments": fn.get("arguments", "")},
+        }]})
+    yield emit({}, finish=choice.get("finish_reason", "stop"))
+    yield b"data: [DONE]\n\n"

toolrails/cli.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""`toolrails` command line: start the proxy."""
+from __future__ import annotations
+import argparse
+import logging
+import os
+from . import __version__
+def main(argv: list[str] | None = None) -> None:
+    parser = argparse.ArgumentParser(
+        prog="toolrails",
+        description="An OpenAI-compatible proxy that guarantees valid tool "
+        "calls from local models served by Ollama.",
+    )
+    parser.add_argument(
+        "--ollama",
+        default=os.environ.get("OLLAMA_HOST", "http://localhost:11434"),
+        help="Base URL of the Ollama server (default: %(default)s).",
+    )
+    parser.add_argument(
+        "--host", default="127.0.0.1", help="Address to bind (default: %(default)s)."
+    )
+    parser.add_argument(
+        "--port", type=int, default=11500, help="Port to listen on (default: %(default)s)."
+    )
+    parser.add_argument(
+        "--quiet", action="store_true", help="Don't log a line per repaired call."
+    )
+    parser.add_argument("--version", action="version", version=f"toolrails {__version__}")
+    args = parser.parse_args(argv)
+    logging.basicConfig(
+        level=logging.WARNING if args.quiet else logging.INFO,
+        format="toolrails: %(message)s",
+    )
+    # Only our own per-call lines; the HTTP client's request chatter stays quiet.
+    logging.getLogger("httpx").setLevel(logging.WARNING)
+    logging.getLogger("httpcore").setLevel(logging.WARNING)
+    import uvicorn
+    from .app import create_app
+    app = create_app(args.ollama)
+    print(
+        f"toolrails {__version__} → proxying {args.ollama}\n"
+        f"point your agent's base URL at http://{args.host}:{args.port}/v1"
+    )
+    uvicorn.run(app, host=args.host, port=args.port, log_level="warning")
+if __name__ == "__main__":
+    main()

toolrails/pipeline.py ADDED Viewed

@@ -0,0 +1,206 @@
+"""The two-stage tool-call pipeline.
+Given an OpenAI-shaped chat request that offers tools, produce a response whose
+tool calls are guaranteed well-formed. The shape:
+    1. Ask the model naturally (unconstrained). Whether and which tool to call
+       is the model's decision — we never constrain that step, because doing so
+       is what makes models stop calling tools (the "constraint tax").
+    2. For each call it attempted: if the name is real and the arguments already
+       validate, keep it as-is (the fast path — zero extra cost). Otherwise snap
+       the name to the nearest real tool and regenerate *only the arguments*
+       under a grammar built from that tool's schema, which cannot produce
+       invalid JSON.
+    3. Honour `tool_choice`, which Ollama's OpenAI endpoint drops on the floor:
+       `none` strips tools, `required`/a named function forces a call even when
+       the model tried to answer in prose.
+Every public entry point is wrapped by the caller in a fail-open guard: if
+anything in here raises, the proxy falls back to a plain pass-through so it can
+never wedge the agent using it.
+"""
+from __future__ import annotations
+import json
+import logging
+from typing import Any
+from . import schemas
+from .upstream import Upstream
+logger = logging.getLogger("toolrails")
+def _nudge(name: str, description: str) -> dict[str, str]:
+    """The instruction that steers the constrained second pass toward one tool."""
+    desc = f" ({description})" if description else ""
+    return {
+        "role": "user",
+        "content": (
+            f"Call the tool `{name}`{desc} given the conversation above. "
+            f"Respond with only a JSON object of its arguments."
+        ),
+    }
+def _as_openai_call(name: str, args: dict[str, Any], call_id: str) -> dict[str, Any]:
+    """A tool call in the exact shape OpenAI clients expect (arguments = string)."""
+    return {
+        "id": call_id,
+        "type": "function",
+        "function": {"name": name, "arguments": json.dumps(args)},
+    }
+def _forced_name(tool_choice: Any, names: list[str]) -> str | None:
+    """The specific tool named by `tool_choice={"function": {"name": ...}}`."""
+    if isinstance(tool_choice, dict):
+        fn = tool_choice.get("function") or {}
+        name = fn.get("name")
+        if name in names:
+            return name
+    return None
+async def _repair_call(
+    call: dict[str, Any],
+    up: Upstream,
+    model: str,
+    messages: list[dict[str, Any]],
+    tools: list[dict[str, Any]],
+    names: list[str],
+) -> dict[str, Any] | None:
+    """Return a guaranteed-valid version of one attempted tool call.
+    None means the attempt could not be tied to any real tool and the model's
+    original output should be left untouched.
+    """
+    fn = call.get("function") or {}
+    raw_name = fn.get("name") or ""
+    name = schemas.nearest_name(raw_name, names)
+    if name is None:
+        logger.warning("unknown tool %r left untouched", raw_name)
+        return None
+    if name != raw_name:
+        logger.info("name %r → %r", raw_name, name)
+    schema = schemas.schema_for(tools, name)
+    call_id = call.get("id") or f"call_{name}"
+    args = schemas.parse_arguments(fn.get("arguments"))
+    # Fast path: the model already got it right. No second call.
+    if args is not None and schemas.args_valid(args, schema):
+        logger.info("call %s ok", name)
+        return _as_openai_call(name, args, call_id)
+    # Surgical path: fix the *types* of the model's own values (the common
+    # small-model failure) without a second model call and without changing what
+    # the model meant.
+    if args is not None:
+        coerced = schemas.coerce(args, schema)
+        if schemas.args_valid(coerced, schema):
+            logger.info("call %s coerced (fixed argument types)", name)
+            return _as_openai_call(name, coerced, call_id)
+    # Last resort: regenerate arguments under the grammar. Guaranteed to match
+    # the schema, at the cost of one more generation.
+    logger.info("call %s regenerated under grammar", name)
+    regen = await up.constrained_object(
+        model,
+        messages + [_nudge(name, schemas.describe(tools, name))],
+        schema,
+    )
+    if regen is None:
+        regen = args if args is not None else {}
+    return _as_openai_call(name, regen, call_id)
+async def _pick_tool(
+    up: Upstream,
+    model: str,
+    messages: list[dict[str, Any]],
+    names: list[str],
+) -> str:
+    """Choose one tool by name when `tool_choice` forces a call but the model
+    answered in prose. The decision is already made (a call *must* happen), so
+    constraining a name-only choice here carries no tax."""
+    if len(names) == 1:
+        return names[0]
+    selection = await up.constrained_object(
+        model,
+        messages
+        + [{"role": "user", "content": "Which tool should be called? Reply with its name."}],
+        {"type": "object", "properties": {"tool": {"enum": names}}, "required": ["tool"]},
+    )
+    chosen = (selection or {}).get("tool")
+    return chosen if chosen in names else names[0]
+async def _force_call(
+    up: Upstream,
+    model: str,
+    messages: list[dict[str, Any]],
+    tools: list[dict[str, Any]],
+    name: str,
+    message: dict[str, Any],
+    choice: dict[str, Any],
+) -> None:
+    """Rewrite `message` in place to be exactly one guaranteed-valid call to
+    `name` — the arguments regenerated under that tool's grammar."""
+    schema = schemas.schema_for(tools, name)
+    args = await up.constrained_object(
+        model, messages + [_nudge(name, schemas.describe(tools, name))], schema
+    )
+    message["content"] = None
+    message["tool_calls"] = [_as_openai_call(name, args or {}, f"call_{name}")]
+    choice["finish_reason"] = "tool_calls"
+async def handle(body: dict[str, Any], up: Upstream) -> dict[str, Any]:
+    """Run the pipeline for one (non-streaming) chat-completions request.
+    Returns the upstream response dict, with `message` / `choice` rewritten in
+    place when a tool call was repaired or forced. Precedence, first match
+    wins:
+        1. no usable tools, or tool_choice == "none": plain completion with
+           `tools` / `tool_choice` stripped from the request;
+        2. tool_choice names an offered function: force exactly that call;
+        3. the model attempted tool calls: repair each one;
+        4. tool_choice == "required" and the model answered in prose: pick a
+           tool and force a call;
+        5. otherwise: the response is returned as received.
+    Exceptions from the upstream calls are not caught here.
+    """
+    tools = body.get("tools") or []
+    names = schemas.tool_names(tools)
+    tool_choice = body.get("tool_choice", "auto")
+    model = body.get("model", "")
+    messages = body.get("messages", [])
+    # No tools, or the caller explicitly forbade them: plain completion.
+    if not names or tool_choice == "none":
+        clean = {k: v for k, v in body.items() if k not in ("tools", "tool_choice")}
+        return await up.chat_openai(clean)
+    # Stage one: the model's own, unconstrained answer.
+    resp = await up.chat_openai(body)
+    try:
+        choice = resp["choices"][0]
+        message = choice["message"]
+    except (KeyError, IndexError):
+        return resp  # unfamiliar shape — hand it back untouched
+    # A specific tool_choice wins over whatever stage-1 decided to do — even if
+    # the model chose a different tool or answered in prose, honour the request.
+    forced = _forced_name(tool_choice, names)
+    if forced:
+        logger.info("forced call %s (tool_choice names it)", forced)
+        await _force_call(up, model, messages, tools, forced, message, choice)
+        return resp
+    calls = message.get("tool_calls")
+    if calls:
+        repaired = []
+        for call in calls:
+            fixed = await _repair_call(call, up, model, messages, tools, names)
+            # None means "could not be tied to a real tool": keep the model's
+            # original call rather than dropping it.
+            repaired.append(fixed if fixed is not None else call)
+        message["tool_calls"] = repaired
+        return resp
+    # The model answered in prose. Force a call only if tool_choice demanded one.
+    if tool_choice == "required":
+        name = await _pick_tool(up, model, messages, names)
+        logger.info("forced call %s (tool_choice=required)", name)
+        await _force_call(up, model, messages, tools, name, message, choice)
+        return resp
+    # `auto` with a prose answer is a legitimate outcome — leave it alone.
+    return resp

toolrails/schemas.py ADDED Viewed

@@ -0,0 +1,218 @@
+"""Pure, dependency-light helpers for tool-call repair.
+Nothing here does I/O or talks to a model — it is all deterministic string and
+schema work, so it can be unit-tested on its own and reasoned about in
+isolation. The network side lives in `upstream.py`; the orchestration that ties
+them together lives in `pipeline.py`.
+"""
+from __future__ import annotations
+import difflib
+import json
+import re
+from typing import Any
+try:
+    import jsonschema
+except ImportError:  # pragma: no cover - jsonschema is a hard dependency
+    jsonschema = None  # type: ignore
+# --- tool introspection ----------------------------------------------------
+def tool_names(tools: list[dict[str, Any]]) -> list[str]:
+    """The set of function names the caller offered, in order."""
+    names = []
+    for t in tools or []:
+        fn = t.get("function") or {}
+        name = fn.get("name")
+        if name:
+            names.append(name)
+    return names
+def schema_for(tools: list[dict[str, Any]], name: str) -> dict[str, Any]:
+    """The JSON schema for a named tool's *arguments*.
+    OpenAI/Ollama both carry it at function.parameters. A tool with no declared
+    parameters gets a permissive empty-object schema so constrained decoding
+    still produces valid (empty) JSON rather than failing.
+    """
+    for t in tools or []:
+        fn = t.get("function") or {}
+        if fn.get("name") == name:
+            params = fn.get("parameters")
+            if isinstance(params, dict) and params:
+                return params
+            return {"type": "object", "properties": {}}
+    return {"type": "object", "properties": {}}
+def describe(tools: list[dict[str, Any]], name: str) -> str:
+    """A tool's human description, for nudging the constrained second pass."""
+    for t in tools or []:
+        fn = t.get("function") or {}
+        if fn.get("name") == name:
+            return (fn.get("description") or "").strip()
+    return ""
+# --- name repair -----------------------------------------------------------
+def nearest_name(name: str, valid: list[str], cutoff: float = 0.6) -> str | None:
+    """Snap a hallucinated tool name to the closest real one.
+    Small local models routinely emit a name that is *almost* right —
+    `get_weather` for `getWeather`, `read` for `read_file`. If exactly one
+    valid name is close we snap to it; otherwise we return None and let the
+    caller decide (toolrails never invents a call the model didn't attempt).
+    """
+    if not name or not valid:
+        return None
+    if name in valid:
+        return name
+    matches = difflib.get_close_matches(name, valid, n=1, cutoff=cutoff)
+    return matches[0] if matches else None
+# --- argument repair -------------------------------------------------------
+_FENCE = re.compile(r"^\s*```(?:json)?\s*|\s*```\s*$", re.IGNORECASE)
+_TRAILING_COMMA = re.compile(r",(\s*[}\]])")
+def parse_arguments(raw: Any) -> dict[str, Any] | None:
+    """Coerce whatever the model produced for `arguments` into a dict.
+    OpenAI sends arguments as a JSON *string*; Ollama's native API sends a dict.
+    Weak models send neither cleanly — fenced code, trailing commas, a stray
+    sentence in front. We try the strict parse first, then a best-effort
+    repair. Returns None if there is nothing recoverable.
+    """
+    if isinstance(raw, dict):
+        return raw
+    if not isinstance(raw, str):
+        return None
+    text = raw.strip()
+    if not text:
+        return {}
+    # 1. strict
+    try:
+        val = json.loads(text)
+        return val if isinstance(val, dict) else None
+    except json.JSONDecodeError:
+        pass
+    # 2. strip markdown fences and retry
+    stripped = _FENCE.sub("", text).strip()
+    # 3. carve out the outermost {...} object if there's prose around it
+    start, end = stripped.find("{"), stripped.rfind("}")
+    if start != -1 and end != -1 and end > start:
+        stripped = stripped[start : end + 1]
+    # 4. drop trailing commas before } or ]
+    stripped = _TRAILING_COMMA.sub(r"\1", stripped)
+    try:
+        val = json.loads(stripped)
+        return val if isinstance(val, dict) else None
+    except json.JSONDecodeError:
+        return None
+def coerce(value: Any, schema: dict[str, Any]) -> Any:
+    """Nudge the model's own value toward the schema's type without inventing
+    anything. This fixes the failure we actually see from small models — the
+    right value with the wrong type: an integer sent as `"30"`, an array or
+    object serialized into a `"[...]"` string, a boolean as `"true"`. It walks
+    the schema recursively and only ever *reshapes* values it already has; it
+    never fills in a missing one. Anything it can't confidently convert is left
+    untouched for validation (and, failing that, grammar regeneration) to catch.
+    """
+    if not isinstance(schema, dict):
+        return value
+    t = schema.get("type")
+    if isinstance(t, list):  # e.g. ["string", "null"] — too ambiguous to coerce
+        return value
+    # A structured value the model flattened into a JSON string ("[...]", "{...}").
+    if t in ("object", "array") and isinstance(value, str):
+        try:
+            value = json.loads(value)
+        except (json.JSONDecodeError, ValueError):
+            return value
+    if t == "integer":
+        if isinstance(value, bool):
+            return value
+        if isinstance(value, int):
+            return value
+        if isinstance(value, float) and value.is_integer():
+            return int(value)
+        if isinstance(value, str):
+            try:
+                return int(value.strip())
+            except ValueError:
+                try:
+                    f = float(value.strip())
+                    return int(f) if f.is_integer() else value
+                except ValueError:
+                    return value
+        return value
+    if t == "number":
+        if isinstance(value, bool):
+            return value
+        if isinstance(value, (int, float)):
+            return value
+        if isinstance(value, str):
+            try:
+                return float(value.strip())
+            except ValueError:
+                return value
+        return value
+    if t == "boolean":
+        if isinstance(value, str):
+            low = value.strip().lower()
+            if low in ("true", "yes", "1"):
+                return True
+            if low in ("false", "no", "0"):
+                return False
+        return value
+    if t == "array" and isinstance(value, list):
+        items = schema.get("items")
+        return [coerce(v, items) for v in value] if isinstance(items, dict) else value
+    if isinstance(value, dict):  # object, or a schema with properties but no type
+        props = schema.get("properties")
+        if isinstance(props, dict):
+            return {k: (coerce(v, props[k]) if k in props else v) for k, v in value.items()}
+    return value
+def args_valid(args: dict[str, Any], schema: dict[str, Any]) -> bool:
+    """True if `args` satisfies the tool's parameter schema.
+    If jsonschema is somehow unavailable we degrade to "is it a dict" rather
+    than crashing — toolrails must never be the reason a call fails to go out.
+    """
+    if not isinstance(args, dict):
+        return False
+    if jsonschema is None:  # pragma: no cover
+        return True
+    try:
+        jsonschema.validate(args, schema)
+        return True
+    except jsonschema.ValidationError:
+        return False
+    except jsonschema.SchemaError:
+        # A malformed tool schema is the caller's problem, not the model's —
+        # don't block the call over it.
+        return True

toolrails/upstream.py ADDED Viewed

@@ -0,0 +1,91 @@
+"""The only part of toolrails that talks to Ollama.
+Two calls matter:
+* `chat_openai` — forward the request to Ollama's OpenAI-compatible endpoint
+  and get its natural, *unconstrained* answer. This is stage one: we let the
+  model decide whether and which tool to call, because constraining that
+  decision is exactly what suppresses tool calls (see the constraint-tax note
+  in the README).
+* `constrained_object` — Ollama's native `/api/chat` with a JSON schema in
+  `format`. Ollama compiles the schema to a grammar (XGrammar) and constrains
+  decoding token by token, so the output is *structurally guaranteed* to match.
+  This is stage two: once a tool is chosen, we regenerate only its arguments
+  under the grammar.
+"""
+from __future__ import annotations
+import json
+from typing import Any
+import httpx
+class Upstream:
+    def __init__(self, base_url: str, timeout: float = 600.0) -> None:
+        self.base_url = base_url.rstrip("/")
+        self._client = httpx.AsyncClient(timeout=timeout)
+    async def aclose(self) -> None:
+        await self._client.aclose()
+    async def chat_openai(self, body: dict[str, Any]) -> dict[str, Any]:
+        """Stage one: unconstrained OpenAI-compatible chat completion.
+        Raises on a non-2xx status so the pipeline's fail-open guard trips and
+        the caller gets Ollama's own response (see `chat_raw`) instead of a
+        half-repaired one.
+        """
+        r = await self._client.post(
+            f"{self.base_url}/v1/chat/completions", json=body
+        )
+        r.raise_for_status()
+        return r.json()
+    async def chat_raw(self, body: dict[str, Any]) -> httpx.Response:
+        """Forward a chat request and hand back Ollama's response verbatim,
+        status and all. Used for pass-through and as the fail-open path, so an
+        upstream 400 (e.g. a model that doesn't support tools) reaches the
+        client as a 400 rather than being masked as a proxy 500."""
+        return await self._client.post(
+            f"{self.base_url}/v1/chat/completions", json=body
+        )
+    async def constrained_object(
+        self,
+        model: str,
+        messages: list[dict[str, Any]],
+        schema: dict[str, Any],
+        options: dict[str, Any] | None = None,
+    ) -> dict[str, Any] | None:
+        """Stage two: grammar-constrained JSON matching `schema`.
+        Returns the decoded object, or None if the model produced nothing
+        usable even under the grammar (rare, but we stay fail-open).
+        """
+        payload: dict[str, Any] = {
+            "model": model,
+            "messages": messages,
+            "format": schema,
+            "stream": False,
+            # Arguments should be deterministic given the decision to call the
+            # tool — there is no creativity wanted in a function signature.
+            "options": {"temperature": 0, **(options or {})},
+        }
+        r = await self._client.post(f"{self.base_url}/api/chat", json=payload)
+        r.raise_for_status()
+        content = ((r.json() or {}).get("message") or {}).get("content", "")
+        if not content:
+            return None
+        try:
+            val = json.loads(content)
+            return val if isinstance(val, dict) else None
+        except json.JSONDecodeError:
+            return None
+    async def passthrough(self, path: str, method: str = "GET") -> httpx.Response:
+        """Forward an unrelated endpoint (e.g. /v1/models) verbatim."""
+        r = await self._client.request(method, f"{self.base_url}{path}")
+        return r

toolrails-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,208 @@
+Metadata-Version: 2.4
+Name: toolrails
+Version: 0.1.0
+Summary: Valid tool calls from any local model. A drop-in OpenAI-compatible proxy for Ollama that guarantees well-formed tool calls and restores tool_choice.
+Project-URL: Homepage, https://github.com/theadamdanielsson/toolrails
+Project-URL: Issues, https://github.com/theadamdanielsson/toolrails/issues
+Author-email: Adam Danielsson <the.adam.danielsson@gmail.com>
+License-Expression: MIT
+License-File: LICENSE
+Keywords: agent,function-calling,llm,local-llm,ollama,openai,proxy,structured-output,tool-calling
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Topic :: Software Development :: Libraries
+Requires-Python: >=3.10
+Requires-Dist: httpx>=0.27
+Requires-Dist: jsonschema>=4.0
+Requires-Dist: starlette>=0.37
+Requires-Dist: uvicorn>=0.30
+Provides-Extra: dev
+Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
+Requires-Dist: pytest>=7; extra == 'dev'
+Description-Content-Type: text/markdown
+# toolrails
+**Valid tool calls from any local model.**
+Local models are good enough to code with now — until they try to call a tool.
+A small model on Ollama will decide to call `read_file` and then hand your agent
+the arguments as a *string* instead of an object, or an array field serialized
+as `"[...]"`, or an integer wrapped in quotes, or invent a tool named
+`readFile`. The agent can't use it, retries, gets the same broken call, and
+burns your evening in a loop. (See
+[ollama/ollama#15390](https://github.com/ollama/ollama/issues/15390): Claude Code plus
+a local model, stuck on *Invalid tool parameters*, unresolved.)
+toolrails is a small proxy that sits between your agent and Ollama and makes that
+stop. Your agent speaks the ordinary OpenAI API to it; toolrails guarantees the
+tool calls that come back are well-formed — a real tool name, and arguments that
+match the tool's JSON schema.
+```bash
+# start it (nothing to install with uv)
+uvx toolrails --ollama http://localhost:11434
+# then point your agent's base URL at toolrails instead of Ollama:
+#   http://localhost:11500/v1
+```
+That's the whole change. One base URL.
+## Point your agent at it
+toolrails speaks the OpenAI API, so anything that lets you set a base URL works —
+Cline, opencode, the OpenAI SDKs, your own scripts. Point the base URL at
+`http://localhost:11500/v1` and keep using your Ollama model name. The API key is
+ignored, so pass any placeholder.
+```python
+from openai import OpenAI
+client = OpenAI(base_url="http://localhost:11500/v1", api_key="ollama")
+resp = client.chat.completions.create(
+    model="llama3.2:3b",
+    messages=[{"role": "user", "content": "weather in Oslo?"}],
+    tools=[...],
+)
+```
+## The difference, measured
+A benchmark ships in the repo (`demo/reliability.py`): the same tool-calling
+request, twelve times, against raw Ollama and through toolrails, using a
+realistically complex tool — typed fields and a nested array of objects, the way
+a real coding agent's tools actually look.
+| endpoint | model | valid tool calls |
+| --- | --- | --- |
+| raw Ollama | llama3.2:3b | **0 / 12** |
+| via toolrails | llama3.2:3b | **12 / 12** |
+The model isn't stupid — it gets the *values* right and the *types* wrong. Raw,
+it hands your agent this (note the integer-as-string and the two stringified
+arrays):
+```json
+{"duration_minutes": "30",
+ "attendees": "[\"alice@example.com\", \"bob@example.com\"]",
+ "reminders": "[{\"method\": \"email\", \"minutes_before\": 10}]"}
+```
+`attendees` is a string, not a list — your agent can't iterate it, so the call
+fails and the retry loop begins. Through toolrails, the same request and the same
+model:
+```json
+{"duration_minutes": 30,
+ "attendees": ["alice@example.com", "bob@example.com"],
+ "reminders": [{"method": "email", "minutes_before": 10}]}
+```
+Correct types, real nested arrays, every time. Simpler flat tools fail far less
+often raw — the gap is widest exactly where real agent tools live: structured,
+typed, nested.
+## What it guarantees
+- **The tool name is real.** A hallucinated `getWeather` is snapped to the
+  `get_weather` you actually offered; a name that matches nothing is left alone
+  rather than guessed at.
+- **The arguments parse and fit the schema.** When the model's arguments don't
+  validate, toolrails first fixes the *types* of its own values — the array it
+  sent as a string, the integer it quoted — and only if that still can't satisfy
+  the schema does it regenerate them under a grammar built from the tool's
+  schema. Either way, the call you receive validates.
+- **`tool_choice` works again.** Ollama's OpenAI-compatible endpoint silently
+  ignores `tool_choice`. toolrails restores it: `"none"` strips the tools,
+  `"required"` (or a named function) forces a call even when the model tried to
+  answer in prose.
+## It never breaks your agent
+toolrails fails open. If it can't reach Ollama's constrained endpoint, hits a
+tool schema it can't make sense of, or throws anywhere in the repair path, it
+forwards the model's original answer unchanged. The worst it can ever do is
+nothing — it will not turn a working call into an error. And on the common case,
+where the model already produced a valid call, it adds **zero** extra model
+calls: the fast path recognises a good call and passes it straight through.
+## How it works
+The naive fix — force every response through the tool's grammar — backfires.
+Constraining the *decision* to call a tool is what makes models stop calling
+tools at all; there's a measured "constraint tax" for exactly this
+([arXiv:2606.25605](https://arxiv.org/abs/2606.25605)). So toolrails never
+touches the decision. It asks Ollama normally, lets the model choose whether and
+which tool to call, and then repairs the result in the cheapest way that works:
+1. **If the call already validates, it passes straight through** — no extra work.
+2. **If only the types are wrong** — the array the model sent as a string, the
+   integer it quoted — toolrails coerces the model's *own* values to the schema.
+   This is the common case; it costs no second model call and never changes what
+   the model meant.
+3. **If coercion still can't satisfy the schema**, toolrails regenerates the
+   arguments with the tool's JSON schema in Ollama's `format` parameter. Ollama
+   compiles that schema to a grammar (XGrammar) and constrains decoding token by
+   token, so the arguments come back well-formed by construction.
+Names are repaired by deterministic string matching, arguments checked with
+`jsonschema`. There is no second model judging the first — just coercion, a
+grammar, and a validator. And if every step somehow fails, the model's original
+answer passes through untouched.
+## Install
+You need [Ollama](https://ollama.com) running and Python 3.10 or newer.
+```bash
+uvx toolrails                 # run without installing
+pip install toolrails         # or install the CLI
+toolrails --ollama http://localhost:11434 --port 11500
+```
+Options: `--ollama` (Ollama base URL, or `$OLLAMA_HOST`), `--host`, `--port`,
+`--quiet` (stop logging a line per repaired call). It prints one line whenever it
+steps in, so you can see it working:
+```
+toolrails: call create_event repaired (arguments did not match schema)
+toolrails: forced call get_weather (tool_choice names it)
+```
+## Scope
+toolrails fixes the *shape* of tool calls: valid name, valid arguments, working
+`tool_choice`. It does not make a weak model *choose* the right tool, invent a
+call the model didn't attempt, or route between models. If the model decides not
+to call a tool, that decision stands (unless you set `tool_choice: required`).
+It is a proxy over Ollama specifically, because the leverage is Ollama's
+grammar-constrained `format` — the same primitive the guarantee is built on. It
+repairs models that *attempt* tool calls; a model Ollama rejects outright with
+*"does not support tools"* (some chat templates have none) is out of scope for
+v1 — forcing tool calls onto those is a bigger, separate job.
+Streaming requests are supported: with tools, the response is repaired and then
+re-emitted as standard incremental deltas (verified against the OpenAI SDK's
+streaming client). The repair still buffers internally rather than streaming the
+model's output token by token — that's a later refinement; v1 gets the call right first.
+## Contributing
+The most useful thing you can send is a tool call that came out wrong: the model,
+the tool schema you gave it, and what it produced. That is the test set. See
+[CONTRIBUTING.md](CONTRIBUTING.md) for how to run the tests and the reliability
+benchmark against your own models.
+## From the same author
+toolrails is by the author of [overloop](https://github.com/theadamdanielsson/overloop)
+(*stop your agent looping*) and [overllm](https://github.com/theadamdanielsson/overllm)
+(*catch the LLM calls you didn't need*). Same theme, one layer down: those stop
+wasted agent work; this stops the wasted work of a tool call that never parses.
+## License
+MIT © Adam Danielsson

toolrails-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+toolrails/__init__.py,sha256=8x230z_B1KhJCAJRV98b19V2L27O_HxueM3Pu-68JBs,358
+toolrails/__main__.py,sha256=MSmt_5Xg84uHqzTN38JwgseJK8rsJn_11A8WD99VtEo,61
+toolrails/app.py,sha256=wnb9VetefKveTTdLP0B52_Lq-nOrLheBitlP0EW95Oo,5773
+toolrails/cli.py,sha256=G8NYfkRe6NXfIBZBY2XoIsftpDDAd4yl7Nix3n8k8NQ,1773
+toolrails/pipeline.py,sha256=XCk13g6YZ8jq4rIh_dey6DD2l_MyRN26iGmYESlnD-U,7786
+toolrails/schemas.py,sha256=EwWC8sE2_xN3QGiAUB9-iv9NRkCr4pQoWolNi_QluXo,7608
+toolrails/upstream.py,sha256=vJCPVREVFdCSL-821b9NI_A48auzV6DWZdtYPHS3q3Y,3546
+toolrails-0.1.0.dist-info/METADATA,sha256=_d8aeMkN-XEo-YV5TV__taosZRa7_fQB9zX413jCzp0,9146
+toolrails-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+toolrails-0.1.0.dist-info/entry_points.txt,sha256=joBwCHvMR5wl7Y1bqzV7UR8Zey-SiQlEo22KT4uEFQA,49
+toolrails-0.1.0.dist-info/licenses/LICENSE,sha256=dJo02JmanV48uwAVydix7ep6cVMAmT1gY_yoWItnDXc,1072
+toolrails-0.1.0.dist-info/RECORD,,

toolrails-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

toolrails-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[console_scripts]
+toolrails = toolrails.cli:main

toolrails-0.1.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Adam Danielsson
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.