toolrails 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
toolrails/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """toolrails — valid tool calls from any local model.
2
+
3
+ A drop-in OpenAI-compatible proxy in front of Ollama that guarantees the tool
4
+ calls your agent receives are well-formed: real tool name, arguments that match
5
+ the tool's JSON schema. It also restores `tool_choice`, which Ollama's
6
+ OpenAI-compatible endpoint silently ignores.
7
+ """
8
+
9
+ __version__ = "0.1.0"
toolrails/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+
3
+ if __name__ == "__main__":
4
+ main()
toolrails/app.py ADDED
@@ -0,0 +1,155 @@
1
+ """The ASGI proxy: an OpenAI-compatible front door that repairs tool calls.
2
+
3
+ Point any agent that speaks the OpenAI API at this instead of at Ollama and
4
+ nothing else changes. Endpoints:
5
+
6
+ POST /v1/chat/completions the pipeline (or a plain pass-through)
7
+ GET /v1/models forwarded to Ollama verbatim
8
+ GET /health liveness
9
+
10
+ Fail-open is the whole safety story: if the repair pipeline raises for any
11
+ reason, we forward the original request unchanged rather than return an error.
12
+ The worst toolrails can do is nothing.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import contextlib
18
+ import json
19
+ import time
20
+ from typing import Any
21
+
22
+ from starlette.applications import Starlette
23
+ from starlette.requests import Request
24
+ from starlette.responses import JSONResponse, Response, StreamingResponse
25
+ from starlette.routing import Route
26
+
27
+ from .pipeline import handle
28
+ from .upstream import Upstream
29
+
30
+
31
def create_app(ollama_url: str) -> Starlette:
    """Build the Starlette proxy application for one Ollama server.

    Routes:
        POST /v1/chat/completions  repair pipeline, or a plain pass-through
        GET  /v1/models            forwarded to Ollama
        GET  /health               liveness

    Args:
        ollama_url: Base URL of the Ollama server; an `Upstream` for it is
            created on startup and closed on shutdown.

    Returns:
        The configured Starlette application.
    """
    # Local import keeps this block self-contained; the name matches the
    # logger the pipeline module uses.
    import logging

    log = logging.getLogger("toolrails")

    async def chat_completions(request: Request) -> Response:
        up: Upstream = request.app.state.upstream
        try:
            body = await request.json()
        except (json.JSONDecodeError, ValueError):
            return JSONResponse({"error": "invalid JSON body"}, status_code=400)
        # Valid JSON that is not an object (e.g. `[]` or `"x"`) would crash on
        # `.get` below and surface as a 500; it is a client error.
        if not isinstance(body, dict):
            return JSONResponse(
                {"error": "request body must be a JSON object"}, status_code=400
            )

        wants_stream = bool(body.get("stream"))
        has_tools = bool(body.get("tools"))

        # Without tools there is nothing to repair, so forward untouched
        # (including native streaming).
        if not has_tools:
            return await _forward(up, body, wants_stream)

        # With tools, run the pipeline on a non-streamed response and re-emit
        # it as a stream afterwards if the client asked for one.
        try:
            result = await handle({**body, "stream": False}, up)
        except Exception:
            # Fail open: give the client Ollama's own answer, but leave a
            # trace so a permanently failing pipeline is visible.
            log.warning(
                "repair pipeline failed; forwarding request unchanged",
                exc_info=True,
            )
            return await _forward(up, body, wants_stream)

        if wants_stream:
            return StreamingResponse(
                _as_sse(result), media_type="text/event-stream"
            )
        return JSONResponse(result)

    async def models(request: Request) -> Response:
        up: Upstream = request.app.state.upstream
        r = await up.passthrough("/v1/models")
        return Response(
            r.content,
            status_code=r.status_code,
            media_type=r.headers.get("content-type", "application/json"),
        )

    async def health(request: Request) -> Response:
        return JSONResponse({"status": "ok", "ollama": ollama_url})

    @contextlib.asynccontextmanager
    async def lifespan(app: Starlette):
        # One shared upstream client for the lifetime of the application.
        app.state.upstream = Upstream(ollama_url)
        try:
            yield
        finally:
            await app.state.upstream.aclose()

    app = Starlette(
        routes=[
            Route("/v1/chat/completions", chat_completions, methods=["POST"]),
            Route("/v1/models", models, methods=["GET"]),
            Route("/health", health, methods=["GET"]),
        ],
        lifespan=lifespan,
    )
    return app
89
+
90
+
91
async def _forward(up: Upstream, body: dict[str, Any], wants_stream: bool) -> Response:
    """Pass a request straight to Ollama.

    Used when there is nothing to repair and as the fail-open path. In both
    modes the upstream status code and content type are relayed, so an
    upstream error reaches the client as that error rather than as a 200.

    Args:
        up: The shared upstream client.
        body: The chat-completions request body, forwarded as JSON.
        wants_stream: Whether to relay the upstream body incrementally.

    Returns:
        A buffered `Response`, or a `StreamingResponse` when streaming.
    """
    if not wants_stream:
        r = await up.chat_raw(body)
        return Response(
            r.content,
            status_code=r.status_code,
            media_type=r.headers.get("content-type", "application/json"),
        )

    client = up._client  # noqa: SLF001 - deliberate reuse
    upstream_request = client.build_request(
        "POST", f"{up.base_url}/v1/chat/completions", json=body
    )
    # Open the stream *before* building the response so the upstream status
    # and content type are known; the body is still read lazily below.
    upstream = await client.send(upstream_request, stream=True)

    async def relay():
        try:
            async for chunk in upstream.aiter_raw():
                yield chunk
        finally:
            # Release the connection whether the stream finished or was
            # abandoned partway through.
            await upstream.aclose()

    return StreamingResponse(
        relay(),
        status_code=upstream.status_code,
        media_type=upstream.headers.get("content-type", "text/event-stream"),
    )
108
+
109
+
110
+ def _as_sse(result: dict[str, Any]):
111
+ """Re-emit a completed response as an OpenAI streaming sequence.
112
+
113
+ The repaired response is already whole, so we replay it in the shape clients
114
+ actually accumulate: a role delta, then content, then for each tool call an
115
+ opener delta (index, id, type, name) followed by its arguments, then a final
116
+ delta carrying finish_reason, then [DONE]. Each tool call carries its
117
+ `index`, which is what strict clients key on — the piece the old single-chunk
118
+ form omitted. Real token-by-token streaming under the grammar is still a
119
+ later refinement; this makes the buffered form protocol-correct.
120
+ """
121
+ try:
122
+ choice = result["choices"][0]
123
+ message = choice.get("message", {})
124
+ except (KeyError, IndexError):
125
+ yield b"data: [DONE]\n\n"
126
+ return
127
+
128
+ base = {
129
+ "id": result.get("id", f"chatcmpl-{int(time.time())}"),
130
+ "object": "chat.completion.chunk",
131
+ "created": result.get("created", int(time.time())),
132
+ "model": result.get("model", ""),
133
+ }
134
+
135
+ def emit(delta: dict[str, Any], finish: str | None = None) -> bytes:
136
+ chunk = {**base, "choices": [{"index": 0, "delta": delta, "finish_reason": finish}]}
137
+ return f"data: {json.dumps(chunk)}\n\n".encode()
138
+
139
+ yield emit({"role": "assistant"})
140
+
141
+ if message.get("content"):
142
+ yield emit({"content": message["content"]})
143
+
144
+ for i, call in enumerate(message.get("tool_calls") or []):
145
+ fn = call.get("function", {})
146
+ yield emit({"tool_calls": [{
147
+ "index": i, "id": call.get("id", f"call_{i}"), "type": "function",
148
+ "function": {"name": fn.get("name", ""), "arguments": ""},
149
+ }]})
150
+ yield emit({"tool_calls": [{
151
+ "index": i, "function": {"arguments": fn.get("arguments", "")},
152
+ }]})
153
+
154
+ yield emit({}, finish=choice.get("finish_reason", "stop"))
155
+ yield b"data: [DONE]\n\n"
toolrails/cli.py ADDED
@@ -0,0 +1,56 @@
1
+ """`toolrails` command line: start the proxy."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import logging
7
+ import os
8
+
9
+ from . import __version__
10
+
11
+
12
+ def main(argv: list[str] | None = None) -> None:
13
+ parser = argparse.ArgumentParser(
14
+ prog="toolrails",
15
+ description="An OpenAI-compatible proxy that guarantees valid tool "
16
+ "calls from local models served by Ollama.",
17
+ )
18
+ parser.add_argument(
19
+ "--ollama",
20
+ default=os.environ.get("OLLAMA_HOST", "http://localhost:11434"),
21
+ help="Base URL of the Ollama server (default: %(default)s).",
22
+ )
23
+ parser.add_argument(
24
+ "--host", default="127.0.0.1", help="Address to bind (default: %(default)s)."
25
+ )
26
+ parser.add_argument(
27
+ "--port", type=int, default=11500, help="Port to listen on (default: %(default)s)."
28
+ )
29
+ parser.add_argument(
30
+ "--quiet", action="store_true", help="Don't log a line per repaired call."
31
+ )
32
+ parser.add_argument("--version", action="version", version=f"toolrails {__version__}")
33
+ args = parser.parse_args(argv)
34
+
35
+ logging.basicConfig(
36
+ level=logging.WARNING if args.quiet else logging.INFO,
37
+ format="toolrails: %(message)s",
38
+ )
39
+ # Only our own per-call lines; the HTTP client's request chatter stays quiet.
40
+ logging.getLogger("httpx").setLevel(logging.WARNING)
41
+ logging.getLogger("httpcore").setLevel(logging.WARNING)
42
+
43
+ import uvicorn
44
+
45
+ from .app import create_app
46
+
47
+ app = create_app(args.ollama)
48
+ print(
49
+ f"toolrails {__version__} → proxying {args.ollama}\n"
50
+ f"point your agent's base URL at http://{args.host}:{args.port}/v1"
51
+ )
52
+ uvicorn.run(app, host=args.host, port=args.port, log_level="warning")
53
+
54
+
55
+ if __name__ == "__main__":
56
+ main()
toolrails/pipeline.py ADDED
@@ -0,0 +1,206 @@
1
+ """The two-stage tool-call pipeline.
2
+
3
+ Given an OpenAI-shaped chat request that offers tools, produce a response whose
4
+ tool calls are guaranteed well-formed. The shape:
5
+
6
+ 1. Ask the model naturally (unconstrained). Whether and which tool to call
7
+ is the model's decision — we never constrain that step, because doing so
8
+ is what makes models stop calling tools (the "constraint tax").
9
+ 2. For each call it attempted: if the name is real and the arguments already
10
+ validate, keep it as-is (the fast path — zero extra cost). Otherwise snap
11
+ the name to the nearest real tool and regenerate *only the arguments*
12
+ under a grammar built from that tool's schema, which cannot produce
13
+ invalid JSON.
14
+ 3. Honour `tool_choice`, which Ollama's OpenAI endpoint drops on the floor:
15
+ `none` strips tools, `required`/a named function forces a call even when
16
+ the model tried to answer in prose.
17
+
18
+ Every public entry point is wrapped by the caller in a fail-open guard: if
19
+ anything in here raises, the proxy falls back to a plain pass-through so it can
20
+ never wedge the agent using it.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import logging
27
+ from typing import Any
28
+
29
+ from . import schemas
30
+ from .upstream import Upstream
31
+
32
+ logger = logging.getLogger("toolrails")
33
+
34
+
35
+ def _nudge(name: str, description: str) -> dict[str, str]:
36
+ """The instruction that steers the constrained second pass toward one tool."""
37
+ desc = f" ({description})" if description else ""
38
+ return {
39
+ "role": "user",
40
+ "content": (
41
+ f"Call the tool `{name}`{desc} given the conversation above. "
42
+ f"Respond with only a JSON object of its arguments."
43
+ ),
44
+ }
45
+
46
+
47
+ def _as_openai_call(name: str, args: dict[str, Any], call_id: str) -> dict[str, Any]:
48
+ """A tool call in the exact shape OpenAI clients expect (arguments = string)."""
49
+ return {
50
+ "id": call_id,
51
+ "type": "function",
52
+ "function": {"name": name, "arguments": json.dumps(args)},
53
+ }
54
+
55
+
56
+ def _forced_name(tool_choice: Any, names: list[str]) -> str | None:
57
+ """The specific tool named by `tool_choice={"function": {"name": ...}}`."""
58
+ if isinstance(tool_choice, dict):
59
+ fn = tool_choice.get("function") or {}
60
+ name = fn.get("name")
61
+ if name in names:
62
+ return name
63
+ return None
64
+
65
+
66
async def _repair_call(
    call: dict[str, Any],
    up: Upstream,
    model: str,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    names: list[str],
) -> dict[str, Any] | None:
    """Produce a repaired version of one attempted tool call.

    Steps, cheapest first: keep arguments that already validate; otherwise
    coerce the types of the model's own values; otherwise regenerate the
    arguments with a schema-constrained second request.

    Returns:
        The repaired call, or None when the attempted name matches no offered
        tool, in which case the caller keeps the model's original output.
    """
    function = call.get("function") or {}
    attempted = function.get("name") or ""
    name = schemas.nearest_name(attempted, names)
    if name is None:
        logger.warning("unknown tool %r left untouched", attempted)
        return None
    if name != attempted:
        logger.info("name %r → %r", attempted, name)

    schema = schemas.schema_for(tools, name)
    call_id = call.get("id") or f"call_{name}"
    args = schemas.parse_arguments(function.get("arguments"))

    if args is not None:
        # Fast path: already valid, no second model call.
        if schemas.args_valid(args, schema):
            logger.info("call %s ok", name)
            return _as_openai_call(name, args, call_id)

        # Surgical path: fix value types only, still no second model call.
        coerced = schemas.coerce(args, schema)
        if schemas.args_valid(coerced, schema):
            logger.info("call %s coerced (fixed argument types)", name)
            return _as_openai_call(name, coerced, call_id)

    # Last resort: one more generation, constrained by the tool's schema.
    logger.info("call %s regenerated under grammar", name)
    nudge = _nudge(name, schemas.describe(tools, name))
    regenerated = await up.constrained_object(model, messages + [nudge], schema)
    if regenerated is None:
        # Nothing usable came back; fall back to whatever was parsed.
        regenerated = {} if args is None else args
    return _as_openai_call(name, regenerated, call_id)
117
+
118
+
119
+ async def _pick_tool(
120
+ up: Upstream,
121
+ model: str,
122
+ messages: list[dict[str, Any]],
123
+ names: list[str],
124
+ ) -> str:
125
+ """Choose one tool by name when `tool_choice` forces a call but the model
126
+ answered in prose. The decision is already made (a call *must* happen), so
127
+ constraining a name-only choice here carries no tax."""
128
+ if len(names) == 1:
129
+ return names[0]
130
+ selection = await up.constrained_object(
131
+ model,
132
+ messages
133
+ + [{"role": "user", "content": "Which tool should be called? Reply with its name."}],
134
+ {"type": "object", "properties": {"tool": {"enum": names}}, "required": ["tool"]},
135
+ )
136
+ chosen = (selection or {}).get("tool")
137
+ return chosen if chosen in names else names[0]
138
+
139
+
140
async def _force_call(
    up: Upstream,
    model: str,
    messages: list[dict[str, Any]],
    tools: list[dict[str, Any]],
    name: str,
    message: dict[str, Any],
    choice: dict[str, Any],
) -> None:
    """Rewrite `message` in place into exactly one call to `name`.

    The arguments come from a schema-constrained request; an empty object is
    used if that request returns nothing. `message["content"]` is cleared and
    `choice["finish_reason"]` is set to `"tool_calls"`.
    """
    nudge = _nudge(name, schemas.describe(tools, name))
    generated = await up.constrained_object(
        model, messages + [nudge], schemas.schema_for(tools, name)
    )
    forced_call = _as_openai_call(name, generated or {}, f"call_{name}")

    message["content"] = None
    message["tool_calls"] = [forced_call]
    choice["finish_reason"] = "tool_calls"
158
+
159
+
160
async def handle(body: dict[str, Any], up: Upstream) -> dict[str, Any]:
    """Run the repair pipeline for one non-streaming chat-completions request.

    Args:
        body: The OpenAI-shaped request body.
        up: The upstream client used for every model call.

    Returns:
        The upstream response, with its first choice's tool calls repaired or
        forced according to `tool_choice`. A response of an unfamiliar shape
        is returned untouched.
    """
    tools = body.get("tools") or []
    names = schemas.tool_names(tools)
    tool_choice = body.get("tool_choice", "auto")
    model = body.get("model", "")
    messages = body.get("messages", [])

    # No usable tools, or the caller forbade them: plain completion with the
    # tool fields removed.
    if tool_choice == "none" or not names:
        stripped = {
            key: value
            for key, value in body.items()
            if key != "tools" and key != "tool_choice"
        }
        return await up.chat_openai(stripped)

    # Stage one: the model's own, unconstrained answer.
    resp = await up.chat_openai(body)
    try:
        choice = resp["choices"][0]
        message = choice["message"]
    except (KeyError, IndexError):
        return resp  # unfamiliar shape: hand it back untouched

    # A tool_choice that names a tool overrides whatever stage one produced.
    forced = _forced_name(tool_choice, names)
    if forced:
        logger.info("forced call %s (tool_choice names it)", forced)
        await _force_call(up, model, messages, tools, forced, message, choice)
        return resp

    attempted = message.get("tool_calls")
    if attempted:
        repaired = []
        for call in attempted:
            fixed = await _repair_call(call, up, model, messages, tools, names)
            # None means "could not tie to a real tool": keep the original.
            repaired.append(call if fixed is None else fixed)
        message["tool_calls"] = repaired
    elif tool_choice == "required":
        # Prose answer, but the caller demanded a call.
        name = await _pick_tool(up, model, messages, names)
        logger.info("forced call %s (tool_choice=required)", name)
        await _force_call(up, model, messages, tools, name, message, choice)

    # `auto` with a prose answer is a legitimate outcome and falls through.
    return resp
toolrails/schemas.py ADDED
@@ -0,0 +1,218 @@
1
+ """Pure, dependency-light helpers for tool-call repair.
2
+
3
+ Nothing here does I/O or talks to a model — it is all deterministic string and
4
+ schema work, so it can be unit-tested on its own and reasoned about in
5
+ isolation. The network side lives in `upstream.py`; the orchestration that ties
6
+ them together lives in `pipeline.py`.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import difflib
12
+ import json
13
+ import re
14
+ from typing import Any
15
+
16
+ try:
17
+ import jsonschema
18
+ except ImportError: # pragma: no cover - jsonschema is a hard dependency
19
+ jsonschema = None # type: ignore
20
+
21
+
22
+ # --- tool introspection ----------------------------------------------------
23
+
24
def tool_names(tools: list[dict[str, Any]]) -> list[str]:
    """List the function names the caller offered, in their original order.

    Tools without a `function` entry or without a truthy name are skipped;
    duplicates are kept.
    """
    candidates = ((tool.get("function") or {}).get("name") for tool in tools or [])
    return [candidate for candidate in candidates if candidate]
33
+
34
+
35
def schema_for(tools: list[dict[str, Any]], name: str) -> dict[str, Any]:
    """Look up the JSON schema for a named tool's arguments.

    The schema is read from `function.parameters` of the first tool whose
    name matches. If that tool declares no usable parameters, or no tool
    matches, a permissive empty-object schema is returned instead.
    """
    fallback = {"type": "object", "properties": {}}
    for tool in tools or []:
        function = tool.get("function") or {}
        if function.get("name") != name:
            continue
        declared = function.get("parameters")
        if isinstance(declared, dict) and declared:
            return declared
        return fallback
    return fallback
50
+
51
+
52
def describe(tools: list[dict[str, Any]], name: str) -> str:
    """Return the stripped description of the first tool called `name`.

    An empty string is returned when no tool matches or the matching tool
    has no description.
    """
    functions = (tool.get("function") or {} for tool in tools or [])
    match = next((fn for fn in functions if fn.get("name") == name), None)
    if match is None:
        return ""
    return (match.get("description") or "").strip()
59
+
60
+
61
+ # --- name repair -----------------------------------------------------------
62
+
63
def nearest_name(name: str, valid: list[str], cutoff: float = 0.6) -> str | None:
    """Snap a near-miss tool name to the closest real one.

    An exact match is returned as-is. Otherwise the single best candidate
    from `difflib.get_close_matches` is returned, provided its similarity
    reaches `cutoff`. None is returned when `name` or `valid` is empty or
    when nothing is close enough.
    """
    if not (name and valid):
        return None
    if name in valid:
        return name
    best = difflib.get_close_matches(name, valid, n=1, cutoff=cutoff)
    if best:
        return best[0]
    return None
77
+
78
+
79
+ # --- argument repair -------------------------------------------------------
80
+
81
# Leading or trailing markdown code fence (optionally tagged `json`).
_FENCE = re.compile(r"^\s*```(?:json)?\s*|\s*```\s*$", re.IGNORECASE)
# A comma directly before a closing brace or bracket.
_TRAILING_COMMA = re.compile(r",(\s*[}\]])")


def parse_arguments(raw: Any) -> dict[str, Any] | None:
    """Coerce whatever the model produced for `arguments` into a dict.

    A dict is returned unchanged and an empty or whitespace-only string
    becomes `{}`. Other strings are parsed strictly first; if the result is
    itself a string (arguments JSON-encoded twice) it is decoded once more.
    When strict parsing fails, a best-effort repair strips markdown fences,
    carves out the outermost `{...}` and drops trailing commas before
    retrying.

    Returns:
        The arguments as a dict, or None if nothing recoverable was found
        (including valid JSON that is not an object).
    """
    if isinstance(raw, dict):
        return raw
    if not isinstance(raw, str):
        return None

    text = raw.strip()
    if not text:
        return {}

    # 1. strict
    try:
        val = json.loads(text)
    except json.JSONDecodeError:
        pass
    else:
        if isinstance(val, str):
            # Double-encoded: the object was serialised, then serialised
            # again as a JSON string. Unwrap exactly one extra layer.
            try:
                val = json.loads(val)
            except json.JSONDecodeError:
                return None
        return val if isinstance(val, dict) else None

    # 2. strip markdown fences
    stripped = _FENCE.sub("", text).strip()

    # 3. carve out the outermost {...} object if there's prose around it
    start, end = stripped.find("{"), stripped.rfind("}")
    if start != -1 and end != -1 and end > start:
        stripped = stripped[start : end + 1]

    # 4. drop trailing commas before } or ]
    stripped = _TRAILING_COMMA.sub(r"\1", stripped)

    try:
        val = json.loads(stripped)
    except json.JSONDecodeError:
        return None
    return val if isinstance(val, dict) else None
125
+
126
+
127
def coerce(value: Any, schema: dict[str, Any]) -> Any:
    """Nudge the model's own value toward the schema's type.

    Handles the right value sent with the wrong type: an integer as `"30"`,
    a boolean as `"true"`, an array or object serialised into a string. The
    schema is walked recursively through `items` and `properties`. Values
    are only reshaped, never invented, and anything that cannot be converted
    confidently is returned unchanged for validation to judge.

    Args:
        value: The value as the model produced it.
        schema: The JSON schema fragment that applies to `value`.

    Returns:
        The coerced value, or `value` itself when no safe conversion exists.
    """
    import math  # local: only this function needs it

    if not isinstance(schema, dict):
        return value
    t = schema.get("type")
    if isinstance(t, list):  # e.g. ["string", "null"]: too ambiguous to coerce
        return value

    # A structured value the model flattened into a JSON string ("[...]", "{...}").
    if t in ("object", "array") and isinstance(value, str):
        try:
            decoded = json.loads(value)
        except (json.JSONDecodeError, ValueError):
            return value
        # Only accept the decoded form if it has the shape the schema asks
        # for; otherwise keep the model's original string.
        if not isinstance(decoded, dict if t == "object" else list):
            return value
        value = decoded

    if t == "integer":
        # bool is a subclass of int; leave it for validation to reject.
        if isinstance(value, bool) or isinstance(value, int):
            return value
        if isinstance(value, float) and value.is_integer():
            return int(value)
        if isinstance(value, str):
            text = value.strip()
            try:
                return int(text)
            except ValueError:
                pass
            try:
                as_float = float(text)
            except ValueError:
                return value
            # Accept "3.0" or "1e3", but not "3.5", "inf" or "nan".
            return int(as_float) if as_float.is_integer() else value
        return value

    if t == "number":
        if isinstance(value, (bool, int, float)):
            return value
        if isinstance(value, str):
            try:
                number = float(value.strip())
            except ValueError:
                return value
            # float() also accepts "nan" / "inf"; those pass schema
            # validation but serialise as NaN / Infinity, which is not JSON.
            return number if math.isfinite(number) else value
        return value

    if t == "boolean":
        if isinstance(value, str):
            low = value.strip().lower()
            if low in ("true", "yes", "1"):
                return True
            if low in ("false", "no", "0"):
                return False
        return value

    if t == "array" and isinstance(value, list):
        items = schema.get("items")
        return [coerce(v, items) for v in value] if isinstance(items, dict) else value

    if isinstance(value, dict):  # object, or a schema with properties but no type
        props = schema.get("properties")
        if isinstance(props, dict):
            return {k: (coerce(v, props[k]) if k in props else v) for k, v in value.items()}

    return value
198
+
199
+
200
def args_valid(args: dict[str, Any], schema: dict[str, Any]) -> bool:
    """Report whether `args` satisfies the tool's parameter schema.

    Non-dict arguments are always invalid. If `jsonschema` could not be
    imported, any dict is accepted. A schema that is itself malformed also
    counts as valid, so a caller's bad schema does not block the call.
    """
    if not isinstance(args, dict):
        return False
    if jsonschema is None:  # pragma: no cover
        return True
    try:
        jsonschema.validate(args, schema)
    except jsonschema.ValidationError:
        return False
    except jsonschema.SchemaError:
        # A malformed tool schema is the caller's problem, not the model's.
        return True
    return True
toolrails/upstream.py ADDED
@@ -0,0 +1,91 @@
1
+ """The only part of toolrails that talks to Ollama.
2
+
3
+ Two calls matter:
4
+
5
+ * `chat_openai` — forward the request to Ollama's OpenAI-compatible endpoint
6
+ and get its natural, *unconstrained* answer. This is stage one: we let the
7
+ model decide whether and which tool to call, because constraining that
8
+ decision is exactly what suppresses tool calls (see the constraint-tax note
9
+ in the README).
10
+
11
+ * `constrained_object` — Ollama's native `/api/chat` with a JSON schema in
12
+ `format`. Ollama compiles the schema to a grammar and constrains
13
+ decoding token by token, so the output is *structurally guaranteed* to match.
14
+ This is stage two: once a tool is chosen, we regenerate only its arguments
15
+ under the grammar.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ from typing import Any
22
+
23
+ import httpx
24
+
25
+
26
class Upstream:
    """Thin async HTTP client for one Ollama server.

    Holds a single shared `httpx.AsyncClient`; call `aclose` when done.
    """

    def __init__(self, base_url: str, timeout: float = 600.0) -> None:
        # Trailing slashes are dropped so paths can be appended directly.
        self.base_url = base_url.rstrip("/")
        self._client = httpx.AsyncClient(timeout=timeout)

    async def aclose(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    async def chat_openai(self, body: dict[str, Any]) -> dict[str, Any]:
        """Stage one: unconstrained OpenAI-compatible chat completion.

        Raises on a non-2xx status so the caller's fail-open guard trips
        instead of a half-repaired response going out.
        """
        response = await self._client.post(
            f"{self.base_url}/v1/chat/completions", json=body
        )
        response.raise_for_status()
        return response.json()

    async def chat_raw(self, body: dict[str, Any]) -> httpx.Response:
        """Forward a chat request and return Ollama's response verbatim.

        No status check is made, so an upstream error is handed back as-is
        for the caller to relay.
        """
        url = f"{self.base_url}/v1/chat/completions"
        return await self._client.post(url, json=body)

    async def constrained_object(
        self,
        model: str,
        messages: list[dict[str, Any]],
        schema: dict[str, Any],
        options: dict[str, Any] | None = None,
    ) -> dict[str, Any] | None:
        """Stage two: request JSON matching `schema` from `/api/chat`.

        The schema is sent in the request's `format` field. Temperature
        defaults to 0 and can be overridden through `options`.

        Returns:
            The decoded object, or None if the reply has no content, is not
            valid JSON, or is not a JSON object.

        Raises:
            httpx.HTTPStatusError: On a non-2xx upstream status.
        """
        # Caller-supplied options win over the deterministic default.
        merged_options: dict[str, Any] = {"temperature": 0}
        merged_options.update(options or {})
        payload: dict[str, Any] = {
            "model": model,
            "messages": messages,
            "format": schema,
            "stream": False,
            "options": merged_options,
        }
        reply = await self._client.post(f"{self.base_url}/api/chat", json=payload)
        reply.raise_for_status()

        message = (reply.json() or {}).get("message") or {}
        content = message.get("content", "")
        if not content:
            return None
        try:
            decoded = json.loads(content)
        except json.JSONDecodeError:
            return None
        return decoded if isinstance(decoded, dict) else None

    async def passthrough(self, path: str, method: str = "GET") -> httpx.Response:
        """Forward an unrelated endpoint (e.g. /v1/models) verbatim."""
        return await self._client.request(method, f"{self.base_url}{path}")
@@ -0,0 +1,208 @@
1
+ Metadata-Version: 2.4
2
+ Name: toolrails
3
+ Version: 0.1.0
4
+ Summary: Valid tool calls from any local model. A drop-in OpenAI-compatible proxy for Ollama that guarantees well-formed tool calls and restores tool_choice.
5
+ Project-URL: Homepage, https://github.com/theadamdanielsson/toolrails
6
+ Project-URL: Issues, https://github.com/theadamdanielsson/toolrails/issues
7
+ Author-email: Adam Danielsson <the.adam.danielsson@gmail.com>
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: agent,function-calling,llm,local-llm,ollama,openai,proxy,structured-output,tool-calling
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: httpx>=0.27
18
+ Requires-Dist: jsonschema>=4.0
19
+ Requires-Dist: starlette>=0.37
20
+ Requires-Dist: uvicorn>=0.30
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
23
+ Requires-Dist: pytest>=7; extra == 'dev'
24
+ Description-Content-Type: text/markdown
25
+
26
+ # toolrails
27
+
28
+ **Valid tool calls from any local model.**
29
+
30
+ Local models are good enough to code with now — until they try to call a tool.
31
+ A small model on Ollama will decide to call `read_file` and then hand your agent
32
+ the arguments as a *string* instead of an object, or an array field serialized
33
+ as `"[...]"`, or an integer wrapped in quotes, or invent a tool named
34
+ `readFile`. The agent can't use it, retries, gets the same broken call, and
35
+ burns your evening in a loop. (See
36
+ [ollama/ollama#15390](https://github.com/ollama/ollama/issues/15390): Claude Code
37
+ + a local model, stuck on *Invalid tool parameters*, unresolved.)
38
+
39
+ toolrails is a small proxy that sits between your agent and Ollama and makes that
40
+ stop. Your agent speaks the ordinary OpenAI API to it; toolrails guarantees the
41
+ tool calls that come back are well-formed — a real tool name, and arguments that
42
+ match the tool's JSON schema.
43
+
44
+ ```bash
45
+ # start it (nothing to install with uv)
46
+ uvx toolrails --ollama http://localhost:11434
47
+
48
+ # then point your agent's base URL at toolrails instead of Ollama:
49
+ # http://localhost:11500/v1
50
+ ```
51
+
52
+ That's the whole change. One base URL.
53
+
54
+ ## Point your agent at it
55
+
56
+ toolrails speaks the OpenAI API, so anything that lets you set a base URL works —
57
+ Cline, opencode, the OpenAI SDKs, your own scripts. Point the base URL at
58
+ `http://localhost:11500/v1` and keep using your Ollama model name. The API key is
59
+ ignored, so pass any placeholder.
60
+
61
+ ```python
62
+ from openai import OpenAI
63
+
64
+ client = OpenAI(base_url="http://localhost:11500/v1", api_key="ollama")
65
+ resp = client.chat.completions.create(
66
+ model="llama3.2:3b",
67
+ messages=[{"role": "user", "content": "weather in Oslo?"}],
68
+ tools=[...],
69
+ )
70
+ ```
71
+
72
+ ## The difference, measured
73
+
74
+ A benchmark ships in the repo (`demo/reliability.py`): the same tool-calling
75
+ request, twelve times, against raw Ollama and through toolrails, using a
76
+ realistically complex tool — typed fields and a nested array of objects, the way
77
+ a real coding agent's tools actually look.
78
+
79
+ | endpoint | model | valid tool calls |
80
+ | --- | --- | --- |
81
+ | raw Ollama | llama3.2:3b | **0 / 12** |
82
+ | via toolrails | llama3.2:3b | **12 / 12** |
83
+
84
+ The model isn't stupid — it gets the *values* right and the *types* wrong. Raw,
85
+ it hands your agent this (note the integer-as-string and the two stringified
86
+ arrays):
87
+
88
+ ```json
89
+ {"duration_minutes": "30",
90
+ "attendees": "[\"alice@example.com\", \"bob@example.com\"]",
91
+ "reminders": "[{\"method\": \"email\", \"minutes_before\": 10}]"}
92
+ ```
93
+
94
+ `attendees` is a string, not a list — your agent can't iterate it, so the call
95
+ fails and the retry loop begins. Through toolrails, the same request and the same
96
+ model:
97
+
98
+ ```json
99
+ {"duration_minutes": 30,
100
+ "attendees": ["alice@example.com", "bob@example.com"],
101
+ "reminders": [{"method": "email", "minutes_before": 10}]}
102
+ ```
103
+
104
+ Correct types, real nested arrays, every time. Simpler flat tools fail far less
105
+ often raw — the gap is widest exactly where real agent tools live: structured,
106
+ typed, nested.
107
+
108
+ ## What it guarantees
109
+
110
+ - **The tool name is real.** A hallucinated `getWeather` is snapped to the
111
+ `get_weather` you actually offered; a name that matches nothing is left alone
112
+ rather than guessed at.
113
+ - **The arguments parse and fit the schema.** When the model's arguments don't
114
+ validate, toolrails first fixes the *types* of its own values — the array it
115
+ sent as a string, the integer it quoted — and only if that still can't satisfy
116
+ the schema does it regenerate them under a grammar built from the tool's
117
+ schema. Either way, the call you receive validates — short of the fail-open path described below.
118
+ - **`tool_choice` works again.** Ollama's OpenAI-compatible endpoint silently
119
+ ignores `tool_choice`. toolrails restores it: `"none"` strips the tools,
120
+ `"required"` (or a named function) forces a call even when the model tried to
121
+ answer in prose.
122
+
123
+ ## It never breaks your agent
124
+
125
+ toolrails fails open. If it can't reach Ollama's constrained endpoint, hits a
126
+ tool schema it can't make sense of, or throws anywhere in the repair path, it
127
+ forwards the model's original answer unchanged. The worst it can ever do is
128
+ nothing — it will not turn a working call into an error. And on the common case,
129
+ where the model already produced a valid call, it adds **zero** extra model
130
+ calls: the fast path recognises a good call and passes it straight through.
131
+
132
+ ## How it works
133
+
134
+ The naive fix — force every response through the tool's grammar — backfires.
135
+ Constraining the *decision* to call a tool is what makes models stop calling
136
+ tools at all; there's a measured "constraint tax" for exactly this
137
+ ([arXiv:2606.25605](https://arxiv.org/abs/2606.25605)). So toolrails never
138
+ touches the decision. It asks Ollama normally, lets the model choose whether and
139
+ which tool to call, and then repairs the result in the cheapest way that works:
140
+
141
+ 1. **If the call already validates, it passes straight through** — no extra work.
142
+ 2. **If only the types are wrong** — the array the model sent as a string, the
143
+ integer it quoted — toolrails coerces the model's *own* values to the schema.
144
+ This is the common case; it costs no second model call and never changes what
145
+ the model meant.
146
+ 3. **If coercion still can't satisfy the schema**, toolrails regenerates the
147
+ arguments with the tool's JSON schema in Ollama's `format` parameter. Ollama
148
+ compiles that schema to a grammar (XGrammar) and constrains decoding token by
149
+ token, so the arguments come back well-formed by construction.
150
+
151
+ Names are repaired by deterministic string matching, arguments checked with
152
+ `jsonschema`. There is no second model judging the first — just coercion, a
153
+ grammar, and a validator. And if every step somehow fails, the model's original
154
+ answer passes through untouched.
155
+
156
+ ## Install
157
+
158
+ You need [Ollama](https://ollama.com) running and Python 3.10 or newer.
159
+
160
+ ```bash
161
+ uvx toolrails # run without installing
162
+ pip install toolrails # or install the CLI
163
+ toolrails --ollama http://localhost:11434 --port 11500
164
+ ```
165
+
166
+ Options: `--ollama` (Ollama base URL, or `$OLLAMA_HOST`), `--host`, `--port`,
167
+ `--quiet` (stop logging a line per repaired call). Unless quieted, toolrails prints one line whenever it
168
+ steps in, so you can see it working:
169
+
170
+ ```text
171
+ toolrails: call create_event repaired (arguments did not match schema)
172
+ toolrails: forced call get_weather (tool_choice names it)
173
+ ```
174
+
175
+ ## Scope
176
+
177
+ toolrails fixes the *shape* of tool calls: valid name, valid arguments, working
178
+ `tool_choice`. It does not make a weak model *choose* the right tool, invent a
179
+ call the model didn't attempt, or route between models. If the model decides not
180
+ to call a tool, that decision stands (unless you set `tool_choice: required`).
181
+ It is a proxy over Ollama specifically, because the leverage is Ollama's
182
+ grammar-constrained `format` — the same primitive the guarantee is built on. It
183
+ repairs models that *attempt* tool calls; a model Ollama rejects outright with
184
+ *"does not support tools"* (some chat templates have none) is out of scope for
185
+ v1 — forcing tool calls onto those is a bigger, separate job.
186
+
187
+ Streaming requests are supported: with tools, the response is repaired and then
188
+ re-emitted as standard incremental deltas (verified against the OpenAI SDK's
189
+ streaming client). The repair still buffers internally rather than streaming the
190
+ model token by token — that's a later refinement; v1 gets the call right first.
191
+
192
+ ## Contributing
193
+
194
+ The most useful thing you can send is a tool call that came out wrong: the model,
195
+ the tool schema you gave it, and what it produced. That is the test set. See
196
+ [CONTRIBUTING.md](https://github.com/theadamdanielsson/toolrails/blob/HEAD/CONTRIBUTING.md) for how to run the tests and the reliability
197
+ benchmark against your own models.
198
+
199
+ ## From the same author
200
+
201
+ toolrails is by the author of [overloop](https://github.com/theadamdanielsson/overloop)
202
+ (*stop your agent looping*) and [overllm](https://github.com/theadamdanielsson/overllm)
203
+ (*catch the LLM calls you didn't need*). Same theme, one layer down: those stop
204
+ wasted agent work; this stops the wasted work of a tool call that never parses.
205
+
206
+ ## License
207
+
208
+ MIT © Adam Danielsson
@@ -0,0 +1,12 @@
1
+ toolrails/__init__.py,sha256=8x230z_B1KhJCAJRV98b19V2L27O_HxueM3Pu-68JBs,358
2
+ toolrails/__main__.py,sha256=MSmt_5Xg84uHqzTN38JwgseJK8rsJn_11A8WD99VtEo,61
3
+ toolrails/app.py,sha256=wnb9VetefKveTTdLP0B52_Lq-nOrLheBitlP0EW95Oo,5773
4
+ toolrails/cli.py,sha256=G8NYfkRe6NXfIBZBY2XoIsftpDDAd4yl7Nix3n8k8NQ,1773
5
+ toolrails/pipeline.py,sha256=XCk13g6YZ8jq4rIh_dey6DD2l_MyRN26iGmYESlnD-U,7786
6
+ toolrails/schemas.py,sha256=EwWC8sE2_xN3QGiAUB9-iv9NRkCr4pQoWolNi_QluXo,7608
7
+ toolrails/upstream.py,sha256=vJCPVREVFdCSL-821b9NI_A48auzV6DWZdtYPHS3q3Y,3546
8
+ toolrails-0.1.0.dist-info/METADATA,sha256=_d8aeMkN-XEo-YV5TV__taosZRa7_fQB9zX413jCzp0,9146
9
+ toolrails-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
10
+ toolrails-0.1.0.dist-info/entry_points.txt,sha256=joBwCHvMR5wl7Y1bqzV7UR8Zey-SiQlEo22KT4uEFQA,49
11
+ toolrails-0.1.0.dist-info/licenses/LICENSE,sha256=dJo02JmanV48uwAVydix7ep6cVMAmT1gY_yoWItnDXc,1072
12
+ toolrails-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ toolrails = toolrails.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Adam Danielsson
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.