spawnllm 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {spawnllm-0.4.0 → spawnllm-0.5.0}/PKG-INFO +13 -31
  2. {spawnllm-0.4.0 → spawnllm-0.5.0}/README.md +12 -30
  3. {spawnllm-0.4.0 → spawnllm-0.5.0}/pyproject.toml +2 -1
  4. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/__init__.py +12 -21
  5. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/backends/__init__.py +2 -2
  6. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/backends/base.py +72 -25
  7. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/backends/claude.py +34 -14
  8. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/backends/codex.py +5 -17
  9. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/backends/gemini.py +21 -23
  10. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/backends/mlx.py +8 -9
  11. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/call.py +32 -29
  12. spawnllm-0.5.0/spawnllm/extract.py +122 -0
  13. spawnllm-0.5.0/spawnllm/response.py +29 -0
  14. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/run.py +20 -19
  15. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/spec.py +3 -1
  16. spawnllm-0.5.0/spawnllm/structured.py +116 -0
  17. spawnllm-0.4.0/spawnllm/structured.py +0 -169
  18. {spawnllm-0.4.0 → spawnllm-0.5.0}/LICENSE +0 -0
  19. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/__main__.py +0 -0
  20. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/backends/registry.py +0 -0
  21. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/cli.py +0 -0
  22. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/mlx/__init__.py +0 -0
  23. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/mlx/codec.py +0 -0
  24. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/mlx/engine.py +0 -0
  25. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/mlx/fuse.py +0 -0
  26. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/mlx/patches.py +0 -0
  27. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/proc.py +0 -0
  28. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/py.typed +0 -0
  29. {spawnllm-0.4.0 → spawnllm-0.5.0}/spawnllm/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spawnllm
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
5
5
  Keywords:
6
6
  Author: Yasyf Mohamedali
@@ -64,24 +64,13 @@ domain logic instead of its own copy of the backends.
64
64
 
65
65
  ## Install
66
66
 
67
- No install needed — run everything through [uvx](https://docs.astral.sh/uv/):
67
+ Run the CLI with [uvx](https://docs.astral.sh/uv/):
68
68
 
69
69
  ```bash
70
70
  uvx spawnllm --help
71
71
  ```
72
72
 
73
- `uvx` fetches spawnllm into a throwaway environment and runs it. To add it
74
- to a project instead:
75
-
76
- ```bash
77
- uv add spawnllm
78
- ```
79
-
80
- For the local MLX engine (Apple Silicon only), pull the extra:
81
-
82
- ```bash
83
- uv add "spawnllm[mlx]"
84
- ```
73
+ For the local MLX engine (Apple Silicon only), pull the extra: `uv add "spawnllm[mlx]"`.
85
74
 
86
75
  ## Quickstart
87
76
 
@@ -107,9 +96,9 @@ uvx spawnllm call --backend claude "What is 2+2? Reply with just the number."
107
96
  4
108
97
  ```
109
98
 
110
- `--model small|medium|large` swaps the tier, which each backend maps to a concrete model.
111
- The `claude` backend resolves `small` to Haiku, `medium` to Sonnet, and `large` to Opus. Add
112
- `--agent` to let the call use tools.
99
+ `--model small|medium|large` swaps the tier, which each backend maps to a concrete model — the
100
+ `claude` backend resolves `small` to Haiku, `medium` to Sonnet, and `large` to Opus. Add
101
+ `--agent` to let the call use tools. Run `uvx spawnllm --help` for the full flag list.
113
102
 
114
103
  ### From Python
115
104
 
@@ -171,21 +160,14 @@ result = run_sync(
171
160
  print(result.stdout) # 4
172
161
  ```
173
162
 
174
- ## What problems does this solve?
175
-
176
- Every tool that shells out to `claude` or `codex` rebuilds the same plumbing: argv
177
- construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful
178
- errors. spawnllm holds it once.
179
-
180
- Structured output is boilerplate too. A Pydantic model becomes a JSON-schema constraint
181
- and a parsed, validated result, identically for both CLI backends.
182
-
183
- Local MLX is fiddly. Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
184
- batched single-token generation live behind one engine instead of in every consumer.
163
+ ## How it works
185
164
 
186
- Behavior drift goes away with the duplication: two tools that call the same models stay
187
- byte-for-byte consistent because they share the backend layer, not a pair of diverging
188
- copies.
165
+ Each backend holds plumbing that consumers would otherwise rebuild: the CLI backends own argv
166
+ construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful errors,
167
+ and they turn a Pydantic model into a JSON-schema constraint plus a parsed, validated result. The
168
+ MLX engine wraps adapter fusion, prompt-cache reuse, worker-thread lifecycle, and batched
169
+ single-token generation. Tools that share the layer stay byte-for-byte consistent instead of
170
+ drifting across diverging copies.
189
171
 
190
172
  ## Docs
191
173
 
@@ -17,24 +17,13 @@ domain logic instead of its own copy of the backends.
17
17
 
18
18
  ## Install
19
19
 
20
- No install needed — run everything through [uvx](https://docs.astral.sh/uv/):
20
+ Run the CLI with [uvx](https://docs.astral.sh/uv/):
21
21
 
22
22
  ```bash
23
23
  uvx spawnllm --help
24
24
  ```
25
25
 
26
- `uvx` fetches spawnllm into a throwaway environment and runs it. To add it
27
- to a project instead:
28
-
29
- ```bash
30
- uv add spawnllm
31
- ```
32
-
33
- For the local MLX engine (Apple Silicon only), pull the extra:
34
-
35
- ```bash
36
- uv add "spawnllm[mlx]"
37
- ```
26
+ For the local MLX engine (Apple Silicon only), pull the extra: `uv add "spawnllm[mlx]"`.
38
27
 
39
28
  ## Quickstart
40
29
 
@@ -60,9 +49,9 @@ uvx spawnllm call --backend claude "What is 2+2? Reply with just the number."
60
49
  4
61
50
  ```
62
51
 
63
- `--model small|medium|large` swaps the tier, which each backend maps to a concrete model.
64
- The `claude` backend resolves `small` to Haiku, `medium` to Sonnet, and `large` to Opus. Add
65
- `--agent` to let the call use tools.
52
+ `--model small|medium|large` swaps the tier, which each backend maps to a concrete model — the
53
+ `claude` backend resolves `small` to Haiku, `medium` to Sonnet, and `large` to Opus. Add
54
+ `--agent` to let the call use tools. Run `uvx spawnllm --help` for the full flag list.
66
55
 
67
56
  ### From Python
68
57
 
@@ -124,21 +113,14 @@ result = run_sync(
124
113
  print(result.stdout) # 4
125
114
  ```
126
115
 
127
- ## What problems does this solve?
128
-
129
- Every tool that shells out to `claude` or `codex` rebuilds the same plumbing: argv
130
- construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful
131
- errors. spawnllm holds it once.
132
-
133
- Structured output is boilerplate too. A Pydantic model becomes a JSON-schema constraint
134
- and a parsed, validated result, identically for both CLI backends.
135
-
136
- Local MLX is fiddly. Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
137
- batched single-token generation live behind one engine instead of in every consumer.
116
+ ## How it works
138
117
 
139
- Behavior drift goes away with the duplication: two tools that call the same models stay
140
- byte-for-byte consistent because they share the backend layer, not a pair of diverging
141
- copies.
118
+ Each backend holds plumbing that consumers would otherwise rebuild: the CLI backends own argv
119
+ construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful errors,
120
+ and they turn a Pydantic model into a JSON-schema constraint plus a parsed, validated result. The
121
+ MLX engine wraps adapter fusion, prompt-cache reuse, worker-thread lifecycle, and batched
122
+ single-token generation. Tools that share the layer stay byte-for-byte consistent instead of
123
+ drifting across diverging copies.
142
124
 
143
125
  ## Docs
144
126
 
@@ -1,6 +1,7 @@
1
1
  [project]
2
2
  name = "spawnllm"
3
- version = "0.4.0"
3
+ # Inert sentinel: the real version is set from the release tag (uv version --frozen).
4
+ version = "0.5.0"
4
5
  description = "Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools."
5
6
  readme = "README.md"
6
7
  license = "MIT"
@@ -1,14 +1,17 @@
1
1
  """Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
2
2
 
3
- The top-level namespace exposes the CLI backends, subprocess transport, and
4
- structured-output helpers. The MLX engine lives under `spawnllm.mlx`, whose
5
- imports are lazy so that `import spawnllm` never pulls `mlx_lm`/`zstandard`.
3
+ The top-level namespace exposes the three primitives `run`/`call`/`extract`
4
+ and their `_sync` companions over a `Backend` family that fully encapsulates
5
+ execution and returns one shared `Response`. The MLX engine lives under
6
+ `spawnllm.mlx`, whose imports are lazy so that `import spawnllm` never pulls
7
+ `mlx_lm`/`zstandard`.
6
8
  """
7
9
 
8
10
  from __future__ import annotations
9
11
 
10
12
  from spawnllm.backends import (
11
13
  AntigravityCliBackend,
14
+ BackendCallError,
12
15
  BackendNotAuthenticated,
13
16
  BackendNotInstalled,
14
17
  BackendReady,
@@ -18,26 +21,21 @@ from spawnllm.backends import (
18
21
  CliBackend,
19
22
  CodexCliBackend,
20
23
  GeminiCliBackend,
21
- Invocation,
22
24
  LlmBackend,
23
25
  LlmBackends,
24
26
  MlxBackend,
25
27
  select_backend,
26
28
  )
27
29
  from spawnllm.call import call, call_sync
28
- from spawnllm.proc import RunResult, arun_cli, collect_process, map_concurrent, run_cli
30
+ from spawnllm.extract import extract, extract_sync
31
+ from spawnllm.response import Response
29
32
  from spawnllm.run import run, run_sync
30
33
  from spawnllm.spec import ClaudeConfig, CodexConfig, GeminiConfig, RunSpec
31
- from spawnllm.structured import (
32
- extract_structured,
33
- parse_result_envelope,
34
- parse_structured_output,
35
- resolve_schema_path,
36
- )
37
34
  from spawnllm.types import ProviderName, TModel, TSpecialty
38
35
 
39
36
  __all__ = [
40
37
  "AntigravityCliBackend",
38
+ "BackendCallError",
41
39
  "BackendNotAuthenticated",
42
40
  "BackendNotInstalled",
43
41
  "BackendReady",
@@ -50,26 +48,19 @@ __all__ = [
50
48
  "CodexConfig",
51
49
  "GeminiCliBackend",
52
50
  "GeminiConfig",
53
- "Invocation",
54
51
  "LlmBackend",
55
52
  "LlmBackends",
56
53
  "MlxBackend",
57
54
  "ProviderName",
58
- "RunResult",
55
+ "Response",
59
56
  "RunSpec",
60
57
  "TModel",
61
58
  "TSpecialty",
62
- "arun_cli",
63
59
  "call",
64
60
  "call_sync",
65
- "collect_process",
66
- "extract_structured",
67
- "map_concurrent",
68
- "parse_result_envelope",
69
- "parse_structured_output",
70
- "resolve_schema_path",
61
+ "extract",
62
+ "extract_sync",
71
63
  "run",
72
- "run_cli",
73
64
  "run_sync",
74
65
  "select_backend",
75
66
  ]
@@ -3,13 +3,13 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from spawnllm.backends.base import (
6
+ BackendCallError,
6
7
  BackendNotAuthenticated,
7
8
  BackendNotInstalled,
8
9
  BackendReady,
9
10
  BackendStatus,
10
11
  BackendUnavailable,
11
12
  CliBackend,
12
- Invocation,
13
13
  LlmBackend,
14
14
  )
15
15
  from spawnllm.backends.claude import ClaudeCliBackend
@@ -20,6 +20,7 @@ from spawnllm.backends.registry import LlmBackends, select_backend
20
20
 
21
21
  __all__ = [
22
22
  "AntigravityCliBackend",
23
+ "BackendCallError",
23
24
  "BackendNotAuthenticated",
24
25
  "BackendNotInstalled",
25
26
  "BackendReady",
@@ -29,7 +30,6 @@ __all__ = [
29
30
  "CliBackend",
30
31
  "CodexCliBackend",
31
32
  "GeminiCliBackend",
32
- "Invocation",
33
33
  "LlmBackend",
34
34
  "LlmBackends",
35
35
  "MlxBackend",
@@ -10,7 +10,8 @@ from dataclasses import dataclass
10
10
  from pathlib import Path
11
11
  from typing import TYPE_CHECKING, ClassVar
12
12
 
13
- from spawnllm.proc import RunResult, acapture_cli, capture_cli
13
+ from spawnllm.proc import acapture_cli, capture_cli
14
+ from spawnllm.response import Response
14
15
 
15
16
  if TYPE_CHECKING:
16
17
  from pydantic import BaseModel
@@ -62,6 +63,18 @@ class BackendUnavailable(RuntimeError):
62
63
  """Raised when no backend is ready (installed and authenticated)."""
63
64
 
64
65
 
66
+ class BackendCallError(RuntimeError):
67
+ """Raised by `call`/`extract` when a backend returns a provider error.
68
+
69
+ Carries the backend's error string (a nonzero exit with stderr, or an error
70
+ envelope), attached both as the message and as a note for tracebacks.
71
+ """
72
+
73
+ def __init__(self, error: str) -> None:
74
+ super().__init__(error)
75
+ self.add_note(error)
76
+
77
+
65
78
  @dataclass(frozen=True)
66
79
  class Invocation:
67
80
  """A built CLI invocation: argv, optional stdin, and where to read the result.
@@ -95,37 +108,28 @@ class LlmBackend(ABC):
95
108
  provider: ClassVar[ProviderName]
96
109
 
97
110
  @abstractmethod
98
- async def aexecute(self, spec: RunSpec) -> RunResult:
99
- """Execute a single run asynchronously and capture its raw outcome.
100
-
101
- Args:
102
- spec: The configured run to execute.
103
-
104
- Returns:
105
- The captured stdout, stderr, and exit code.
106
- """
111
+ async def aexecute(self, spec: RunSpec) -> Response:
112
+ """Execute a single run asynchronously and resolve it to a `Response`.
107
113
 
108
- @abstractmethod
109
- def execute(self, spec: RunSpec) -> RunResult:
110
- """Execute a single run synchronously and capture its raw outcome.
114
+ The backend runs the process, reads its output wherever the provider
115
+ writes it, detects failure, and validates against `spec.response_model`.
111
116
 
112
117
  Args:
113
118
  spec: The configured run to execute.
114
119
 
115
120
  Returns:
116
- The captured stdout, stderr, and exit code.
121
+ The resolved `Response`.
117
122
  """
118
123
 
119
124
  @abstractmethod
120
- def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
121
- """Parse raw stdout into text or a validated model.
125
+ def execute(self, spec: RunSpec) -> Response:
126
+ """Execute a single run synchronously and resolve it to a `Response`.
122
127
 
123
128
  Args:
124
- raw: Raw stdout from the backend.
125
- response_model: Model to validate against, or `None` for raw text.
129
+ spec: The configured run to execute.
126
130
 
127
131
  Returns:
128
- `raw` when `response_model` is `None`, else a validated instance.
132
+ The resolved `Response`.
129
133
  """
130
134
 
131
135
  @abstractmethod
@@ -172,6 +176,49 @@ class LlmBackend(ABC):
172
176
  """
173
177
  return json.dumps(model.model_json_schema())
174
178
 
179
+ def schema_arg(self, spec: RunSpec) -> str | None:
180
+ """Return the JSON-schema string for `spec`'s `response_model`, or `None` when absent."""
181
+ return self.schema_for(spec.response_model) if spec.response_model is not None else None
182
+
183
+ def to_response(self, raw: str, *, returncode: int, stderr: str, spec: RunSpec) -> Response:
184
+ """Resolve a raw capture into a `Response`: detect failure, extract text, validate.
185
+
186
+ A nonzero exit or an error envelope becomes `Response.error`; otherwise
187
+ the text comes from `result_text` and, when `spec.response_model` is set,
188
+ the validated model from `result_value`. A `pydantic.ValidationError`
189
+ from a non-conforming model propagates.
190
+
191
+ Args:
192
+ raw: The raw output read wherever the provider wrote it.
193
+ returncode: The process exit code.
194
+ stderr: The captured stderr.
195
+ spec: The configured run, carrying the optional `response_model`.
196
+
197
+ Returns:
198
+ The resolved `Response`.
199
+ """
200
+ if returncode != 0:
201
+ return Response(error=f"{self.provider} exited {returncode}: {stderr.strip()[-2000:]}", result=None)
202
+ if (err := self.envelope_error(raw)) is not None:
203
+ return Response(error=err, result=None)
204
+ if spec.response_model is None:
205
+ return Response(error=None, result=self.result_text(raw))
206
+ return Response(
207
+ error=None, result=self.result_text(raw), parsed=spec.response_model.model_validate(self.result_value(raw))
208
+ )
209
+
210
+ def result_text(self, raw: str) -> str:
211
+ """Return the final text output from a raw capture; the default is `raw` unchanged."""
212
+ return raw
213
+
214
+ def result_value(self, raw: str) -> object:
215
+ """Return the JSON value to validate from a raw capture; the default parses `raw` as JSON."""
216
+ return json.loads(raw)
217
+
218
+ def envelope_error(self, raw: str) -> str | None:
219
+ """Return the provider's error message from an error envelope, or `None` on success."""
220
+ return None
221
+
175
222
 
176
223
  class CliBackend(LlmBackend):
177
224
  """Execution contract for the subprocess-backed LLM family.
@@ -214,7 +261,7 @@ class CliBackend(LlmBackend):
214
261
  """
215
262
  return Invocation(self.build_command(spec), spec.prompt)
216
263
 
217
- async def aexecute(self, spec: RunSpec) -> RunResult:
264
+ async def aexecute(self, spec: RunSpec) -> Response:
218
265
  inv = self.invocation(spec)
219
266
  try:
220
267
  rr = await acapture_cli(
@@ -224,13 +271,13 @@ class CliBackend(LlmBackend):
224
271
  cwd=spec.cwd,
225
272
  timeout=spec.timeout,
226
273
  )
227
- stdout = Path(inv.result_path).read_text() if inv.result_path else rr.stdout
274
+ raw = Path(inv.result_path).read_text() if inv.result_path else rr.stdout
228
275
  finally:
229
276
  for path in inv.cleanup_paths:
230
277
  Path(path).unlink(missing_ok=True)
231
- return RunResult(stdout, rr.stderr, rr.returncode)
278
+ return self.to_response(raw, returncode=rr.returncode, stderr=rr.stderr, spec=spec)
232
279
 
233
- def execute(self, spec: RunSpec) -> RunResult:
280
+ def execute(self, spec: RunSpec) -> Response:
234
281
  inv = self.invocation(spec)
235
282
  try:
236
283
  rr = capture_cli(
@@ -240,11 +287,11 @@ class CliBackend(LlmBackend):
240
287
  cwd=spec.cwd,
241
288
  timeout=spec.timeout,
242
289
  )
243
- stdout = Path(inv.result_path).read_text() if inv.result_path else rr.stdout
290
+ raw = Path(inv.result_path).read_text() if inv.result_path else rr.stdout
244
291
  finally:
245
292
  for path in inv.cleanup_paths:
246
293
  Path(path).unlink(missing_ok=True)
247
- return RunResult(stdout, rr.stderr, rr.returncode)
294
+ return self.to_response(raw, returncode=rr.returncode, stderr=rr.stderr, spec=spec)
248
295
 
249
296
  def check_status(self, *, timeout: int = 10) -> BackendStatus:
250
297
  """Check whether this backend's CLI is installed and authenticated.
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING, ClassVar
8
8
 
9
9
  from spawnllm.backends.base import CliBackend
10
10
  from spawnllm.spec import ClaudeConfig
11
- from spawnllm.structured import parse_structured_output
11
+ from spawnllm.structured import structured_value
12
12
 
13
13
  if TYPE_CHECKING:
14
14
  from pydantic import BaseModel
@@ -19,6 +19,21 @@ if TYPE_CHECKING:
19
19
  CLAUDE_MODELS: dict[TModel, str] = {"small": "haiku", "medium": "sonnet", "large": "opus"}
20
20
 
21
21
 
22
+ def result_event(raw: str) -> dict[str, object] | None:
23
+ """Return the `claude` result envelope: the dict itself, or the `type=="result"` stream-json event, else `None`."""
24
+ try:
25
+ data = json.loads(raw)
26
+ except json.JSONDecodeError:
27
+ return None
28
+ match data:
29
+ case {"is_error": _} | {"result": _}:
30
+ return data
31
+ case list():
32
+ return next((e for e in data if isinstance(e, dict) and e.get("type") == "result"), None)
33
+ case _:
34
+ return None
35
+
36
+
22
37
  class ClaudeCliBackend(CliBackend):
23
38
  """`CliBackend` for the Anthropic `claude` CLI.
24
39
 
@@ -53,6 +68,7 @@ class ClaudeCliBackend(CliBackend):
53
68
  The argv list to execute; the prompt is delivered over stdin.
54
69
  """
55
70
  cfg = spec.config_for(ClaudeConfig) or ClaudeConfig()
71
+ schema = self.schema_arg(spec)
56
72
  explicit = (
57
73
  cfg.permission_mode is not None
58
74
  or cfg.mcp_config is not None
@@ -92,8 +108,8 @@ class ClaudeCliBackend(CliBackend):
92
108
  *(["--tools", cfg.tools] if cfg.tools is not None else []),
93
109
  *(["--disable-slash-commands"] if cfg.disable_slash_commands else []),
94
110
  *(
95
- ["--json-schema", spec.schema, "--output-format", "json"]
96
- if spec.schema
111
+ ["--json-schema", schema, "--output-format", "json"]
112
+ if schema
97
113
  else ["--output-format", cfg.output_format]
98
114
  if cfg.output_format
99
115
  else []
@@ -118,17 +134,21 @@ class ClaudeCliBackend(CliBackend):
118
134
 
119
135
  return json.dumps(transform_schema(model))
120
136
 
121
- def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
122
- """Parse `claude` stdout into text or a validated model.
123
-
124
- Args:
125
- raw: Raw stdout from the `claude` CLI.
126
- response_model: Model to validate against, or `None` for raw text.
127
-
128
- Returns:
129
- `raw` for text calls; otherwise the validated `structured_output` from the result event, else `raw` as JSON.
130
- """
131
- return parse_structured_output(raw, response_model)
137
+ def result_text(self, raw: str) -> str:
138
+ """Return the `result` text from the `claude` envelope, falling back to `raw` for plain text."""
139
+ if (event := result_event(raw)) is not None and isinstance(text := event.get("result"), str):
140
+ return text
141
+ return raw
142
+
143
+ def result_value(self, raw: str) -> object:
144
+ """Return the `structured_output` from the `claude` stream-json result event, else `raw` parsed as JSON."""
145
+ return structured_value(raw)
146
+
147
+ def envelope_error(self, raw: str) -> str | None:
148
+ """Return the error message when the `claude` result event marks the run as an error, else `None`."""
149
+ if (event := result_event(raw)) is not None and event.get("is_error"):
150
+ return event["result"] if isinstance(event.get("result"), str) else "claude reported an error"
151
+ return None
132
152
 
133
153
  def env(self) -> dict[str, str]:
134
154
  """Return no extra environment variables; the `claude` CLI runs with the inherited environment."""
@@ -47,9 +47,9 @@ class CodexCliBackend(CliBackend):
47
47
  def build_command(self, spec: RunSpec) -> list[str]:
48
48
  """Build the `codex exec` argv for one stdin-prompted invocation.
49
49
 
50
- Resolves `spec.schema` to a temp file via `resolve_schema_path` and adds
51
- `--output-schema` when present; `invocation` reuses that path and cleans
52
- it up after the run.
50
+ Derives the schema from `spec.response_model`, writes it to a temp file
51
+ via `resolve_schema_path`, and adds `--output-schema` when present;
52
+ `invocation` reuses that path and cleans it up after the run.
53
53
 
54
54
  Args:
55
55
  spec: The configured run to translate into argv.
@@ -59,7 +59,7 @@ class CodexCliBackend(CliBackend):
59
59
  """
60
60
  from spawnllm.structured import resolve_schema_path
61
61
 
62
- return self.command_for(spec, resolve_schema_path(self, spec.schema))
62
+ return self.command_for(spec, resolve_schema_path(self, self.schema_arg(spec)))
63
63
 
64
64
  def command_for(self, spec: RunSpec, schema_path: str | None) -> list[str]:
65
65
  cfg = spec.config_for(CodexConfig) or CodexConfig()
@@ -97,7 +97,7 @@ class CodexCliBackend(CliBackend):
97
97
  """
98
98
  from spawnllm.structured import resolve_schema_path
99
99
 
100
- schema_path = resolve_schema_path(self, spec.schema)
100
+ schema_path = resolve_schema_path(self, self.schema_arg(spec))
101
101
  fd, result_path = tempfile.mkstemp(suffix=".json")
102
102
  os.close(fd)
103
103
  return Invocation(
@@ -125,18 +125,6 @@ class CodexCliBackend(CliBackend):
125
125
 
126
126
  return json.dumps(to_strict_json_schema(model))
127
127
 
128
- def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
129
- """Parse the final message `codex` wrote to its `-o` file into text or a validated model.
130
-
131
- Args:
132
- raw: The final message read from the `-o` file.
133
- response_model: Model to validate against, or `None` for raw text.
134
-
135
- Returns:
136
- `raw` when `response_model` is `None`; otherwise `raw` validated as JSON against `response_model`.
137
- """
138
- return raw if not response_model else response_model.model_validate_json(raw)
139
-
140
128
  def env(self) -> dict[str, str]:
141
129
  """Return no extra environment variables; the `codex` CLI runs with the inherited environment."""
142
130
  return {}
@@ -16,8 +16,6 @@ from spawnllm.spec import GeminiConfig
16
16
  from spawnllm.structured import extract_json_block
17
17
 
18
18
  if TYPE_CHECKING:
19
- from pydantic import BaseModel
20
-
21
19
  from spawnllm.spec import RunSpec
22
20
  from spawnllm.types import ProviderName, TModel
23
21
 
@@ -33,7 +31,7 @@ class GeminiFamilyBackend(CliBackend, ABC):
33
31
  api_key_envs: ClassVar[tuple[str, ...]]
34
32
 
35
33
  def env(self) -> dict[str, str]:
36
- """Return no extra environment variables; Gemini-family CLIs authenticate via OAuth, never an injected API key."""
34
+ """Return no extra environment variables; Gemini-family CLIs authenticate via OAuth, not an injected key."""
37
35
  return {}
38
36
 
39
37
  def is_authenticated(self, *, timeout: int) -> bool:
@@ -60,23 +58,17 @@ class GeminiFamilyBackend(CliBackend, ABC):
60
58
  Returns:
61
59
  An `Invocation` with an empty stdin that forces non-interactive output.
62
60
  """
63
- text = spec.prompt if spec.schema is None else f"{spec.prompt}\n\n{SCHEMA_PROMPT}\n{spec.schema}"
61
+ schema = self.schema_arg(spec)
62
+ text = spec.prompt if schema is None else f"{spec.prompt}\n\n{SCHEMA_PROMPT}\n{schema}"
64
63
  return Invocation(self.build_command(spec) + ["-p", text], "")
65
64
 
66
- def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
67
- """Parse Gemini-family stdout into text or a validated model.
68
-
69
- Args:
70
- raw: Raw stdout from the CLI.
71
- response_model: Model to validate against, or `None` for raw text.
65
+ def result_text(self, raw: str) -> str:
66
+ """Return the model's text output, extracted from this CLI's stdout envelope."""
67
+ return self.extract_text(raw)
72
68
 
73
- Returns:
74
- The extracted text when `response_model` is `None`; otherwise the JSON block validated against it.
75
- """
76
- text = self.extract_text(raw)
77
- if response_model is None:
78
- return text
79
- return response_model.model_validate_json(extract_json_block(text))
69
+ def result_value(self, raw: str) -> object:
70
+ """Return the JSON block parsed from the model's text output."""
71
+ return json.loads(extract_json_block(self.result_text(raw)))
80
72
 
81
73
  @abstractmethod
82
74
  def extract_text(self, raw: str) -> str: ...
@@ -149,13 +141,19 @@ class GeminiCliBackend(GeminiFamilyBackend):
149
141
  return [arg for e in extensions for arg in ("-e", e)]
150
142
 
151
143
  def extract_text(self, raw: str) -> str:
144
+ return json.loads(raw)["response"]
145
+
146
+ def envelope_error(self, raw: str) -> str | None:
147
+ """Return the raw failure payload when `gemini` reports `totalErrors` or an empty response, else `None`.
148
+
149
+ The whole payload tail is folded into the message so any transient marker the CLI emits
150
+ (529/overloaded/rate-limit) lands in `Response.error` and `is_transient` can fire a retry.
151
+ """
152
152
  data = json.loads(raw)
153
- if (
154
- sum(m["api"]["totalErrors"] for m in data.get("stats", {}).get("models", {}).values()) > 0
155
- or not data.get("response")
156
- ):
157
- raise RuntimeError(f"gemini call failed: {data.get('stats', {}).get('models')}")
158
- return data["response"]
153
+ models = data.get("stats", {}).get("models", {})
154
+ if sum(m["api"]["totalErrors"] for m in models.values()) > 0 or not data.get("response"):
155
+ return f"gemini call failed: {raw.strip()[-2000:]}"
156
+ return None
159
157
 
160
158
  def has_cached_credentials(self) -> bool:
161
159
  return (Path.home() / ".gemini" / "oauth_creds.json").exists()