spawnllm 0.4.0__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {spawnllm-0.4.0 → spawnllm-0.5.1}/PKG-INFO +13 -31
  2. {spawnllm-0.4.0 → spawnllm-0.5.1}/README.md +12 -30
  3. {spawnllm-0.4.0 → spawnllm-0.5.1}/pyproject.toml +2 -1
  4. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/__init__.py +15 -21
  5. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/backends/__init__.py +2 -2
  6. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/backends/base.py +116 -39
  7. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/backends/claude.py +42 -17
  8. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/backends/codex.py +18 -22
  9. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/backends/gemini.py +26 -23
  10. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/backends/mlx.py +9 -10
  11. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/backends/registry.py +5 -2
  12. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/call.py +31 -29
  13. spawnllm-0.5.1/spawnllm/extract.py +121 -0
  14. spawnllm-0.5.1/spawnllm/response.py +78 -0
  15. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/run.py +21 -19
  16. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/spec.py +15 -2
  17. spawnllm-0.5.1/spawnllm/structured.py +116 -0
  18. spawnllm-0.4.0/spawnllm/structured.py +0 -169
  19. {spawnllm-0.4.0 → spawnllm-0.5.1}/LICENSE +0 -0
  20. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/__main__.py +0 -0
  21. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/cli.py +0 -0
  22. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/mlx/__init__.py +0 -0
  23. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/mlx/codec.py +0 -0
  24. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/mlx/engine.py +0 -0
  25. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/mlx/fuse.py +0 -0
  26. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/mlx/patches.py +0 -0
  27. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/proc.py +0 -0
  28. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/py.typed +0 -0
  29. {spawnllm-0.4.0 → spawnllm-0.5.1}/spawnllm/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spawnllm
3
- Version: 0.4.0
3
+ Version: 0.5.1
4
4
  Summary: Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
5
5
  Keywords:
6
6
  Author: Yasyf Mohamedali
@@ -64,24 +64,13 @@ domain logic instead of its own copy of the backends.
64
64
 
65
65
  ## Install
66
66
 
67
- No install needed — run everything through [uvx](https://docs.astral.sh/uv/):
67
+ Run the CLI with [uvx](https://docs.astral.sh/uv/):
68
68
 
69
69
  ```bash
70
70
  uvx spawnllm --help
71
71
  ```
72
72
 
73
- `uvx` fetches spawnllm into a throwaway environment and runs it. To add it
74
- to a project instead:
75
-
76
- ```bash
77
- uv add spawnllm
78
- ```
79
-
80
- For the local MLX engine (Apple Silicon only), pull the extra:
81
-
82
- ```bash
83
- uv add "spawnllm[mlx]"
84
- ```
73
+ For the local MLX engine (Apple Silicon only), pull the extra: `uv add "spawnllm[mlx]"`.
85
74
 
86
75
  ## Quickstart
87
76
 
@@ -107,9 +96,9 @@ uvx spawnllm call --backend claude "What is 2+2? Reply with just the number."
107
96
  4
108
97
  ```
109
98
 
110
- `--model small|medium|large` swaps the tier, which each backend maps to a concrete model.
111
- The `claude` backend resolves `small` to Haiku, `medium` to Sonnet, and `large` to Opus. Add
112
- `--agent` to let the call use tools.
99
+ `--model small|medium|large` swaps the tier, which each backend maps to a concrete model — the
100
+ `claude` backend resolves `small` to Haiku, `medium` to Sonnet, and `large` to Opus. Add
101
+ `--agent` to let the call use tools. Run `uvx spawnllm --help` for the full flag list.
113
102
 
114
103
  ### From Python
115
104
 
@@ -171,21 +160,14 @@ result = run_sync(
171
160
  print(result.stdout) # 4
172
161
  ```
173
162
 
174
- ## What problems does this solve?
175
-
176
- Every tool that shells out to `claude` or `codex` rebuilds the same plumbing: argv
177
- construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful
178
- errors. spawnllm holds it once.
179
-
180
- Structured output is boilerplate too. A Pydantic model becomes a JSON-schema constraint
181
- and a parsed, validated result, identically for both CLI backends.
182
-
183
- Local MLX is fiddly. Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
184
- batched single-token generation live behind one engine instead of in every consumer.
163
+ ## How it works
185
164
 
186
- Behavior drift goes away with the duplication: two tools that call the same models stay
187
- byte-for-byte consistent because they share the backend layer, not a pair of diverging
188
- copies.
165
+ Each backend holds plumbing that consumers would otherwise rebuild: the CLI backends own argv
166
+ construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful errors,
167
+ and they turn a Pydantic model into a JSON-schema constraint plus a parsed, validated result. The
168
+ MLX engine wraps adapter fusion, prompt-cache reuse, worker-thread lifecycle, and batched
169
+ single-token generation. Tools that share the layer stay byte-for-byte consistent instead of
170
+ drifting across diverging copies.
189
171
 
190
172
  ## Docs
191
173
 
@@ -17,24 +17,13 @@ domain logic instead of its own copy of the backends.
17
17
 
18
18
  ## Install
19
19
 
20
- No install needed — run everything through [uvx](https://docs.astral.sh/uv/):
20
+ Run the CLI with [uvx](https://docs.astral.sh/uv/):
21
21
 
22
22
  ```bash
23
23
  uvx spawnllm --help
24
24
  ```
25
25
 
26
- `uvx` fetches spawnllm into a throwaway environment and runs it. To add it
27
- to a project instead:
28
-
29
- ```bash
30
- uv add spawnllm
31
- ```
32
-
33
- For the local MLX engine (Apple Silicon only), pull the extra:
34
-
35
- ```bash
36
- uv add "spawnllm[mlx]"
37
- ```
26
+ For the local MLX engine (Apple Silicon only), pull the extra: `uv add "spawnllm[mlx]"`.
38
27
 
39
28
  ## Quickstart
40
29
 
@@ -60,9 +49,9 @@ uvx spawnllm call --backend claude "What is 2+2? Reply with just the number."
60
49
  4
61
50
  ```
62
51
 
63
- `--model small|medium|large` swaps the tier, which each backend maps to a concrete model.
64
- The `claude` backend resolves `small` to Haiku, `medium` to Sonnet, and `large` to Opus. Add
65
- `--agent` to let the call use tools.
52
+ `--model small|medium|large` swaps the tier, which each backend maps to a concrete model — the
53
+ `claude` backend resolves `small` to Haiku, `medium` to Sonnet, and `large` to Opus. Add
54
+ `--agent` to let the call use tools. Run `uvx spawnllm --help` for the full flag list.
66
55
 
67
56
  ### From Python
68
57
 
@@ -124,21 +113,14 @@ result = run_sync(
124
113
  print(result.stdout) # 4
125
114
  ```
126
115
 
127
- ## What problems does this solve?
128
-
129
- Every tool that shells out to `claude` or `codex` rebuilds the same plumbing: argv
130
- construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful
131
- errors. spawnllm holds it once.
132
-
133
- Structured output is boilerplate too. A Pydantic model becomes a JSON-schema constraint
134
- and a parsed, validated result, identically for both CLI backends.
135
-
136
- Local MLX is fiddly. Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
137
- batched single-token generation live behind one engine instead of in every consumer.
116
+ ## How it works
138
117
 
139
- Behavior drift goes away with the duplication: two tools that call the same models stay
140
- byte-for-byte consistent because they share the backend layer, not a pair of diverging
141
- copies.
118
+ Each backend holds plumbing that consumers would otherwise rebuild: the CLI backends own argv
119
+ construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful errors,
120
+ and they turn a Pydantic model into a JSON-schema constraint plus a parsed, validated result. The
121
+ MLX engine wraps adapter fusion, prompt-cache reuse, worker-thread lifecycle, and batched
122
+ single-token generation. Tools that share the layer stay byte-for-byte consistent instead of
123
+ drifting across diverging copies.
142
124
 
143
125
  ## Docs
144
126
 
@@ -1,6 +1,7 @@
1
1
  [project]
2
2
  name = "spawnllm"
3
- version = "0.4.0"
3
+ # Inert sentinel: the real version is set from the release tag (uv version --frozen).
4
+ version = "0.5.1"
4
5
  description = "Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools."
5
6
  readme = "README.md"
6
7
  license = "MIT"
@@ -1,14 +1,17 @@
1
1
  """Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
2
2
 
3
- The top-level namespace exposes the CLI backends, subprocess transport, and
4
- structured-output helpers. The MLX engine lives under `spawnllm.mlx`, whose
5
- imports are lazy so that `import spawnllm` never pulls `mlx_lm`/`zstandard`.
3
+ The top-level namespace exposes the three primitives `run`/`call`/`extract`
4
+ and their `_sync` companions over a `Backend` family that fully encapsulates
5
+ execution and returns one shared `Response`. The MLX engine lives under
6
+ `spawnllm.mlx`, whose imports are lazy so that `import spawnllm` never pulls
7
+ `mlx_lm`/`zstandard`.
6
8
  """
7
9
 
8
10
  from __future__ import annotations
9
11
 
10
12
  from spawnllm.backends import (
11
13
  AntigravityCliBackend,
14
+ BackendCallError,
12
15
  BackendNotAuthenticated,
13
16
  BackendNotInstalled,
14
17
  BackendReady,
@@ -18,26 +21,21 @@ from spawnllm.backends import (
18
21
  CliBackend,
19
22
  CodexCliBackend,
20
23
  GeminiCliBackend,
21
- Invocation,
22
24
  LlmBackend,
23
25
  LlmBackends,
24
26
  MlxBackend,
25
27
  select_backend,
26
28
  )
27
29
  from spawnllm.call import call, call_sync
28
- from spawnllm.proc import RunResult, arun_cli, collect_process, map_concurrent, run_cli
30
+ from spawnllm.extract import extract, extract_sync
31
+ from spawnllm.response import Error, Output, Response, Result
29
32
  from spawnllm.run import run, run_sync
30
33
  from spawnllm.spec import ClaudeConfig, CodexConfig, GeminiConfig, RunSpec
31
- from spawnllm.structured import (
32
- extract_structured,
33
- parse_result_envelope,
34
- parse_structured_output,
35
- resolve_schema_path,
36
- )
37
34
  from spawnllm.types import ProviderName, TModel, TSpecialty
38
35
 
39
36
  __all__ = [
40
37
  "AntigravityCliBackend",
38
+ "BackendCallError",
41
39
  "BackendNotAuthenticated",
42
40
  "BackendNotInstalled",
43
41
  "BackendReady",
@@ -48,28 +46,24 @@ __all__ = [
48
46
  "CliBackend",
49
47
  "CodexCliBackend",
50
48
  "CodexConfig",
49
+ "Error",
51
50
  "GeminiCliBackend",
52
51
  "GeminiConfig",
53
- "Invocation",
54
52
  "LlmBackend",
55
53
  "LlmBackends",
56
54
  "MlxBackend",
55
+ "Output",
57
56
  "ProviderName",
58
- "RunResult",
57
+ "Response",
58
+ "Result",
59
59
  "RunSpec",
60
60
  "TModel",
61
61
  "TSpecialty",
62
- "arun_cli",
63
62
  "call",
64
63
  "call_sync",
65
- "collect_process",
66
- "extract_structured",
67
- "map_concurrent",
68
- "parse_result_envelope",
69
- "parse_structured_output",
70
- "resolve_schema_path",
64
+ "extract",
65
+ "extract_sync",
71
66
  "run",
72
- "run_cli",
73
67
  "run_sync",
74
68
  "select_backend",
75
69
  ]
@@ -3,13 +3,13 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from spawnllm.backends.base import (
6
+ BackendCallError,
6
7
  BackendNotAuthenticated,
7
8
  BackendNotInstalled,
8
9
  BackendReady,
9
10
  BackendStatus,
10
11
  BackendUnavailable,
11
12
  CliBackend,
12
- Invocation,
13
13
  LlmBackend,
14
14
  )
15
15
  from spawnllm.backends.claude import ClaudeCliBackend
@@ -20,6 +20,7 @@ from spawnllm.backends.registry import LlmBackends, select_backend
20
20
 
21
21
  __all__ = [
22
22
  "AntigravityCliBackend",
23
+ "BackendCallError",
23
24
  "BackendNotAuthenticated",
24
25
  "BackendNotInstalled",
25
26
  "BackendReady",
@@ -29,7 +30,6 @@ __all__ = [
29
30
  "CliBackend",
30
31
  "CodexCliBackend",
31
32
  "GeminiCliBackend",
32
- "Invocation",
33
33
  "LlmBackend",
34
34
  "LlmBackends",
35
35
  "MlxBackend",
@@ -5,12 +5,14 @@ from __future__ import annotations
5
5
  import json
6
6
  import os
7
7
  import shutil
8
+ import subprocess
8
9
  from abc import ABC, abstractmethod
9
10
  from dataclasses import dataclass
10
11
  from pathlib import Path
11
12
  from typing import TYPE_CHECKING, ClassVar
12
13
 
13
- from spawnllm.proc import RunResult, acapture_cli, capture_cli
14
+ from spawnllm.proc import acapture_cli, capture_cli
15
+ from spawnllm.response import Error, Output, Response, Result
14
16
 
15
17
  if TYPE_CHECKING:
16
18
  from pydantic import BaseModel
@@ -62,6 +64,13 @@ class BackendUnavailable(RuntimeError):
62
64
  """Raised when no backend is ready (installed and authenticated)."""
63
65
 
64
66
 
67
+ class BackendCallError(RuntimeError):
68
+ """Raised by `call`/`extract` when a backend returns a provider error.
69
+
70
+ Carries the backend's error string: a nonzero exit with stderr, or an error envelope.
71
+ """
72
+
73
+
65
74
  @dataclass(frozen=True)
66
75
  class Invocation:
67
76
  """A built CLI invocation: argv, optional stdin, and where to read the result.
@@ -95,37 +104,28 @@ class LlmBackend(ABC):
95
104
  provider: ClassVar[ProviderName]
96
105
 
97
106
  @abstractmethod
98
- async def aexecute(self, spec: RunSpec) -> RunResult:
99
- """Execute a single run asynchronously and capture its raw outcome.
107
+ async def aexecute(self, spec: RunSpec) -> Response:
108
+ """Execute a single run asynchronously and resolve it to a `Response`.
100
109
 
101
- Args:
102
- spec: The configured run to execute.
103
-
104
- Returns:
105
- The captured stdout, stderr, and exit code.
106
- """
107
-
108
- @abstractmethod
109
- def execute(self, spec: RunSpec) -> RunResult:
110
- """Execute a single run synchronously and capture its raw outcome.
110
+ The backend runs the process, reads its output wherever the provider
111
+ writes it, detects failure, and validates against `spec.response_model`.
111
112
 
112
113
  Args:
113
114
  spec: The configured run to execute.
114
115
 
115
116
  Returns:
116
- The captured stdout, stderr, and exit code.
117
+ The resolved `Response`.
117
118
  """
118
119
 
119
120
  @abstractmethod
120
- def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
121
- """Parse raw stdout into text or a validated model.
121
+ def execute(self, spec: RunSpec) -> Response:
122
+ """Execute a single run synchronously and resolve it to a `Response`.
122
123
 
123
124
  Args:
124
- raw: Raw stdout from the backend.
125
- response_model: Model to validate against, or `None` for raw text.
125
+ spec: The configured run to execute.
126
126
 
127
127
  Returns:
128
- `raw` when `response_model` is `None`, else a validated instance.
128
+ The resolved `Response`.
129
129
  """
130
130
 
131
131
  @abstractmethod
@@ -172,6 +172,73 @@ class LlmBackend(ABC):
172
172
  """
173
173
  return json.dumps(model.model_json_schema())
174
174
 
175
+ def schema_arg(self, spec: RunSpec) -> str | None:
176
+ """Return the JSON-schema string for `spec`, from a `response_model` or a raw `schema`.
177
+
178
+ A `response_model` is run through `schema_for` (the provider's
179
+ strict-schema transform); a raw `schema` passes verbatim — a dict is
180
+ `json.dumps`'d, a string is returned unchanged. Returns `None` when
181
+ neither is set.
182
+
183
+ Args:
184
+ spec: The configured run, carrying the optional `response_model` or `schema`.
185
+
186
+ Returns:
187
+ The JSON-schema string for this backend's structured-output argument, or `None`.
188
+ """
189
+ if spec.response_model is not None:
190
+ return self.schema_for(spec.response_model)
191
+ if spec.schema is not None:
192
+ return json.dumps(spec.schema) if isinstance(spec.schema, dict) else spec.schema
193
+ return None
194
+
195
+ def to_response(self, raw: str, *, returncode: int, stderr: str, spec: RunSpec) -> Response:
196
+ """Resolve a raw capture into a structured `Response`: detect failure, extract text, validate.
197
+
198
+ `output` always carries the full raw stream. A nonzero exit, an error
199
+ envelope, or a `pydantic.ValidationError` from a non-conforming model all
200
+ route through `error` (with the underlying exception preserved in
201
+ `error.ex`) and leave `result` as `None`; a success yields `result` (text
202
+ from `result_text`, plus the validated model from `result_value` when
203
+ `spec.response_model` is set) and `error` as `None`.
204
+
205
+ Args:
206
+ raw: The raw output read wherever the provider wrote it.
207
+ returncode: The process exit code.
208
+ stderr: The captured stderr.
209
+ spec: The configured run, carrying the optional `response_model` or `schema`.
210
+
211
+ Returns:
212
+ The resolved `Response`.
213
+ """
214
+ import pydantic
215
+
216
+ output = Output(raw)
217
+ if returncode != 0:
218
+ msg = f"{self.provider} exited {returncode}: {stderr.strip()[-2000:]}"
219
+ return Response(spec=spec, output=output, error=Error(msg, BackendCallError(msg)))
220
+ if (err := self.envelope_error(raw)) is not None:
221
+ return Response(spec=spec, output=output, error=Error(err, BackendCallError(err)))
222
+ if spec.response_model is None:
223
+ return Response(spec=spec, output=output, result=Result(raw=self.result_text(raw)))
224
+ try:
225
+ parsed = spec.response_model.model_validate(self.result_value(raw))
226
+ except pydantic.ValidationError as e:
227
+ return Response(spec=spec, output=output, error=Error(str(e), e))
228
+ return Response(spec=spec, output=output, result=Result(raw=self.result_text(raw), parsed=parsed))
229
+
230
+ def result_text(self, raw: str) -> str:
231
+ """Return the final text output from a raw capture; the default is `raw` unchanged."""
232
+ return raw
233
+
234
+ def result_value(self, raw: str) -> object:
235
+ """Return the JSON value to validate from a raw capture; the default parses `raw` as JSON."""
236
+ return json.loads(raw)
237
+
238
+ def envelope_error(self, raw: str) -> str | None:
239
+ """Return the provider's error message from an error envelope, or `None` on success."""
240
+ return None
241
+
175
242
 
176
243
  class CliBackend(LlmBackend):
177
244
  """Execution contract for the subprocess-backed LLM family.
@@ -214,37 +281,47 @@ class CliBackend(LlmBackend):
214
281
  """
215
282
  return Invocation(self.build_command(spec), spec.prompt)
216
283
 
217
- async def aexecute(self, spec: RunSpec) -> RunResult:
284
+ def timed_out(self, spec: RunSpec) -> Response:
285
+ msg = f"{self.provider} timed out after {spec.timeout}s"
286
+ return Response(spec=spec, output=Output(""), error=Error(msg, TimeoutError(msg)))
287
+
288
+ async def aexecute(self, spec: RunSpec) -> Response:
218
289
  inv = self.invocation(spec)
219
290
  try:
220
- rr = await acapture_cli(
221
- inv.argv,
222
- input=inv.stdin,
223
- env=os.environ | self.env() | (spec.env or {}),
224
- cwd=spec.cwd,
225
- timeout=spec.timeout,
226
- )
227
- stdout = Path(inv.result_path).read_text() if inv.result_path else rr.stdout
291
+ try:
292
+ rr = await acapture_cli(
293
+ inv.argv,
294
+ input=inv.stdin,
295
+ env=os.environ | self.env() | (spec.env or {}),
296
+ cwd=spec.cwd,
297
+ timeout=spec.timeout,
298
+ )
299
+ except TimeoutError:
300
+ return self.timed_out(spec)
301
+ raw = Path(inv.result_path).read_text() if inv.result_path else rr.stdout
228
302
  finally:
229
303
  for path in inv.cleanup_paths:
230
304
  Path(path).unlink(missing_ok=True)
231
- return RunResult(stdout, rr.stderr, rr.returncode)
305
+ return self.to_response(raw, returncode=rr.returncode, stderr=rr.stderr, spec=spec)
232
306
 
233
- def execute(self, spec: RunSpec) -> RunResult:
307
+ def execute(self, spec: RunSpec) -> Response:
234
308
  inv = self.invocation(spec)
235
309
  try:
236
- rr = capture_cli(
237
- inv.argv,
238
- input=inv.stdin,
239
- env=os.environ | self.env() | (spec.env or {}),
240
- cwd=spec.cwd,
241
- timeout=spec.timeout,
242
- )
243
- stdout = Path(inv.result_path).read_text() if inv.result_path else rr.stdout
310
+ try:
311
+ rr = capture_cli(
312
+ inv.argv,
313
+ input=inv.stdin,
314
+ env=os.environ | self.env() | (spec.env or {}),
315
+ cwd=spec.cwd,
316
+ timeout=spec.timeout,
317
+ )
318
+ except subprocess.TimeoutExpired:
319
+ return self.timed_out(spec)
320
+ raw = Path(inv.result_path).read_text() if inv.result_path else rr.stdout
244
321
  finally:
245
322
  for path in inv.cleanup_paths:
246
323
  Path(path).unlink(missing_ok=True)
247
- return RunResult(stdout, rr.stderr, rr.returncode)
324
+ return self.to_response(raw, returncode=rr.returncode, stderr=rr.stderr, spec=spec)
248
325
 
249
326
  def check_status(self, *, timeout: int = 10) -> BackendStatus:
250
327
  """Check whether this backend's CLI is installed and authenticated.
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING, ClassVar
8
8
 
9
9
  from spawnllm.backends.base import CliBackend
10
10
  from spawnllm.spec import ClaudeConfig
11
- from spawnllm.structured import parse_structured_output
11
+ from spawnllm.structured import structured_value
12
12
 
13
13
  if TYPE_CHECKING:
14
14
  from pydantic import BaseModel
@@ -19,6 +19,21 @@ if TYPE_CHECKING:
19
19
  CLAUDE_MODELS: dict[TModel, str] = {"small": "haiku", "medium": "sonnet", "large": "opus"}
20
20
 
21
21
 
22
+ def result_event(raw: str) -> dict[str, object] | None:
23
+ """Return the `claude` result envelope: the dict itself, or the `type=="result"` stream-json event, else `None`."""
24
+ try:
25
+ data = json.loads(raw)
26
+ except json.JSONDecodeError:
27
+ return None
28
+ match data:
29
+ case {"is_error": _} | {"result": _}:
30
+ return data
31
+ case list():
32
+ return next((e for e in data if isinstance(e, dict) and e.get("type") == "result"), None)
33
+ case _:
34
+ return None
35
+
36
+
22
37
  class ClaudeCliBackend(CliBackend):
23
38
  """`CliBackend` for the Anthropic `claude` CLI.
24
39
 
@@ -53,6 +68,7 @@ class ClaudeCliBackend(CliBackend):
53
68
  The argv list to execute; the prompt is delivered over stdin.
54
69
  """
55
70
  cfg = spec.config_for(ClaudeConfig) or ClaudeConfig()
71
+ schema = self.schema_arg(spec)
56
72
  explicit = (
57
73
  cfg.permission_mode is not None
58
74
  or cfg.mcp_config is not None
@@ -68,11 +84,12 @@ class ClaudeCliBackend(CliBackend):
68
84
  "--no-session-persistence",
69
85
  "--model",
70
86
  spec.model,
87
+ *(["--setting-sources", ""] if spec.isolated else []),
88
+ *(["--strict-mcp-config"] if spec.isolated or cfg.strict_mcp else []),
71
89
  *(
72
90
  [
73
91
  *(["--permission-mode", cfg.permission_mode] if cfg.permission_mode is not None else []),
74
92
  *(["--mcp-config", cfg.mcp_config] if cfg.mcp_config is not None else []),
75
- *(["--strict-mcp-config"] if cfg.strict_mcp else []),
76
93
  *(["--disallowedTools", *cfg.disallowed_tools] if cfg.disallowed_tools else []),
77
94
  *(
78
95
  ["--append-system-prompt", cfg.append_system_prompt]
@@ -85,15 +102,15 @@ class ClaudeCliBackend(CliBackend):
85
102
  if explicit
86
103
  else ["--permission-mode", "auto", "--max-budget-usd", "1"]
87
104
  if spec.agent
88
- else ["--system-prompt", "", "--setting-sources", "", "--strict-mcp-config"]
105
+ else ["--system-prompt", ""]
89
106
  ),
90
107
  *(["--system-prompt", cfg.system_prompt] if cfg.system_prompt is not None else []),
91
108
  *(["--max-turns", str(cfg.max_turns)] if cfg.max_turns is not None else []),
92
109
  *(["--tools", cfg.tools] if cfg.tools is not None else []),
93
110
  *(["--disable-slash-commands"] if cfg.disable_slash_commands else []),
94
111
  *(
95
- ["--json-schema", spec.schema, "--output-format", "json"]
96
- if spec.schema
112
+ ["--json-schema", schema, "--output-format", "json"]
113
+ if schema
97
114
  else ["--output-format", cfg.output_format]
98
115
  if cfg.output_format
99
116
  else []
@@ -118,22 +135,30 @@ class ClaudeCliBackend(CliBackend):
118
135
 
119
136
  return json.dumps(transform_schema(model))
120
137
 
121
- def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
122
- """Parse `claude` stdout into text or a validated model.
138
+ def result_text(self, raw: str) -> str:
139
+ """Return the `result` text from the `claude` envelope, falling back to `raw` for plain text."""
140
+ if (event := result_event(raw)) is not None and isinstance(text := event.get("result"), str):
141
+ return text
142
+ return raw
123
143
 
124
- Args:
125
- raw: Raw stdout from the `claude` CLI.
126
- response_model: Model to validate against, or `None` for raw text.
144
+ def result_value(self, raw: str) -> object:
145
+ """Return the `structured_output` from the `claude` stream-json result event, else `raw` parsed as JSON."""
146
+ return structured_value(raw)
127
147
 
128
- Returns:
129
- `raw` for text calls; otherwise the validated `structured_output` from the result event, else `raw` as JSON.
130
- """
131
- return parse_structured_output(raw, response_model)
148
+ def envelope_error(self, raw: str) -> str | None:
149
+ """Return the error message when the `claude` result event marks the run as an error, else `None`."""
150
+ if (event := result_event(raw)) is not None and event.get("is_error"):
151
+ return event["result"] if isinstance(event.get("result"), str) else "claude reported an error"
152
+ return None
132
153
 
133
154
  def env(self) -> dict[str, str]:
134
- """Return no extra environment variables; the `claude` CLI runs with the inherited environment."""
135
- # CLAUDE_CODE_SIMPLE=1 breaks claude.ai keychain auth ("Not logged in")
136
- # on current CLIs; --setting-sources ""/--strict-mcp-config already trim startup.
155
+ """Return no extra environment variables; the `claude` CLI runs with the inherited environment.
156
+
157
+ Isolation is flag-only (`--setting-sources ""`/`--strict-mcp-config`). A fresh
158
+ `CLAUDE_CONFIG_DIR` would log the CLI out: the keychain token is keyed to the
159
+ `oauthAccount` recorded in `~/.claude.json`, absent from a relocated dir.
160
+ (`CLAUDE_CODE_SIMPLE=1` likewise breaks claude.ai keychain auth.)
161
+ """
137
162
  return {}
138
163
 
139
164
  def is_authenticated(self, *, timeout: int) -> bool: