spawnllm 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. {spawnllm-0.1.2 → spawnllm-0.2.0}/PKG-INFO +16 -9
  2. {spawnllm-0.1.2 → spawnllm-0.2.0}/README.md +15 -8
  3. {spawnllm-0.1.2 → spawnllm-0.2.0}/pyproject.toml +1 -1
  4. {spawnllm-0.1.2 → spawnllm-0.2.0}/spawnllm/__init__.py +18 -12
  5. spawnllm-0.2.0/spawnllm/backends/__init__.py +31 -0
  6. spawnllm-0.2.0/spawnllm/backends/base.py +152 -0
  7. spawnllm-0.2.0/spawnllm/backends/claude.py +176 -0
  8. spawnllm-0.2.0/spawnllm/backends/codex.py +91 -0
  9. spawnllm-0.2.0/spawnllm/backends/gemini.py +228 -0
  10. spawnllm-0.2.0/spawnllm/backends/registry.py +80 -0
  11. spawnllm-0.2.0/spawnllm/call.py +48 -0
  12. spawnllm-0.2.0/spawnllm/cli.py +66 -0
  13. {spawnllm-0.1.2 → spawnllm-0.2.0}/spawnllm/mlx/__init__.py +2 -2
  14. {spawnllm-0.1.2 → spawnllm-0.2.0}/spawnllm/mlx/codec.py +26 -6
  15. {spawnllm-0.1.2 → spawnllm-0.2.0}/spawnllm/mlx/engine.py +49 -5
  16. {spawnllm-0.1.2 → spawnllm-0.2.0}/spawnllm/mlx/fuse.py +23 -0
  17. {spawnllm-0.1.2 → spawnllm-0.2.0}/spawnllm/mlx/patches.py +11 -1
  18. {spawnllm-0.1.2 → spawnllm-0.2.0}/spawnllm/proc.py +53 -1
  19. spawnllm-0.2.0/spawnllm/structured.py +112 -0
  20. {spawnllm-0.1.2 → spawnllm-0.2.0}/spawnllm/types.py +3 -0
  21. spawnllm-0.1.2/spawnllm/backends/__init__.py +0 -27
  22. spawnllm-0.1.2/spawnllm/backends/base.py +0 -53
  23. spawnllm-0.1.2/spawnllm/backends/claude.py +0 -126
  24. spawnllm-0.1.2/spawnllm/backends/codex.py +0 -41
  25. spawnllm-0.1.2/spawnllm/backends/registry.py +0 -27
  26. spawnllm-0.1.2/spawnllm/call.py +0 -42
  27. spawnllm-0.1.2/spawnllm/cli.py +0 -40
  28. spawnllm-0.1.2/spawnllm/structured.py +0 -69
  29. {spawnllm-0.1.2 → spawnllm-0.2.0}/LICENSE +0 -0
  30. {spawnllm-0.1.2 → spawnllm-0.2.0}/spawnllm/__main__.py +0 -0
  31. {spawnllm-0.1.2 → spawnllm-0.2.0}/spawnllm/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spawnllm
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
5
5
  Keywords:
6
6
  Author: Yasyf Mohamedali
@@ -45,6 +45,8 @@ Description-Content-Type: text/markdown
45
45
 
46
46
  # spawnllm
47
47
 
48
+ ![spawnllm banner](https://github.com/yasyf/spawnllm/raw/main/docs/assets/readme-banner.webp)
49
+
48
50
  [![PyPI](https://img.shields.io/pypi/v/spawnllm.svg)](https://pypi.org/project/spawnllm/)
49
51
  [![Python](https://img.shields.io/pypi/pyversions/spawnllm.svg)](https://pypi.org/project/spawnllm/)
50
52
  [![Docs](https://img.shields.io/github/actions/workflow/status/yasyf/spawnllm/docs.yml?branch=main&label=docs)](https://yasyf.github.io/spawnllm/)
@@ -95,14 +97,19 @@ mlx
95
97
 
96
98
  ## What problems does this solve?
97
99
 
98
- - **Duplicate subshell plumbing.** Building `claude`/`codex` argv, piping stdin/stdout, teeing
99
- stderr, and turning non-zero exits into useful errors — written once, not re-derived per tool.
100
- - **Structured-output boilerplate.** A Pydantic model becomes a JSON-schema constraint and a
101
- parsed, validated result the same way for every backend.
102
- - **Local MLX is fiddly.** Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
103
- batched single-token generation live behind one engine instead of in every consumer.
104
- - **Behavior drift.** Two tools that call the same models stay byte-for-byte consistent because
105
- they share the backend layer rather than each maintaining a copy.
100
+ Every tool that shells out to `claude` or `codex` rebuilds the same plumbing: argv
101
+ construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful
102
+ errors. spawnllm holds it once.
103
+
104
+ Structured output is boilerplate too. A Pydantic model becomes a JSON-schema constraint
105
+ and a parsed, validated result, identically for both CLI backends.
106
+
107
+ Local MLX is fiddly. Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
108
+ batched single-token generation live behind one engine instead of in every consumer.
109
+
110
+ Behavior drift goes away with the duplication: two tools that call the same models stay
111
+ byte-for-byte consistent because they share the backend layer, not a pair of diverging
112
+ copies.
106
113
 
107
114
  ## Docs
108
115
 
@@ -1,5 +1,7 @@
1
1
  # spawnllm
2
2
 
3
+ ![spawnllm banner](https://github.com/yasyf/spawnllm/raw/main/docs/assets/readme-banner.webp)
4
+
3
5
  [![PyPI](https://img.shields.io/pypi/v/spawnllm.svg)](https://pypi.org/project/spawnllm/)
4
6
  [![Python](https://img.shields.io/pypi/pyversions/spawnllm.svg)](https://pypi.org/project/spawnllm/)
5
7
  [![Docs](https://img.shields.io/github/actions/workflow/status/yasyf/spawnllm/docs.yml?branch=main&label=docs)](https://yasyf.github.io/spawnllm/)
@@ -50,14 +52,19 @@ mlx
50
52
 
51
53
  ## What problems does this solve?
52
54
 
53
- - **Duplicate subshell plumbing.** Building `claude`/`codex` argv, piping stdin/stdout, teeing
54
- stderr, and turning non-zero exits into useful errors — written once, not re-derived per tool.
55
- - **Structured-output boilerplate.** A Pydantic model becomes a JSON-schema constraint and a
56
- parsed, validated result the same way for every backend.
57
- - **Local MLX is fiddly.** Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
58
- batched single-token generation live behind one engine instead of in every consumer.
59
- - **Behavior drift.** Two tools that call the same models stay byte-for-byte consistent because
60
- they share the backend layer rather than each maintaining a copy.
55
+ Every tool that shells out to `claude` or `codex` rebuilds the same plumbing: argv
56
+ construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful
57
+ errors. spawnllm holds it once.
58
+
59
+ Structured output is boilerplate too. A Pydantic model becomes a JSON-schema constraint
60
+ and a parsed, validated result, identically for both CLI backends.
61
+
62
+ Local MLX is fiddly. Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
63
+ batched single-token generation live behind one engine instead of in every consumer.
64
+
65
+ Behavior drift goes away with the duplication: two tools that call the same models stay
66
+ byte-for-byte consistent because they share the backend layer, not a pair of diverging
67
+ copies.
61
68
 
62
69
  ## Docs
63
70
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "spawnllm"
3
- version = "0.1.2"
3
+ version = "0.2.0"
4
4
  description = "Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools."
5
5
  readme = "README.md"
6
6
  license = "MIT"
@@ -1,22 +1,25 @@
1
1
  """Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
2
2
 
3
3
  The top-level namespace exposes the CLI backends, subprocess transport, and
4
- structured-output helpers. The MLX engine lives under :mod:`spawnllm.mlx` and is
5
- imported lazily so that ``import spawnllm`` never pulls ``mlx_lm``/``zstandard``.
4
+ structured-output helpers. The MLX engine lives under `spawnllm.mlx`, whose
5
+ imports are lazy so that `import spawnllm` never pulls `mlx_lm`/`zstandard`.
6
6
  """
7
7
 
8
8
  from __future__ import annotations
9
9
 
10
10
  from spawnllm.backends import (
11
+ AntigravityCliBackend,
12
+ BackendNotAuthenticated,
13
+ BackendNotInstalled,
14
+ BackendReady,
15
+ BackendStatus,
16
+ BackendUnavailable,
11
17
  ClaudeCliBackend,
12
- ClaudeNotAuthenticated,
13
- ClaudeNotInstalled,
14
- ClaudeReady,
15
- ClaudeStatus,
16
18
  CodexCliBackend,
19
+ GeminiCliBackend,
17
20
  LlmBackend,
18
21
  LlmBackends,
19
- check_status,
22
+ select_backend,
20
23
  )
21
24
  from spawnllm.call import call
22
25
  from spawnllm.proc import arun_cli, collect_process, map_concurrent, run_cli
@@ -30,19 +33,21 @@ from spawnllm.structured import (
30
33
  from spawnllm.types import TModel, TSpecialty
31
34
 
32
35
  __all__ = [
36
+ "AntigravityCliBackend",
37
+ "BackendNotAuthenticated",
38
+ "BackendNotInstalled",
39
+ "BackendReady",
40
+ "BackendStatus",
41
+ "BackendUnavailable",
33
42
  "ClaudeCliBackend",
34
- "ClaudeNotAuthenticated",
35
- "ClaudeNotInstalled",
36
- "ClaudeReady",
37
- "ClaudeStatus",
38
43
  "CodexCliBackend",
44
+ "GeminiCliBackend",
39
45
  "LlmBackend",
40
46
  "LlmBackends",
41
47
  "TModel",
42
48
  "TSpecialty",
43
49
  "arun_cli",
44
50
  "call",
45
- "check_status",
46
51
  "collect_process",
47
52
  "extract_structured",
48
53
  "map_concurrent",
@@ -51,4 +56,5 @@ __all__ = [
51
56
  "resolve_schema_path",
52
57
  "run_cli",
53
58
  "schema_for",
59
+ "select_backend",
54
60
  ]
@@ -0,0 +1,31 @@
1
+ """LLM CLI backends (Claude/Codex/Gemini family) and the specialty registry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from spawnllm.backends.base import (
6
+ BackendNotAuthenticated,
7
+ BackendNotInstalled,
8
+ BackendReady,
9
+ BackendStatus,
10
+ BackendUnavailable,
11
+ LlmBackend,
12
+ )
13
+ from spawnllm.backends.claude import ClaudeCliBackend
14
+ from spawnllm.backends.codex import CodexCliBackend
15
+ from spawnllm.backends.gemini import AntigravityCliBackend, GeminiCliBackend
16
+ from spawnllm.backends.registry import LlmBackends, select_backend
17
+
18
+ __all__ = [
19
+ "AntigravityCliBackend",
20
+ "BackendNotAuthenticated",
21
+ "BackendNotInstalled",
22
+ "BackendReady",
23
+ "BackendStatus",
24
+ "BackendUnavailable",
25
+ "ClaudeCliBackend",
26
+ "CodexCliBackend",
27
+ "GeminiCliBackend",
28
+ "LlmBackend",
29
+ "LlmBackends",
30
+ "select_backend",
31
+ ]
@@ -0,0 +1,152 @@
1
+ """Abstract interface for an LLM CLI backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import shutil
6
+ from abc import ABC, abstractmethod
7
+ from dataclasses import dataclass
8
+ from typing import TYPE_CHECKING, ClassVar
9
+
10
+ if TYPE_CHECKING:
11
+ from pydantic import BaseModel
12
+
13
+ from spawnllm.types import TModel
14
+
15
+
16
@dataclass(frozen=True)
class BackendReady:
    """Status value for a backend whose CLI is both installed and authenticated.

    Attributes:
        binary: Executable name of the backend's CLI, as resolved on PATH.
    """

    binary: str
25
+
26
+
27
@dataclass(frozen=True)
class BackendNotInstalled:
    """Status value for a backend whose CLI cannot be found on PATH.

    Attributes:
        binary: Executable name of the backend's CLI.
        install_hint: Shell command suggested for installing the CLI.
    """

    binary: str
    install_hint: str
38
+
39
+
40
@dataclass(frozen=True)
class BackendNotAuthenticated:
    """Status value for a backend whose CLI is installed but has no valid login.

    Attributes:
        binary: Executable name of the backend's CLI, as resolved on PATH.
    """

    binary: str
49
+
50
+
51
# Evaluated at import time (not just an annotation): `|` between classes builds
# a runtime union object, which needs Python 3.10+.
BackendStatus = BackendReady | BackendNotInstalled | BackendNotAuthenticated
"""Result of `LlmBackend.check_status`: `BackendReady`, `BackendNotInstalled`, or `BackendNotAuthenticated`."""
53
+
54
+
55
class BackendUnavailable(RuntimeError):
    """Error signalling that no backend is both installed and authenticated."""
57
+
58
+
59
class LlmBackend(ABC):
    """Abstract interface for an LLM CLI backend.

    Concrete backends map abstract model sizes to provider-specific model names
    and encapsulate how to invoke the provider's CLI and parse the raw response.

    Attributes:
        models: Mapping from abstract model size to the provider's model name.
        binary: Name of the backend's CLI executable; `check_status` looks it
            up on PATH.
        install_hint: Suggested shell command to install the CLI; reported by
            `check_status` when the executable is missing.
    """

    models: ClassVar[dict[TModel, str]]
    binary: ClassVar[str]
    install_hint: ClassVar[str]

    @abstractmethod
    def build_command(self, model: str, schema_path: str | None, agent: bool) -> list[str]:
        """Build the CLI argv for a single invocation (prompt delivered via stdin).

        Args:
            model: Provider-specific model name.
            schema_path: Schema argument for structured output, or `None`.
            agent: Whether the invocation may use tools / agent capabilities.

        Returns:
            The argv list to execute.
        """

    @abstractmethod
    def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
        """Parse raw CLI stdout into text or a validated model.

        Args:
            raw: Raw stdout from the backend CLI.
            response_model: Model to validate against, or `None` for raw text.

        Returns:
            `raw` when `response_model` is `None`, else a validated instance.
        """

    @abstractmethod
    def env(self) -> dict[str, str]:
        """Return extra environment variables for the CLI invocation, merged over the inherited environment."""

    def check_status(self, *, timeout: int = 10) -> BackendStatus:
        """Check whether this backend's CLI is installed and authenticated.

        Args:
            timeout: Seconds to wait for the authentication probe.

        Returns:
            `BackendNotInstalled` when `binary` is not on PATH, `BackendReady`
            when `is_authenticated` returns true, else
            `BackendNotAuthenticated`.

        Raises:
            Exception: Nothing is caught here, so whatever `is_authenticated`
                raises propagates (e.g. `subprocess.TimeoutExpired` from a
                subprocess-based probe that exceeds `timeout`).
        """
        # Resolve PATH first so the credential probe never runs for a missing CLI.
        if shutil.which(self.binary) is None:
            return BackendNotInstalled(binary=self.binary, install_hint=self.install_hint)
        if self.is_authenticated(timeout=timeout):
            return BackendReady(binary=self.binary)
        return BackendNotAuthenticated(binary=self.binary)

    @abstractmethod
    def is_authenticated(self, *, timeout: int) -> bool:
        """Probe whether the CLI holds valid credentials for its provider.

        "Authenticated" means the CLI reports an active login session for the
        provider, not merely that the executable is present on PATH.

        Args:
            timeout: Seconds to wait for the credential probe.

        Returns:
            `True` when the CLI reports an authenticated session.
        """

    def invocation(
        self, prompt: str, *, model: str, schema_path: str | None, agent: bool
    ) -> tuple[list[str], str | None]:
        """Build the argv and stdin text for a single invocation.

        The default delivers the prompt over stdin, so it always returns
        `prompt` as the stdin text; subclasses override to deliver it inline
        within the argv.

        Args:
            prompt: The prompt text to deliver to the CLI.
            model: Provider-specific model name.
            schema_path: Schema argument for structured output, or `None`.
            agent: Whether the invocation may use tools / agent capabilities.

        Returns:
            A `(argv, stdin_text)` pair; `stdin_text` is `None` when an
            overriding subclass delivers the prompt inline.
        """
        return self.build_command(model, schema_path, agent), prompt
@@ -0,0 +1,176 @@
1
+ """LlmBackend for the Anthropic `claude` CLI, plus install/auth status checks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import subprocess
6
+ from dataclasses import dataclass
7
+ from typing import TYPE_CHECKING, ClassVar
8
+
9
+ from spawnllm.backends.base import LlmBackend
10
+ from spawnllm.structured import parse_result_envelope, parse_structured_output
11
+
12
+ if TYPE_CHECKING:
13
+ from pydantic import BaseModel
14
+
15
+ from spawnllm.types import TModel
16
+
17
# Abstract size tier -> `claude` CLI model alias; assigned to `ClaudeCliBackend.models`.
CLAUDE_MODELS: dict[TModel, str] = {"small": "haiku", "medium": "sonnet", "large": "opus"}
18
+
19
+
20
@dataclass(frozen=True)
class ClaudeCliBackend(LlmBackend):
    """`LlmBackend` for the Anthropic `claude` CLI.

    The default (no-arg) construction delivers the prompt over stdin with abstract
    model tiers and structured-output parsing. The `cc_sentiment` preset
    configures inline `-p` prompting with `{is_error, result}` envelope parsing.

    Every argv built here starts with `binary`, the same name the inherited
    `check_status` looks up on PATH, so the executable that is probed is the
    executable that is run.

    Attributes:
        models: Mapping from abstract model size to a Claude model alias
            (`haiku`/`sonnet`/`opus`).
        binary: Name of the CLI executable (`claude`).
        install_hint: Shell command suggested for installing the CLI.
        inline_system_prompt: System prompt that `build_argv` passes via
            `--system-prompt`.
        verbose: Whether `build_argv` appends `--verbose`.

    Example:
        >>> ClaudeCliBackend().build_command("haiku", None, agent=False)[:5]
        ['claude', '-p', '--no-session-persistence', '--model', 'haiku']
    """

    models: ClassVar[dict[TModel, str]] = CLAUDE_MODELS
    binary: ClassVar[str] = "claude"
    install_hint: ClassVar[str] = "curl -fsSL https://claude.ai/install.sh | bash"

    inline_system_prompt: str = ""
    verbose: bool = False

    @classmethod
    def cc_sentiment(cls, *, system_prompt: str, verbose: bool = False) -> ClaudeCliBackend:
        """Build a backend preset for the sentiment/pushback scoring path.

        Args:
            system_prompt: System prompt that `build_argv` passes via
                `--system-prompt`.
            verbose: Whether `build_argv` appends `--verbose`.

        Returns:
            A `ClaudeCliBackend` for inline `-p` prompting; parse its stdout with `parse_result_envelope`.
        """
        return cls(inline_system_prompt=system_prompt, verbose=verbose)

    def build_command(self, model: str, schema_path: str | None, agent: bool) -> list[str]:
        """Build the `claude -p` argv for one stdin-prompted invocation.

        Every invocation runs without session persistence. Agent invocations
        add `--permission-mode auto` and a $1 `--max-budget-usd` cap;
        non-agent invocations empty the system prompt, disable setting
        sources, and load no MCP servers. A schema adds `--json-schema` with
        `--output-format json`.

        Args:
            model: Claude model name or alias, e.g. `haiku`.
            schema_path: Inline JSON schema passed to `--json-schema`, or `None`.
                An empty string is treated like `None`.
            agent: Whether the invocation may use tools / agent capabilities.

        Returns:
            The argv list to execute.
        """
        mode_flags = (
            ["--permission-mode", "auto", "--max-budget-usd", "1"]
            if agent
            else ["--system-prompt", "", "--setting-sources", "", "--strict-mcp-config"]
        )
        schema_flags = ["--json-schema", schema_path, "--output-format", "json"] if schema_path else []
        return [
            self.binary,
            "-p",
            "--no-session-persistence",
            "--model",
            model,
            *mode_flags,
            *schema_flags,
        ]

    def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
        """Parse `claude` stdout into text or a validated model.

        Args:
            raw: Raw stdout from the `claude` CLI.
            response_model: Model to validate against, or `None` for raw text.

        Returns:
            `raw` for text calls; otherwise the validated `structured_output` from the result event, else `raw` as JSON.
        """
        return parse_structured_output(raw, response_model)

    def env(self) -> dict[str, str]:
        """Return no extra environment variables; the `claude` CLI runs with the inherited environment."""
        # CLAUDE_CODE_SIMPLE=1 breaks claude.ai keychain auth ("Not logged in")
        # on current CLIs; --setting-sources ""/--strict-mcp-config already trim startup.
        return {}

    def is_authenticated(self, *, timeout: int) -> bool:
        """Report whether `claude auth status` exits with status 0, taken to mean a login is stored.

        Args:
            timeout: Seconds to wait for `claude auth status`.

        Returns:
            `True` when the probe exits 0.

        Raises:
            subprocess.TimeoutExpired: If the probe runs longer than `timeout`.
        """
        # check=False: a non-zero exit is the "not authenticated" answer, not an error.
        probe = subprocess.run(
            [self.binary, "auth", "status"], capture_output=True, text=True, timeout=timeout, check=False
        )
        return probe.returncode == 0

    def build_argv(self, content: str, *, model: str) -> list[str]:
        """Build the inline `-p` argv for the sentiment/pushback scoring path.

        The prompt travels inline as the `-p` argument instead of over stdin.
        The invocation uses `inline_system_prompt` as the system prompt, JSON
        output, a single turn, no tools, and no slash commands; `verbose`
        appends `--verbose`.

        Args:
            content: Prompt text passed inline via `-p`.
            model: Claude model name or alias, e.g. `haiku`.

        Returns:
            The argv list to execute; parse its stdout with `parse_result_envelope`.
        """
        argv = [
            self.binary,
            "-p",
            content,
            "--model",
            model,
            "--system-prompt",
            self.inline_system_prompt,
            "--output-format",
            "json",
            "--max-turns",
            "1",
            "--tools",
            "",
            "--disable-slash-commands",
        ]
        if self.verbose:
            argv.append("--verbose")
        return argv

    @staticmethod
    def parse_result_envelope(stdout: bytes, *, argv: list[str], stderr: bytes) -> str:
        """Parse the `{is_error, result}` JSON envelope from `claude -p --output-format json`.

        Args:
            stdout: Raw stdout bytes holding the JSON envelope.
            argv: The argv that produced the output, recorded on the raised error.
            stderr: Raw stderr bytes, recorded on the raised error.

        Returns:
            The envelope's `result` string.

        Raises:
            subprocess.CalledProcessError: If the envelope's `is_error` flag is set.
        """
        # Not recursion: class scope is not searched from inside a method body,
        # so this bare name resolves to the module-level import of the same name.
        return parse_result_envelope(stdout, argv=argv, stderr=stderr)
@@ -0,0 +1,91 @@
1
+ """LlmBackend for the OpenAI `codex` CLI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import subprocess
6
+ from typing import TYPE_CHECKING, ClassVar
7
+
8
+ from spawnllm.backends.base import LlmBackend
9
+
10
+ if TYPE_CHECKING:
11
+ from pydantic import BaseModel
12
+
13
+ from spawnllm.types import TModel
14
+
15
+
16
class CodexCliBackend(LlmBackend):
    """`LlmBackend` for the OpenAI `codex` CLI.

    Invokes `codex exec` with an ephemeral session and a read-only sandbox.
    Every argv built here starts with `binary`, the same name the inherited
    `check_status` looks up on PATH.

    Attributes:
        models: Mapping from abstract model size to an OpenAI model name.
        binary: Name of the CLI executable (`codex`).
        install_hint: Shell command suggested for installing the CLI.
    """

    models: ClassVar[dict[TModel, str]] = {
        "small": "gpt-5.3-codex-spark",
        "medium": "gpt-5.4-mini",
        "large": "gpt-5.5",
    }
    binary: ClassVar[str] = "codex"
    install_hint: ClassVar[str] = "npm install -g @openai/codex"

    def build_command(self, model: str, schema_path: str | None, agent: bool) -> list[str]:
        """Build the `codex exec` argv for one stdin-prompted invocation.

        Every invocation runs an ephemeral session in a read-only sandbox.
        Non-agent invocations disable Codex hooks and MCP servers. A schema
        path adds `--output-schema`.

        Args:
            model: OpenAI model name, e.g. `gpt-5.5`.
            schema_path: Path to a JSON schema file passed to
                `--output-schema`, or `None`. An empty string is treated
                like `None`.
            agent: Whether the invocation may use tools / agent capabilities.

        Returns:
            The argv list to execute.
        """
        feature_flags = (
            [] if agent else ["-c", "features.codex_hooks=false", "-c", "features.mcp_servers=false"]
        )
        schema_flags = ["--output-schema", schema_path] if schema_path else []
        return [
            self.binary,
            "exec",
            "--ephemeral",
            "--sandbox",
            "read-only",
            "--model",
            model,
            *feature_flags,
            *schema_flags,
        ]

    def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
        """Parse `codex` stdout into text or a validated model.

        Args:
            raw: Raw stdout from the `codex` CLI.
            response_model: Model to validate against, or `None` for raw text.

        Returns:
            `raw` when `response_model` is `None`; otherwise `raw` validated as JSON against `response_model`.
        """
        # Test for None explicitly: the contract is "None or a model class",
        # not the truthiness of a class object.
        if response_model is None:
            return raw
        return response_model.model_validate_json(raw)

    def env(self) -> dict[str, str]:
        """Return no extra environment variables; the `codex` CLI runs with the inherited environment."""
        return {}

    def is_authenticated(self, *, timeout: int) -> bool:
        """Report whether `codex login status` exits cleanly, i.e. the CLI is logged in.

        Args:
            timeout: Seconds to wait for `codex login status`.

        Returns:
            `True` when `codex login status` exits 0.

        Raises:
            subprocess.TimeoutExpired: If the probe runs longer than `timeout`.
        """
        # check=False: a non-zero exit is the "not logged in" answer, not an error.
        probe = subprocess.run(
            [self.binary, "login", "status"], capture_output=True, text=True, timeout=timeout, check=False
        )
        return probe.returncode == 0