spawnllm 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spawnllm-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Yasyf Mohamedali
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,109 @@
1
+ Metadata-Version: 2.4
2
+ Name: spawnllm
3
+ Version: 0.1.0
4
+ Summary: Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
5
+ Keywords:
6
+ Author: Yasyf Mohamedali
7
+ Author-email: Yasyf Mohamedali <yasyfm@gmail.com>
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Typing :: Typed
16
+ Requires-Dist: click>=8
17
+ Requires-Dist: loguru>=0.7
18
+ Requires-Dist: pydantic>=2
19
+ Requires-Dist: zstandard>=0.25.0 ; extra == 'adapter'
20
+ Requires-Dist: numpy>=1.26 ; extra == 'adapter'
21
+ Requires-Dist: orjson>=3.10 ; extra == 'adapter'
22
+ Requires-Dist: anyio>=4 ; extra == 'dev'
23
+ Requires-Dist: pytest>=8.0 ; extra == 'dev'
24
+ Requires-Dist: ruff>=0.8 ; extra == 'dev'
25
+ Requires-Dist: ty>=0.0.44 ; extra == 'dev'
26
+ Requires-Dist: zstandard>=0.25.0 ; extra == 'dev'
27
+ Requires-Dist: numpy>=1.26 ; extra == 'dev'
28
+ Requires-Dist: orjson>=3.10 ; extra == 'dev'
29
+ Requires-Dist: zstandard>=0.25.0 ; extra == 'mlx'
30
+ Requires-Dist: numpy>=1.26 ; extra == 'mlx'
31
+ Requires-Dist: orjson>=3.10 ; extra == 'mlx'
32
+ Requires-Dist: anyio>=4.4 ; extra == 'mlx'
33
+ Requires-Dist: huggingface-hub>=0.25 ; extra == 'mlx'
34
+ Requires-Dist: mlx-lm>=0.31.3 ; platform_machine == 'arm64' and sys_platform == 'darwin' and extra == 'mlx'
35
+ Requires-Python: >=3.13
36
+ Project-URL: Homepage, https://github.com/yasyf/spawnllm
37
+ Project-URL: Documentation, https://yasyf.github.io/spawnllm/
38
+ Project-URL: Repository, https://github.com/yasyf/spawnllm
39
+ Project-URL: Issues, https://github.com/yasyf/spawnllm/issues
40
+ Project-URL: Changelog, https://github.com/yasyf/spawnllm/blob/main/CHANGELOG.md
41
+ Provides-Extra: adapter
42
+ Provides-Extra: dev
43
+ Provides-Extra: mlx
44
+ Description-Content-Type: text/markdown
45
+
46
+ # spawnllm
47
+
48
+ [![PyPI](https://img.shields.io/pypi/v/spawnllm.svg)](https://pypi.org/project/spawnllm/)
49
+ [![Python](https://img.shields.io/pypi/pyversions/spawnllm.svg)](https://pypi.org/project/spawnllm/)
50
+ [![Docs](https://img.shields.io/github/actions/workflow/status/yasyf/spawnllm/docs.yml?branch=main&label=docs)](https://yasyf.github.io/spawnllm/)
51
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/yasyf/spawnllm/blob/main/LICENSE)
52
+
53
+ Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
54
+
55
+ spawnllm centralizes the LLM-calling plumbing that small tools keep re-inventing: driving the
56
+ `claude` and `codex` CLIs as subshells — with structured Pydantic output, model tiers, and
57
+ faithful error capture — and running local Apple-Silicon MLX models with adapter fusion,
58
+ prompt-cache reuse, and batched generation. Depend on it once and each tool keeps only its
59
+ domain logic instead of its own copy of the backends.
60
+
61
+ ## Install
62
+
63
+ No install needed — run everything through [uvx](https://docs.astral.sh/uv/):
64
+
65
+ ```bash
66
+ uvx spawnllm --help
67
+ ```
68
+
69
+ `uvx` fetches spawnllm into a throwaway environment and runs it. To add it
70
+ to a project instead:
71
+
72
+ ```bash
73
+ uv add spawnllm
74
+ ```
75
+
76
+ For the local MLX engine (Apple Silicon only), pull the extra:
77
+
78
+ ```bash
79
+ uv add "spawnllm[mlx]"
80
+ ```
81
+
82
+ ## Quickstart
83
+
84
+ List the backends spawnllm can drive:
85
+
86
+ ```bash
87
+ uvx spawnllm backends
88
+ ```
89
+
90
+ ```
91
+ claude
92
+ codex
93
+ mlx
94
+ ```
95
+
96
+ ## What problems does this solve?
97
+
98
+ - **Duplicate subshell plumbing.** Building `claude`/`codex` argv, piping stdin/stdout, teeing
99
+ stderr, and turning non-zero exits into useful errors — written once, not re-derived per tool.
100
+ - **Structured-output boilerplate.** A Pydantic model becomes a JSON-schema constraint and a
101
+ parsed, validated result the same way for every backend.
102
+ - **Local MLX is fiddly.** Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
103
+ batched single-token generation live behind one engine instead of in every consumer.
104
+ - **Behavior drift.** Two tools that call the same models stay byte-for-byte consistent because
105
+ they share the backend layer rather than each maintaining a copy.
106
+
107
+ ## Docs
108
+
109
+ [Read the docs](https://yasyf.github.io/spawnllm/) for the full guide and API reference.
@@ -0,0 +1,64 @@
1
+ # spawnllm
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/spawnllm.svg)](https://pypi.org/project/spawnllm/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/spawnllm.svg)](https://pypi.org/project/spawnllm/)
5
+ [![Docs](https://img.shields.io/github/actions/workflow/status/yasyf/spawnllm/docs.yml?branch=main&label=docs)](https://yasyf.github.io/spawnllm/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/yasyf/spawnllm/blob/main/LICENSE)
7
+
8
+ Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
9
+
10
+ spawnllm centralizes the LLM-calling plumbing that small tools keep re-inventing: driving the
11
+ `claude` and `codex` CLIs as subshells — with structured Pydantic output, model tiers, and
12
+ faithful error capture — and running local Apple-Silicon MLX models with adapter fusion,
13
+ prompt-cache reuse, and batched generation. Depend on it once and each tool keeps only its
14
+ domain logic instead of its own copy of the backends.
15
+
16
+ ## Install
17
+
18
+ No install needed — run everything through [uvx](https://docs.astral.sh/uv/):
19
+
20
+ ```bash
21
+ uvx spawnllm --help
22
+ ```
23
+
24
+ `uvx` fetches spawnllm into a throwaway environment and runs it. To add it
25
+ to a project instead:
26
+
27
+ ```bash
28
+ uv add spawnllm
29
+ ```
30
+
31
+ For the local MLX engine (Apple Silicon only), pull the extra:
32
+
33
+ ```bash
34
+ uv add "spawnllm[mlx]"
35
+ ```
36
+
37
+ ## Quickstart
38
+
39
+ List the backends spawnllm can drive:
40
+
41
+ ```bash
42
+ uvx spawnllm backends
43
+ ```
44
+
45
+ ```
46
+ claude
47
+ codex
48
+ mlx
49
+ ```
50
+
51
+ ## What problems does this solve?
52
+
53
+ - **Duplicate subshell plumbing.** Building `claude`/`codex` argv, piping stdin/stdout, teeing
54
+ stderr, and turning non-zero exits into useful errors — written once, not re-derived per tool.
55
+ - **Structured-output boilerplate.** A Pydantic model becomes a JSON-schema constraint and a
56
+ parsed, validated result the same way for every backend.
57
+ - **Local MLX is fiddly.** Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
58
+ batched single-token generation live behind one engine instead of in every consumer.
59
+ - **Behavior drift.** Two tools that call the same models stay byte-for-byte consistent because
60
+ they share the backend layer rather than each maintaining a copy.
61
+
62
+ ## Docs
63
+
64
+ [Read the docs](https://yasyf.github.io/spawnllm/) for the full guide and API reference.
@@ -0,0 +1,129 @@
1
+ [project]
2
+ name = "spawnllm"
3
+ version = "0.1.0"
4
+ description = "Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools."
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ license-files = ["LICENSE"]
8
+ authors = [{ name = "Yasyf Mohamedali", email = "yasyfm@gmail.com" }]
9
+ keywords = []
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "Intended Audience :: Developers",
13
+ "Operating System :: OS Independent",
14
+ "Programming Language :: Python :: 3",
15
+ "Programming Language :: Python :: 3 :: Only",
16
+ "Typing :: Typed",
17
+ ]
18
+ requires-python = ">=3.13"
19
+ dependencies = [
20
+ "click>=8",
21
+ "loguru>=0.7",
22
+ "pydantic>=2",
23
+ ]
24
+
25
+ [project.optional-dependencies]
26
+ dev = [
27
+ "anyio>=4",
28
+ "pytest>=8.0",
29
+ "ruff>=0.8",
30
+ "ty>=0.0.44",
31
+ # Codec tests exercise zstandard/numpy/orjson.
32
+ "zstandard>=0.25.0",
33
+ "numpy>=1.26",
34
+ "orjson>=3.10",
35
+ ]
36
+ # Cross-platform LoRA adapter codec (zstd + numpy); imported without mlx.
37
+ adapter = [
38
+ "zstandard>=0.25.0",
39
+ "numpy>=1.26",
40
+ "orjson>=3.10",
41
+ ]
42
+ # Local Apple-Silicon MLX engine. mlx-lm carries the darwin/arm64 marker; the
43
+ # codec libs and anyio are cross-platform but only meaningful alongside it.
44
+ mlx = [
45
+ "zstandard>=0.25.0",
46
+ "numpy>=1.26",
47
+ "orjson>=3.10",
48
+ "anyio>=4.4",
49
+ "huggingface-hub>=0.25",
50
+ "mlx-lm>=0.31.3; sys_platform == 'darwin' and platform_machine == 'arm64'",
51
+ ]
52
+
53
+ [project.scripts]
54
+ spawnllm = "spawnllm.cli:main"
55
+
56
+ [project.urls]
57
+ Homepage = "https://github.com/yasyf/spawnllm"
58
+ Documentation = "https://yasyf.github.io/spawnllm/"
59
+ Repository = "https://github.com/yasyf/spawnllm"
60
+ Issues = "https://github.com/yasyf/spawnllm/issues"
61
+ Changelog = "https://github.com/yasyf/spawnllm/blob/main/CHANGELOG.md"
62
+
63
+ [build-system]
64
+ requires = ["uv_build>=0.11,<0.12"]
65
+ build-backend = "uv_build"
66
+
67
+ [tool.uv.build-backend]
68
+ module-name = "spawnllm"
69
+ module-root = ""
70
+
71
+ [tool.pytest.ini_options]
72
+ testpaths = ["tests"]
73
+ anyio_mode = "auto"
74
+ addopts = ["-ra", "--strict-markers", "--tb=short", "-q"]
75
+ markers = [
76
+ "unit: Pure unit tests",
77
+ "integration: Integration tests",
78
+ ]
79
+
80
+ # ty (Astral) is the default type checker — run `uv run ty check spawnllm`.
81
+ # It is fast, understands modern syntax, and avoids the strict-pyright false
82
+ # positives on pydantic/attrs-style dynamic defaults and PK-type overrides.
83
+ [tool.ty.rules]
84
+ # Keep cross-checker `# type: ignore` / `# pyright: ignore` comments from tripping ty.
85
+ unused-type-ignore-comment = "ignore"
86
+ # The MLX engine lazily imports optional native deps (mlx_lm, mlx, huggingface_hub)
87
+ # that are absent off Apple Silicon and in the dev/CI environment.
88
+ unresolved-import = "ignore"
89
+
90
+ # pyright is kept as a secondary checker (editors / `uvx pyright`). Basic mode plus
91
+ # a few disables covers the noise; ty is the gate that runs in CI.
92
+ [tool.pyright]
93
+ pythonVersion = "3.13"
94
+ typeCheckingMode = "basic"
95
+ include = ["spawnllm"]
96
+ venvPath = "."
97
+ venv = ".venv"
98
+ reportImplicitOverride = "none"
99
+ reportIncompatibleVariableOverride = "none"
100
+ reportUnknownVariableType = "none"
101
+ reportUnknownMemberType = "none"
102
+ reportUnknownArgumentType = "none"
103
+ reportUnknownParameterType = "none"
104
+ reportUnknownLambdaType = "none"
105
+ reportMissingTypeArgument = "none"
106
+ reportPrivateImportUsage = "none"
107
+ reportUnusedCallResult = "none"
108
+
109
+ [tool.ruff]
110
+ line-length = 120
111
+ target-version = "py313"
112
+ src = [".", "tests"]
113
+
114
+ [tool.ruff.lint]
115
+ select = ["E", "F", "I", "UP"]
116
+
117
+ [dependency-groups]
118
+ docs = [
119
+ # great-docs imports griffe's 2.x module layout; griffelib is the modern
120
+ # griffe (2.x) distribution, overriding the legacy griffe<2 pin great-docs
121
+ # itself still declares.
122
+ "griffelib>=2.0",
123
+ # Tracking great-docs main until a release newer than 0.13.0: main carries
124
+ # build-time GitHub widget stats (embedded via the CI GITHUB_TOKEN), which
125
+ # drop the navbar widget's client-side API calls — the source of GitHub 403
126
+ # errors on the published site.
127
+ # TODO(bootstrap): revert to a PyPI pin (`great-docs>=0.14`) once released.
128
+ "great-docs @ git+https://github.com/posit-dev/great-docs@main",
129
+ ]
@@ -0,0 +1,54 @@
1
+ """Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
2
+
3
+ The top-level namespace exposes the CLI backends, subprocess transport, and
4
+ structured-output helpers. The MLX engine lives under :mod:`spawnllm.mlx` and is
5
+ imported lazily so that ``import spawnllm`` never pulls ``mlx_lm``/``zstandard``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from spawnllm.backends import (
11
+ ClaudeCliBackend,
12
+ ClaudeNotAuthenticated,
13
+ ClaudeNotInstalled,
14
+ ClaudeReady,
15
+ ClaudeStatus,
16
+ CodexCliBackend,
17
+ LlmBackend,
18
+ LlmBackends,
19
+ check_status,
20
+ )
21
+ from spawnllm.call import call
22
+ from spawnllm.proc import arun_cli, collect_process, map_concurrent, run_cli
23
+ from spawnllm.structured import (
24
+ extract_structured,
25
+ parse_result_envelope,
26
+ parse_structured_output,
27
+ resolve_schema_path,
28
+ schema_for,
29
+ )
30
+ from spawnllm.types import TModel, TSpecialty
31
+
32
+ __all__ = [
33
+ "ClaudeCliBackend",
34
+ "ClaudeNotAuthenticated",
35
+ "ClaudeNotInstalled",
36
+ "ClaudeReady",
37
+ "ClaudeStatus",
38
+ "CodexCliBackend",
39
+ "LlmBackend",
40
+ "LlmBackends",
41
+ "TModel",
42
+ "TSpecialty",
43
+ "arun_cli",
44
+ "call",
45
+ "check_status",
46
+ "collect_process",
47
+ "extract_structured",
48
+ "map_concurrent",
49
+ "parse_result_envelope",
50
+ "parse_structured_output",
51
+ "resolve_schema_path",
52
+ "run_cli",
53
+ "schema_for",
54
+ ]
@@ -0,0 +1,6 @@
1
+ from __future__ import annotations
2
+
3
+ from spawnllm.cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
@@ -0,0 +1,27 @@
1
+ """LLM CLI backends (Claude/Codex) and the specialty registry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from spawnllm.backends.base import LlmBackend
6
+ from spawnllm.backends.claude import (
7
+ ClaudeCliBackend,
8
+ ClaudeNotAuthenticated,
9
+ ClaudeNotInstalled,
10
+ ClaudeReady,
11
+ ClaudeStatus,
12
+ check_status,
13
+ )
14
+ from spawnllm.backends.codex import CodexCliBackend
15
+ from spawnllm.backends.registry import LlmBackends
16
+
17
+ __all__ = [
18
+ "ClaudeCliBackend",
19
+ "ClaudeNotAuthenticated",
20
+ "ClaudeNotInstalled",
21
+ "ClaudeReady",
22
+ "ClaudeStatus",
23
+ "CodexCliBackend",
24
+ "LlmBackend",
25
+ "LlmBackends",
26
+ "check_status",
27
+ ]
@@ -0,0 +1,53 @@
1
+ """Abstract interface for an LLM CLI backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import TYPE_CHECKING, ClassVar
7
+
8
+ if TYPE_CHECKING:
9
+ from pydantic import BaseModel
10
+
11
+ from spawnllm.types import TModel
12
+
13
+
14
class LlmBackend(ABC):
    """Contract every CLI-driven LLM backend must satisfy.

    A concrete backend supplies three things: a table translating abstract
    model tiers into the provider's own model names, the argv used to launch
    the provider CLI, and the logic that turns the CLI's stdout into a result.

    Attributes:
        models: Provider model name for each abstract model tier.
    """

    models: ClassVar[dict[TModel, str]]

    @abstractmethod
    def build_command(self, model: str, schema_path: str | None, agent: bool) -> list[str]:
        """Assemble the argv for one CLI run; the prompt itself arrives on stdin.

        Args:
            model: Model name as the provider spells it.
            schema_path: Structured-output schema argument, or ``None`` when
                plain text is wanted.
            agent: ``True`` when the run is allowed tools / agent capabilities.

        Returns:
            The argument vector to execute.
        """

    @abstractmethod
    def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
        """Convert the CLI's stdout into text or a validated model instance.

        Args:
            raw: Stdout captured from the backend CLI.
            response_model: Pydantic model to validate against, or ``None``
                to get the text back unchanged.

        Returns:
            ``raw`` itself when no ``response_model`` is given, otherwise an
            instance of ``response_model``.
        """

    @abstractmethod
    def env(self) -> dict[str, str]:
        """Return the additional environment variables for the CLI process."""
@@ -0,0 +1,124 @@
1
+ """LlmBackend for the Anthropic ``claude`` CLI, plus install/auth status checks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import shutil
6
+ import subprocess
7
+ from dataclasses import dataclass
8
+ from typing import TYPE_CHECKING, ClassVar
9
+
10
+ from spawnllm.backends.base import LlmBackend
11
+ from spawnllm.structured import parse_result_envelope, parse_structured_output
12
+
13
+ if TYPE_CHECKING:
14
+ from pydantic import BaseModel
15
+
16
+ from spawnllm.types import TModel
17
+
18
+ CLAUDE_MODELS: dict[TModel, str] = {"small": "haiku", "medium": "sonnet", "large": "opus"}
19
+
20
+
21
+ @dataclass(frozen=True)
22
+ class ClaudeReady:
23
+ """The ``claude`` CLI is installed and authenticated."""
24
+
25
+
26
+ @dataclass(frozen=True)
27
+ class ClaudeNotInstalled:
28
+ """The ``claude`` CLI is not on PATH.
29
+
30
+ Attributes:
31
+ brew_available: Whether Homebrew is available to install it.
32
+ """
33
+
34
+ brew_available: bool
35
+
36
+
37
+ @dataclass(frozen=True)
38
+ class ClaudeNotAuthenticated:
39
+ """The ``claude`` CLI is installed but not authenticated."""
40
+
41
+
42
+ ClaudeStatus = ClaudeReady | ClaudeNotInstalled | ClaudeNotAuthenticated
43
+
44
+
45
+ def check_status(timeout: int = 10) -> ClaudeStatus:
46
+ """Return the install/auth status of the ``claude`` CLI."""
47
+ if not shutil.which("claude"):
48
+ return ClaudeNotInstalled(brew_available=bool(shutil.which("brew")))
49
+ result = subprocess.run(["claude", "auth", "status"], capture_output=True, text=True, timeout=timeout, check=False)
50
+ if result.returncode == 0:
51
+ return ClaudeReady()
52
+ return ClaudeNotAuthenticated()
53
+
54
+
55
+ @dataclass(frozen=True)
56
+ class ClaudeCliBackend(LlmBackend):
57
+ """:class:`LlmBackend` for the Anthropic ``claude`` CLI.
58
+
59
+ The default (no-arg) construction delivers the prompt over stdin with abstract
60
+ model tiers and structured-output parsing. The :meth:`cc_sentiment` preset
61
+ configures inline ``-p`` prompting with ``{is_error, result}`` envelope parsing.
62
+
63
+ Example:
64
+ >>> ClaudeCliBackend().build_command("haiku", None, agent=False)[0]
65
+ 'claude'
66
+ """
67
+
68
+ models: ClassVar[dict[TModel, str]] = CLAUDE_MODELS
69
+
70
+ inline_system_prompt: str = ""
71
+ verbose: bool = False
72
+
73
+ @classmethod
74
+ def cc_sentiment(cls, *, system_prompt: str, verbose: bool = False) -> ClaudeCliBackend:
75
+ """Return a backend configured for inline ``-p`` prompting + envelope parsing."""
76
+ return cls(inline_system_prompt=system_prompt, verbose=verbose)
77
+
78
+ def build_command(self, model: str, schema_path: str | None, agent: bool) -> list[str]:
79
+ return [
80
+ "claude",
81
+ "-p",
82
+ "--no-session-persistence",
83
+ "--model",
84
+ model,
85
+ *(
86
+ ["--permission-mode", "auto", "--max-budget-usd", "1"]
87
+ if agent
88
+ else ["--system-prompt", "", "--setting-sources", "", "--strict-mcp-config"]
89
+ ),
90
+ *(["--json-schema", schema_path, "--output-format", "json"] if schema_path else []),
91
+ ]
92
+
93
+ def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
94
+ return parse_structured_output(raw, response_model)
95
+
96
+ def env(self) -> dict[str, str]:
97
+ return {"CLAUDE_CODE_SIMPLE": "1"}
98
+
99
+ def build_argv(self, content: str, *, model: str) -> list[str]:
100
+ """Build the inline ``-p`` argv for the sentiment/pushback scoring path."""
101
+ argv = [
102
+ "claude",
103
+ "-p",
104
+ content,
105
+ "--model",
106
+ model,
107
+ "--system-prompt",
108
+ self.inline_system_prompt,
109
+ "--output-format",
110
+ "json",
111
+ "--max-turns",
112
+ "1",
113
+ "--tools",
114
+ "",
115
+ "--disable-slash-commands",
116
+ ]
117
+ if self.verbose:
118
+ argv.append("--verbose")
119
+ return argv
120
+
121
+ @staticmethod
122
+ def parse_result_envelope(stdout: bytes, *, argv: list[str], stderr: bytes) -> str:
123
+ """Parse the ``{is_error, result}`` JSON envelope; raise ``CalledProcessError`` on error."""
124
+ return parse_result_envelope(stdout, argv=argv, stderr=stderr)
@@ -0,0 +1,41 @@
1
+ """LlmBackend for the OpenAI ``codex`` CLI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, ClassVar
6
+
7
+ from spawnllm.backends.base import LlmBackend
8
+
9
+ if TYPE_CHECKING:
10
+ from pydantic import BaseModel
11
+
12
+ from spawnllm.types import TModel
13
+
14
+
15
class CodexCliBackend(LlmBackend):
    """:class:`LlmBackend` implementation that shells out to OpenAI's ``codex`` CLI.

    Attributes:
        models: Provider model name for each abstract model tier.
    """

    models: ClassVar[dict[TModel, str]] = {
        "small": "gpt-5.3-codex-spark",
        "medium": "gpt-5.4-mini",
        "large": "gpt-5.5",
    }

    def build_command(self, model: str, schema_path: str | None, agent: bool) -> list[str]:
        """Assemble the ``codex exec`` argv.

        Args:
            model: Provider-specific model name.
            schema_path: Value for ``--output-schema``; omitted when falsy.
            agent: When ``False``, the hooks and MCP-server features are
                switched off through ``-c`` overrides.

        Returns:
            The argv list to execute.
        """
        argv = ["codex", "exec", "--ephemeral", "--sandbox", "read-only", "--model", model]
        if not agent:
            argv.extend(["-c", "features.codex_hooks=false", "-c", "features.mcp_servers=false"])
        if schema_path:
            argv.extend(["--output-schema", schema_path])
        return argv

    def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
        """Return ``raw`` as-is, or validate it as JSON against ``response_model``."""
        if response_model:
            return response_model.model_validate_json(raw)
        return raw

    def env(self) -> dict[str, str]:
        """Return the extra environment variables for ``codex`` (none)."""
        return {}
@@ -0,0 +1,27 @@
1
+ """Specialty → backend registry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING, ClassVar
6
+
7
+ from spawnllm.backends.claude import ClaudeCliBackend
8
+ from spawnllm.backends.codex import CodexCliBackend
9
+
10
+ if TYPE_CHECKING:
11
+ from spawnllm.backends.base import LlmBackend
12
+ from spawnllm.types import TSpecialty
13
+
14
+
15
+ class LlmBackends:
16
+ """Registry mapping each specialty to the :class:`LlmBackend` that serves it."""
17
+
18
+ LLM_BACKENDS: ClassVar[dict[TSpecialty, LlmBackend]] = {
19
+ "debugging": CodexCliBackend(),
20
+ "review": CodexCliBackend(),
21
+ "general": ClaudeCliBackend(),
22
+ }
23
+
24
+ @classmethod
25
+ def for_specialty(cls, specialty: TSpecialty) -> LlmBackend:
26
+ """Return the backend registered for ``specialty``."""
27
+ return cls.LLM_BACKENDS[specialty]
@@ -0,0 +1,42 @@
1
+ """High-level one-shot sync LLM call used by the debugging CLI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from typing import TYPE_CHECKING
7
+
8
+ from spawnllm.proc import run_cli
9
+ from spawnllm.structured import resolve_schema_path, schema_for
10
+
11
+ if TYPE_CHECKING:
12
+ from pydantic import BaseModel
13
+
14
+ from spawnllm.backends.base import LlmBackend
15
+ from spawnllm.types import TModel
16
+
17
+
18
def call(
    prompt: str,
    *,
    backend: LlmBackend,
    model: TModel = "small",
    agent: bool = False,
    response_model: type[BaseModel] | None = None,
) -> str | BaseModel:
    """Execute a single CLI-backed LLM call and return the parsed response.

    Args:
        prompt: User prompt; it is handed to the backend CLI as its input.
        backend: The :class:`~spawnllm.backends.base.LlmBackend` to drive.
        model: Abstract model tier (``small``/``medium``/``large``), looked up
            in ``backend.models``.
        agent: Whether the call may use tools / agent capabilities.
        response_model: Pydantic model for structured output, or ``None`` for text.

    Returns:
        Whatever ``backend.parse_response`` produces: the raw text response,
        or a validated ``response_model`` instance.
    """
    if response_model is None:
        schema = None
    else:
        schema = schema_for(response_model)
    schema_path = resolve_schema_path(backend, schema)
    argv = backend.build_command(backend.models[model], schema_path, agent)
    # Backend-specific variables are layered over the inherited environment,
    # so they win on any key collision.
    child_env = {**os.environ, **backend.env()}
    stdout = run_cli(argv, input=prompt, env=child_env)
    return backend.parse_response(stdout, response_model)
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ from typing import cast
5
+
6
+ import click
7
+ from loguru import logger
8
+
9
+ from spawnllm.backends import ClaudeCliBackend, CodexCliBackend
10
+ from spawnllm.call import call as call_backend
11
+ from spawnllm.types import TModel
12
+
13
+ BACKENDS = ("claude", "codex", "mlx")
14
+ CLI_BACKENDS = {"claude": ClaudeCliBackend, "codex": CodexCliBackend}
15
+
16
+
17
+ @click.group()
18
+ @click.version_option(package_name="spawnllm")
19
+ def main() -> None:
20
+ """Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools."""
21
+
22
+
23
+ @main.command()
24
+ def backends() -> None:
25
+ """List the LLM backends spawnllm can drive."""
26
+ logger.debug("backends invoked")
27
+ for name in BACKENDS:
28
+ click.echo(name)
29
+
30
+
31
+ @main.command()
32
+ @click.option("--backend", type=click.Choice(["claude", "codex"]), required=True)
33
+ @click.option("--model", type=click.Choice(["small", "medium", "large"]), default="small")
34
+ @click.option("--agent", is_flag=True, help="Allow tools / agent capabilities.")
35
+ @click.argument("prompt", required=False)
36
+ def call(backend: str, model: str, agent: bool, prompt: str | None) -> None:
37
+ """Make a one-off LLM call (reads PROMPT or stdin) and print the response."""
38
+ text = prompt if prompt is not None else sys.stdin.read()
39
+ result = call_backend(text, backend=CLI_BACKENDS[backend](), model=cast(TModel, model), agent=agent)
40
+ click.echo(result)
@@ -0,0 +1,38 @@
1
+ """Local MLX engine, adapter codec, fuser, and runtime patches.
2
+
3
+ Imports here are lazy so that ``import spawnllm`` never pulls ``mlx_lm``/``zstandard``;
4
+ only consumers that touch ``spawnllm.mlx`` attributes load the heavy dependencies.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import TYPE_CHECKING
10
+
11
+ if TYPE_CHECKING:
12
+ from spawnllm.mlx.codec import AdapterCodec
13
+ from spawnllm.mlx.engine import MlxEngine
14
+ from spawnllm.mlx.fuse import AdapterFuser
15
+ from spawnllm.mlx.patches import MLXPatches
16
+
17
+ __all__ = ["AdapterCodec", "AdapterFuser", "MLXPatches", "MlxEngine"]
18
+
19
+
20
def __getattr__(name: str) -> object:
    """Resolve the package's public names on first attribute access.

    Each supported name triggers an import of the submodule that defines it,
    so that submodule (and whatever it imports) is loaded only when asked for.

    Args:
        name: Attribute being looked up on the package.

    Returns:
        The class exported under ``name``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported classes.
    """
    if name == "AdapterCodec":
        from spawnllm.mlx.codec import AdapterCodec as exported

        return exported
    if name == "AdapterFuser":
        from spawnllm.mlx.fuse import AdapterFuser as exported

        return exported
    if name == "MlxEngine":
        from spawnllm.mlx.engine import MlxEngine as exported

        return exported
    if name == "MLXPatches":
        from spawnllm.mlx.patches import MLXPatches as exported

        return exported
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
@@ -0,0 +1,92 @@
1
+ """Cross-platform LoRA adapter codec (byte-shuffle + zstd; imports without ``mlx_lm``)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import struct
7
+ from pathlib import Path
8
+ from typing import ClassVar
9
+
10
+ import orjson
11
+
12
+
13
+ class AdapterCodec:
14
+ """Compress/decompress a homogeneous-dtype safetensors adapter with byte-shuffle + zstd.
15
+
16
+ Subclasses override :attr:`DIR`/:attr:`ZST`/:attr:`CONFIG` to point at their
17
+ shipped package data.
18
+
19
+ Example:
20
+ >>> AdapterCodec.encode(Path("adapters.safetensors"))
21
+ >>> AdapterCodec.digest()
22
+ """
23
+
24
+ DIR: ClassVar[Path] = Path(__file__).parent
25
+ ZST: ClassVar[Path] = DIR / "adapters.safetensors.zst"
26
+ CONFIG: ClassVar[Path] = DIR / "adapter_config.json"
27
+ TYPESIZES: ClassVar[dict[str, int]] = {"F32": 4, "BF16": 2, "F16": 2}
28
+ COMPRESSION_LEVEL: ClassVar[int] = 19
29
+
30
+ @classmethod
31
+ def digest(cls) -> str:
32
+ return hashlib.sha256(cls.ZST.read_bytes()).hexdigest()[:16]
33
+
34
+ @classmethod
35
+ def encode(cls, src: Path) -> None:
36
+ import zstandard as zstd
37
+
38
+ raw = src.read_bytes()
39
+ cls._assert_homogeneous_dtype(raw)
40
+ cls.ZST.write_bytes(zstd.ZstdCompressor(level=cls.COMPRESSION_LEVEL).compress(cls._walk(raw, shuffle=True)))
41
+
42
+ @classmethod
43
+ def decode(cls, dst: Path) -> None:
44
+ import zstandard as zstd
45
+
46
+ dst.write_bytes(cls._walk(zstd.ZstdDecompressor().decompress(cls.ZST.read_bytes()), shuffle=False))
47
+
48
+ @classmethod
49
+ def dtype(cls) -> str:
50
+ import zstandard as zstd
51
+
52
+ raw = zstd.ZstdDecompressor().decompress(cls.ZST.read_bytes())
53
+ cls._assert_homogeneous_dtype(raw)
54
+ header_end = 8 + struct.unpack("<Q", raw[:8])[0]
55
+ return next(v["dtype"] for k, v in orjson.loads(raw[8:header_end]).items() if k != "__metadata__")
56
+
57
+ @classmethod
58
+ def _walk(cls, raw: bytes, *, shuffle: bool) -> bytes:
59
+ header_end = 8 + struct.unpack("<Q", raw[:8])[0]
60
+ body = raw[header_end:]
61
+ out = bytearray(raw[:header_end])
62
+ cursor = 0
63
+ for name, meta in sorted(
64
+ ((k, v) for k, v in orjson.loads(raw[8:header_end]).items() if k != "__metadata__"),
65
+ key=lambda kv: kv[1]["data_offsets"][0],
66
+ ):
67
+ assert meta["dtype"] in cls.TYPESIZES, f"{name}: unsupported dtype {meta['dtype']}"
68
+ typesize = cls.TYPESIZES[meta["dtype"]]
69
+ nbytes = meta["data_offsets"][1] - meta["data_offsets"][0]
70
+ chunk = body[cursor : cursor + nbytes]
71
+ out.extend(cls._shuffle(chunk, typesize) if shuffle else cls._unshuffle(chunk, typesize))
72
+ cursor += nbytes
73
+ return bytes(out)
74
+
75
+ @classmethod
76
+ def _assert_homogeneous_dtype(cls, raw: bytes) -> None:
77
+ header_end = 8 + struct.unpack("<Q", raw[:8])[0]
78
+ dtypes = {v["dtype"] for k, v in orjson.loads(raw[8:header_end]).items() if k != "__metadata__"}
79
+ assert len(dtypes) == 1, f"adapter must be homogeneous-dtype, got {dtypes}"
80
+ assert dtypes.issubset(cls.TYPESIZES.keys()), f"unsupported dtype: {dtypes}"
81
+
82
+ @classmethod
83
+ def _shuffle(cls, chunk: bytes, typesize: int) -> bytes:
84
+ import numpy as np
85
+
86
+ return np.frombuffer(chunk, dtype=np.uint8).reshape(-1, typesize).T.tobytes()
87
+
88
+ @classmethod
89
+ def _unshuffle(cls, chunk: bytes, typesize: int) -> bytes:
90
+ import numpy as np
91
+
92
+ return np.frombuffer(chunk, dtype=np.uint8).reshape(typesize, -1).T.tobytes()
@@ -0,0 +1,148 @@
1
+ """Domain-agnostic MLX batch-inference engine running on a dedicated worker thread."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import copy
7
+ import platform
8
+ import queue
9
+ import sys
10
+ import threading
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ import anyio.to_thread
14
+
15
+ from spawnllm.mlx.patches import MLXPatches
16
+
17
+ if TYPE_CHECKING:
18
+ from collections.abc import Callable
19
+ from pathlib import Path
20
+
21
+ WORKER_STOP = object()
22
+
23
+
24
+ class MlxEngine:
25
+ """Run batched MLX inference on a dedicated worker thread.
26
+
27
+ Sentiment/domain specifics are injected: ``logits_processor_factory`` builds the
28
+ per-model logit processor from the loaded tokenizer, and ``prefix_messages`` is
29
+ the cached system/demo prefix shared across a batch.
30
+ """
31
+
32
+ def __init__(
33
+ self,
34
+ fused_dir: Path,
35
+ *,
36
+ logits_processor_factory: Callable[[Any], Callable[..., Any]],
37
+ prefix_messages: list[dict[str, str]],
38
+ batch_size: int,
39
+ worker_name: str = "mlx",
40
+ ) -> None:
41
+ if sys.platform != "darwin" or platform.machine() != "arm64":
42
+ raise RuntimeError("The MLX engine requires macOS on Apple Silicon. Use a CLI backend on this platform.")
43
+ self._fused_dir = fused_dir
44
+ self._logits_processor_factory = logits_processor_factory
45
+ self._prefix_messages = prefix_messages
46
+ self._batch_size = batch_size
47
+ self._inbox: queue.SimpleQueue = queue.SimpleQueue()
48
+ self._loaded = threading.Event()
49
+ self._init_error: BaseException | None = None
50
+ self._thread = threading.Thread(target=self._worker, daemon=True, name=worker_name)
51
+ self._thread.start()
52
+
53
+ def _worker(self) -> None:
54
+ try:
55
+ self._load()
56
+ except BaseException as exc:
57
+ self._init_error = exc
58
+ self._loaded.set()
59
+ return
60
+ self._loaded.set()
61
+ while True:
62
+ job = self._inbox.get()
63
+ if job is WORKER_STOP:
64
+ return
65
+ fn, args, on_result, on_error = job
66
+ try:
67
+ on_result(fn(*args))
68
+ except BaseException as exc:
69
+ on_error(exc)
70
+
71
+ def _load(self) -> None:
72
+ from mlx_lm import batch_generate, load
73
+
74
+ MLXPatches.apply()
75
+ self.model, self.tokenizer = load(str(self._fused_dir))
76
+ self.logit_processor = self._logits_processor_factory(self.tokenizer)
77
+ self.prefix_messages = self._prefix_messages
78
+ self.prefix_tokens = self.tokenizer.apply_chat_template(
79
+ self.prefix_messages, tokenize=True, add_generation_prompt=False
80
+ )
81
+ self.base_cache = batch_generate(
82
+ self.model,
83
+ self.tokenizer,
84
+ [self.prefix_tokens],
85
+ max_tokens=1,
86
+ logits_processors=[self.logit_processor],
87
+ return_prompt_caches=True,
88
+ ).caches[0]
89
+
90
+ async def ensure_loaded(self) -> None:
91
+ await anyio.to_thread.run_sync(self._loaded.wait)
92
+ if self._init_error is not None:
93
+ raise self._init_error
94
+
95
+ async def submit[R](self, fn: Callable[..., R], *args: Any) -> R:
96
+ loop = asyncio.get_running_loop()
97
+ fut: asyncio.Future = loop.create_future()
98
+ self._inbox.put(
99
+ (
100
+ fn,
101
+ args,
102
+ lambda value: loop.call_soon_threadsafe(fut.set_result, value),
103
+ lambda exc: loop.call_soon_threadsafe(fut.set_exception, exc),
104
+ )
105
+ )
106
+ return await fut
107
+
108
+ def _generate_chunk(self, chunk: list[list[dict[str, str]]]) -> list[str]:
109
+ from mlx_lm import batch_generate
110
+
111
+ suffixes = [
112
+ self.tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True)[
113
+ len(self.prefix_tokens) :
114
+ ]
115
+ for messages in chunk
116
+ ]
117
+ return batch_generate(
118
+ self.model,
119
+ self.tokenizer,
120
+ suffixes,
121
+ max_tokens=1,
122
+ logits_processors=[self.logit_processor],
123
+ prompt_caches=[copy.deepcopy(self.base_cache) for _ in suffixes],
124
+ ).texts
125
+
126
+ async def generate(
127
+ self,
128
+ message_lists: list[list[dict[str, str]]],
129
+ on_progress: Callable[[int], None],
130
+ ) -> list[str]:
131
+ order = sorted(range(len(message_lists)), key=lambda i: len(message_lists[i][-1]["content"]))
132
+ responses: list[str] = [""] * len(message_lists)
133
+ for start in range(0, len(order), self._batch_size):
134
+ slice_ = order[start : start + self._batch_size]
135
+ chunk = [message_lists[i] for i in slice_]
136
+ chunk_responses = await self.submit(self._generate_chunk, chunk)
137
+ for i, r in zip(slice_, chunk_responses, strict=True):
138
+ responses[i] = r
139
+ on_progress(len(chunk))
140
+ return responses
141
+
142
+ def peak_memory_gb(self) -> float:
143
+ import resource
144
+
145
+ return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / (1024**3)
146
+
147
+ async def close(self) -> None:
148
+ self._inbox.put(WORKER_STOP)
@@ -0,0 +1,52 @@
1
+ """Fuse a shipped LoRA adapter into a base MLX model, cached in the HF-hub layout."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import tempfile
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING
8
+
9
+ if TYPE_CHECKING:
10
+ from spawnllm.mlx.codec import AdapterCodec
11
+
12
+
13
class AdapterFuser:
    """Build, once, a copy of a base model with the shipped adapter fused in."""

    @classmethod
    def ensure_fused(
        cls,
        model_repo: str,
        *,
        codec: AdapterCodec,
        cache_namespace: str,
        tqdm_class: type | None = None,
    ) -> Path:
        """Return the directory of the fused model, creating it if it is missing.

        The result lives under the Hugging Face hub cache at
        ``models--{cache_namespace}-{digest}/snapshots/{digest}``, where
        ``digest`` comes from ``codec.digest()``. The presence of
        ``config.json`` in that directory marks it as already built.

        Args:
            model_repo: Repository id of the base model to download.
            codec: Supplies the adapter config, the adapter weights and the digest.
            cache_namespace: Prefix of the cache directory name.
            tqdm_class: Progress-bar class forwarded to ``snapshot_download``.

        Returns:
            Path to the fused snapshot directory.
        """
        from huggingface_hub.constants import HF_HUB_CACHE

        adapter_digest = codec.digest()
        cache_root = Path(HF_HUB_CACHE) / f"models--{cache_namespace}-{adapter_digest}"
        snapshot = cache_root / "snapshots" / adapter_digest
        if (snapshot / "config.json").exists():
            return snapshot

        # Heavy imports are deferred until a build is actually needed.
        from huggingface_hub import snapshot_download
        from mlx.utils import tree_unflatten
        from mlx_lm.utils import load_adapters, load_model, load_tokenizer, save

        base_path = Path(snapshot_download(model_repo, tqdm_class=tqdm_class))
        with tempfile.TemporaryDirectory() as scratch:
            adapter_dir = Path(scratch)
            # Lay the adapter out as a directory holding its config and weights.
            config_bytes = codec.CONFIG.read_bytes()
            (adapter_dir / "adapter_config.json").write_bytes(config_bytes)
            codec.decode(adapter_dir / "adapters.safetensors")

            model, config = load_model(base_path, lazy=False, strict=False)
            model = load_adapters(model, str(adapter_dir))
            model.eval()
            tokenizer = load_tokenizer(base_path, eos_token_ids=config.get("eos_token_id"))

            # Replace every module that knows how to fuse with its fused form.
            fused_modules = []
            for module_name, module in model.named_modules():
                if hasattr(module, "fuse"):
                    fused_modules.append((module_name, module.fuse()))
            model.update_modules(tree_unflatten(fused_modules))

            snapshot.mkdir(parents=True, exist_ok=True)
            save(snapshot, base_path, model, tokenizer, config, donate_model=True)

        refs_dir = cache_root / "refs"
        refs_dir.mkdir(parents=True, exist_ok=True)
        (refs_dir / "main").write_text(adapter_digest)
        return snapshot
@@ -0,0 +1,43 @@
1
+ """Runtime patches applied in-worker before the first ``batch_generate``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import time
7
+
8
+
9
class MLXPatches:
    """One-shot runtime patches for ``mlx_lm``; ``apply`` is safe to call repeatedly."""

    # True once every patch has been installed successfully.
    applied: bool = False

    @classmethod
    def apply(cls) -> None:
        """Install the patches unless they are already in place.

        The flag is raised only after the patch succeeded, so a failed attempt
        (for example an import error) is retried on the next call instead of
        being silently skipped.
        """
        if cls.applied:
            return
        cls._apply_batchstats_zerodiv_guard()
        cls.applied = True

    @staticmethod
    def _apply_batchstats_zerodiv_guard() -> None:
        """Replace ``BatchGenerator.stats`` with a variant that never divides by zero.

        Both throughput figures divide by an elapsed time clamped to at least
        ``1e-9`` seconds.
        """
        import mlx.core as mx
        from mlx_lm.generate import BatchGenerator, BatchStats

        @contextlib.contextmanager
        def stats(self, stats: BatchStats | None = None):
            stats = stats or BatchStats()
            self._prompt_tokens_counter = 0
            self._prompt_time_counter = 0
            self._gen_tokens_counter = 0
            tic = time.perf_counter()
            try:
                yield stats
            finally:
                total_time = time.perf_counter() - tic
                stats.prompt_tokens += self._prompt_tokens_counter
                stats.prompt_time += self._prompt_time_counter
                stats.prompt_tps = stats.prompt_tokens / max(stats.prompt_time, 1e-9)
                stats.generation_tokens += self._gen_tokens_counter
                # Whatever was not spent on the prompt counts as generation time.
                stats.generation_time += total_time - self._prompt_time_counter
                stats.generation_tps = stats.generation_tokens / max(stats.generation_time, 1e-9)
                stats.peak_memory = max(stats.peak_memory, mx.get_peak_memory() / 1e9)

        BatchGenerator.stats = stats
@@ -0,0 +1,108 @@
1
+ """Subprocess transport for CLI-backed LLM calls (sync ``run_cli`` + async ``arun_cli``)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import subprocess
7
+ from collections.abc import Awaitable, Callable, Sequence
8
+
9
+ __all__ = ["arun_cli", "collect_process", "map_concurrent", "run_cli"]
10
+
11
+
12
def run_cli(
    argv: list[str],
    *,
    input: str | None = None,
    timeout: int = 30,
    env: dict[str, str] | None = None,
    cwd: str | None = None,
) -> str:
    """Run *argv* to completion and return its stdout as text.

    Args:
        argv: Program and arguments; no shell is involved.
        input: Text sent to the child's stdin, if any.
        timeout: Seconds to wait before ``subprocess.run`` gives up.
        env: Environment for the child, or ``None`` to inherit.
        cwd: Working directory for the child, or ``None`` to inherit.

    Raises:
        subprocess.CalledProcessError: On a non-zero exit status. The error
            carries notes with argv, the exit code and the last 4096
            characters of stderr and stdout.
        subprocess.TimeoutExpired: If the child outlives *timeout*.
    """
    completed = subprocess.run(
        argv,
        input=input,
        capture_output=True,
        text=True,
        timeout=timeout,
        env=env,
        cwd=cwd,
    )
    if completed.returncode == 0:
        return completed.stdout

    failure = subprocess.CalledProcessError(
        completed.returncode, argv, output=completed.stdout, stderr=completed.stderr
    )
    diagnostics = (
        f"argv: {argv}",
        f"exit_code: {completed.returncode}",
        f"stderr: {completed.stderr[-4096:]}",
        f"stdout: {completed.stdout[-4096:]}",
    )
    for line in diagnostics:
        failure.add_note(line)
    raise failure
37
+
38
+
39
+ async def collect_process(
40
+ proc: asyncio.subprocess.Process,
41
+ *,
42
+ stderr_tee: Callable[[bytes], None] | None = None,
43
+ ) -> tuple[bytes, bytes, int]:
44
+ assert proc.stderr is not None, "create_subprocess_exec was called with stderr=PIPE"
45
+ assert proc.stdout is not None, "create_subprocess_exec was called with stdout=PIPE"
46
+ stderr_buf = bytearray()
47
+ async with asyncio.TaskGroup() as tg:
48
+ tg.create_task(_tee_stderr(proc.stderr, stderr_buf, stderr_tee))
49
+ stdout_task = tg.create_task(proc.stdout.read())
50
+ rc_task = tg.create_task(proc.wait())
51
+ return stdout_task.result(), bytes(stderr_buf), rc_task.result()
52
+
53
+
54
+ async def _tee_stderr(
55
+ stream: asyncio.StreamReader,
56
+ buf: bytearray,
57
+ stderr_tee: Callable[[bytes], None] | None,
58
+ ) -> None:
59
+ async for raw in stream:
60
+ buf.extend(raw)
61
+ if stderr_tee is not None:
62
+ stderr_tee(raw)
63
+
64
+
65
+ async def arun_cli(
66
+ argv: list[str],
67
+ *,
68
+ input: str | None = None,
69
+ env: dict[str, str] | None = None,
70
+ cwd: str | None = None,
71
+ stderr_tee: Callable[[bytes], None] | None = None,
72
+ ) -> bytes:
73
+ proc = await asyncio.create_subprocess_exec(
74
+ *argv,
75
+ stdin=asyncio.subprocess.PIPE if input is not None else None,
76
+ stdout=asyncio.subprocess.PIPE,
77
+ stderr=asyncio.subprocess.PIPE,
78
+ env=env,
79
+ cwd=cwd,
80
+ )
81
+ if input is not None:
82
+ assert proc.stdin is not None, "create_subprocess_exec was called with stdin=PIPE"
83
+ proc.stdin.write(input.encode())
84
+ await proc.stdin.drain()
85
+ proc.stdin.close()
86
+ stdout, stderr, rc = await collect_process(proc, stderr_tee=stderr_tee)
87
+ if rc != 0:
88
+ raise subprocess.CalledProcessError(rc, argv, output=stdout, stderr=stderr)
89
+ return stdout
90
+
91
+
92
+ async def map_concurrent[T, R](
93
+ items: Sequence[T],
94
+ fn: Callable[[T], Awaitable[R]],
95
+ *,
96
+ limit: int,
97
+ on_done: Callable[[int], None] | None = None,
98
+ ) -> list[R]:
99
+ sem = asyncio.Semaphore(limit)
100
+
101
+ async def one(item: T) -> R:
102
+ async with sem:
103
+ result = await fn(item)
104
+ if on_done is not None:
105
+ on_done(1)
106
+ return result
107
+
108
+ return list(await asyncio.gather(*(one(item) for item in items)))
File without changes
@@ -0,0 +1,65 @@
1
+ """Structured-output helpers: JSON-schema build, schema-path resolution, response parsing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import subprocess
8
+ import tempfile
9
+ from typing import TYPE_CHECKING, Any, cast
10
+
11
+ from spawnllm.backends.codex import CodexCliBackend
12
+
13
+ if TYPE_CHECKING:
14
+ from pydantic import BaseModel
15
+
16
+ from spawnllm.backends.base import LlmBackend
17
+
18
+ __all__ = [
19
+ "extract_structured",
20
+ "parse_result_envelope",
21
+ "parse_structured_output",
22
+ "resolve_schema_path",
23
+ "schema_for",
24
+ ]
25
+
26
+
27
def schema_for(model: type[BaseModel]) -> str:
    """Return the model's JSON schema as a string, with ``additionalProperties`` forced to false."""
    schema = {**model.model_json_schema(), "additionalProperties": False}
    return json.dumps(schema)
29
+
30
+
31
def resolve_schema_path(backend: LlmBackend, schema: str | None) -> str | None:
    """Return the schema in the form the backend receives it.

    For a ``CodexCliBackend`` the schema is written to a new temporary
    ``.json`` file and the file's path is returned. Any other backend gets the
    schema string back unchanged.

    The temporary file is not removed here; the caller owns its cleanup.

    Returns:
        ``None`` when *schema* is empty or ``None``, otherwise a path or the
        schema string as described above.
    """
    if not schema:
        return None
    if not isinstance(backend, CodexCliBackend):
        return schema
    fd, path = tempfile.mkstemp(suffix=".json")
    try:
        # fdopen takes ownership of fd: the with-block closes it even when the
        # write fails, and the buffered write cannot be cut short the way a
        # bare os.write can.
        with os.fdopen(fd, "wb") as handle:
            handle.write(schema.encode())
    except OSError:
        # Do not leave a half-written schema file behind.
        os.unlink(path)
        raise
    return path
40
+
41
+
42
def extract_structured(events: list[dict[str, Any]], model: type[BaseModel]) -> BaseModel | None:
    """Validate and return the first ``structured_output`` carried by a ``result`` event.

    Returns ``None`` when no event has both ``type == "result"`` and a
    ``structured_output`` key.
    """
    for event in events:
        if event.get("type") != "result":
            continue
        if "structured_output" not in event:
            continue
        return model.model_validate(event["structured_output"])
    return None
48
+
49
+
50
def parse_structured_output(raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
    """Turn a backend's raw text into *response_model*, or pass it through untouched.

    Without a response model the text is returned as is. Otherwise the text
    must be JSON. A non-empty JSON list is treated as an event list and
    searched for a structured result first. In every other case, or when that
    search finds nothing, the whole text is validated against the model.
    """
    if not response_model:
        return raw
    payload = json.loads(raw)
    if isinstance(payload, list) and payload:
        events = cast(list[dict[str, Any]], payload)
        found = extract_structured(events, response_model)
        if found:
            return found
    return response_model.model_validate_json(raw)
59
+
60
+
61
def parse_result_envelope(stdout: bytes, *, argv: list[str], stderr: bytes) -> str:
    """Return the ``result`` field of a JSON envelope, or raise if it is flagged as an error.

    Raises:
        subprocess.CalledProcessError: When ``is_error`` is truthy. The exit
            status is reported as 0; *stdout* and *stderr* are attached.
    """
    envelope = json.loads(stdout)
    if not envelope["is_error"]:
        return envelope["result"]
    raise subprocess.CalledProcessError(0, argv, output=stdout, stderr=stderr)
@@ -0,0 +1,10 @@
1
+ """Shared type aliases for the LLM-calling surface."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Literal
6
+
7
+ __all__ = ["TModel", "TSpecialty"]
8
+
9
# Closed set of specialty labels; any other string fails type checking.
TSpecialty = Literal["debugging", "review", "general"]
# Closed set of model tier labels (presumably mapped to concrete models by each backend — confirm).
TModel = Literal["small", "medium", "large"]