spawnllm 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spawnllm-0.1.0/LICENSE +21 -0
- spawnllm-0.1.0/PKG-INFO +109 -0
- spawnllm-0.1.0/README.md +64 -0
- spawnllm-0.1.0/pyproject.toml +129 -0
- spawnllm-0.1.0/spawnllm/__init__.py +54 -0
- spawnllm-0.1.0/spawnllm/__main__.py +6 -0
- spawnllm-0.1.0/spawnllm/backends/__init__.py +27 -0
- spawnllm-0.1.0/spawnllm/backends/base.py +53 -0
- spawnllm-0.1.0/spawnllm/backends/claude.py +124 -0
- spawnllm-0.1.0/spawnllm/backends/codex.py +41 -0
- spawnllm-0.1.0/spawnllm/backends/registry.py +27 -0
- spawnllm-0.1.0/spawnllm/call.py +42 -0
- spawnllm-0.1.0/spawnllm/cli.py +40 -0
- spawnllm-0.1.0/spawnllm/mlx/__init__.py +38 -0
- spawnllm-0.1.0/spawnllm/mlx/codec.py +92 -0
- spawnllm-0.1.0/spawnllm/mlx/engine.py +148 -0
- spawnllm-0.1.0/spawnllm/mlx/fuse.py +52 -0
- spawnllm-0.1.0/spawnllm/mlx/patches.py +43 -0
- spawnllm-0.1.0/spawnllm/proc.py +108 -0
- spawnllm-0.1.0/spawnllm/py.typed +0 -0
- spawnllm-0.1.0/spawnllm/structured.py +65 -0
- spawnllm-0.1.0/spawnllm/types.py +10 -0
spawnllm-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Yasyf Mohamedali
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
spawnllm-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: spawnllm
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
|
|
5
|
+
Keywords:
|
|
6
|
+
Author: Yasyf Mohamedali
|
|
7
|
+
Author-email: Yasyf Mohamedali <yasyfm@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Dist: click>=8
|
|
17
|
+
Requires-Dist: loguru>=0.7
|
|
18
|
+
Requires-Dist: pydantic>=2
|
|
19
|
+
Requires-Dist: zstandard>=0.25.0 ; extra == 'adapter'
|
|
20
|
+
Requires-Dist: numpy>=1.26 ; extra == 'adapter'
|
|
21
|
+
Requires-Dist: orjson>=3.10 ; extra == 'adapter'
|
|
22
|
+
Requires-Dist: anyio>=4 ; extra == 'dev'
|
|
23
|
+
Requires-Dist: pytest>=8.0 ; extra == 'dev'
|
|
24
|
+
Requires-Dist: ruff>=0.8 ; extra == 'dev'
|
|
25
|
+
Requires-Dist: ty>=0.0.44 ; extra == 'dev'
|
|
26
|
+
Requires-Dist: zstandard>=0.25.0 ; extra == 'dev'
|
|
27
|
+
Requires-Dist: numpy>=1.26 ; extra == 'dev'
|
|
28
|
+
Requires-Dist: orjson>=3.10 ; extra == 'dev'
|
|
29
|
+
Requires-Dist: zstandard>=0.25.0 ; extra == 'mlx'
|
|
30
|
+
Requires-Dist: numpy>=1.26 ; extra == 'mlx'
|
|
31
|
+
Requires-Dist: orjson>=3.10 ; extra == 'mlx'
|
|
32
|
+
Requires-Dist: anyio>=4.4 ; extra == 'mlx'
|
|
33
|
+
Requires-Dist: huggingface-hub>=0.25 ; extra == 'mlx'
|
|
34
|
+
Requires-Dist: mlx-lm>=0.31.3 ; platform_machine == 'arm64' and sys_platform == 'darwin' and extra == 'mlx'
|
|
35
|
+
Requires-Python: >=3.13
|
|
36
|
+
Project-URL: Homepage, https://github.com/yasyf/spawnllm
|
|
37
|
+
Project-URL: Documentation, https://yasyf.github.io/spawnllm/
|
|
38
|
+
Project-URL: Repository, https://github.com/yasyf/spawnllm
|
|
39
|
+
Project-URL: Issues, https://github.com/yasyf/spawnllm/issues
|
|
40
|
+
Project-URL: Changelog, https://github.com/yasyf/spawnllm/blob/main/CHANGELOG.md
|
|
41
|
+
Provides-Extra: adapter
|
|
42
|
+
Provides-Extra: dev
|
|
43
|
+
Provides-Extra: mlx
|
|
44
|
+
Description-Content-Type: text/markdown
|
|
45
|
+
|
|
46
|
+
# spawnllm
|
|
47
|
+
|
|
48
|
+
[PyPI](https://pypi.org/project/spawnllm/)
|
|
49
|
+
[Python versions](https://pypi.org/project/spawnllm/)
|
|
50
|
+
[Docs](https://yasyf.github.io/spawnllm/)
|
|
51
|
+
[License](https://github.com/yasyf/spawnllm/blob/main/LICENSE)
|
|
52
|
+
|
|
53
|
+
Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
|
|
54
|
+
|
|
55
|
+
spawnllm centralizes the LLM-calling plumbing that small tools keep re-inventing: driving the
|
|
56
|
+
`claude` and `codex` CLIs as subshells — with structured Pydantic output, model tiers, and
|
|
57
|
+
faithful error capture — and running local Apple-Silicon MLX models with adapter fusion,
|
|
58
|
+
prompt-cache reuse, and batched generation. Depend on it once and each tool keeps only its
|
|
59
|
+
domain logic instead of its own copy of the backends.
|
|
60
|
+
|
|
61
|
+
## Install
|
|
62
|
+
|
|
63
|
+
No install needed — run everything through [uvx](https://docs.astral.sh/uv/):
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
uvx spawnllm --help
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
`uvx` fetches spawnllm into a throwaway environment and runs it. To add it
|
|
70
|
+
to a project instead:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
uv add spawnllm
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
For the local MLX engine (Apple Silicon only), pull the extra:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
uv add "spawnllm[mlx]"
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Quickstart
|
|
83
|
+
|
|
84
|
+
List the backends spawnllm can drive:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
uvx spawnllm backends
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
claude
|
|
92
|
+
codex
|
|
93
|
+
mlx
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## What problems does this solve?
|
|
97
|
+
|
|
98
|
+
- **Duplicate subshell plumbing.** Building `claude`/`codex` argv, piping stdin/stdout, teeing
|
|
99
|
+
stderr, and turning non-zero exits into useful errors — written once, not re-derived per tool.
|
|
100
|
+
- **Structured-output boilerplate.** A Pydantic model becomes a JSON-schema constraint and a
|
|
101
|
+
parsed, validated result the same way for every backend.
|
|
102
|
+
- **Local MLX is fiddly.** Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
|
|
103
|
+
batched single-token generation live behind one engine instead of in every consumer.
|
|
104
|
+
- **Behavior drift.** Two tools that call the same models stay byte-for-byte consistent because
|
|
105
|
+
they share the backend layer rather than each maintaining a copy.
|
|
106
|
+
|
|
107
|
+
## Docs
|
|
108
|
+
|
|
109
|
+
[Read the docs](https://yasyf.github.io/spawnllm/) for the full guide and API reference.
|
spawnllm-0.1.0/README.md
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# spawnllm
|
|
2
|
+
|
|
3
|
+
[PyPI](https://pypi.org/project/spawnllm/)
|
|
4
|
+
[Python versions](https://pypi.org/project/spawnllm/)
|
|
5
|
+
[Docs](https://yasyf.github.io/spawnllm/)
|
|
6
|
+
[License](https://github.com/yasyf/spawnllm/blob/main/LICENSE)
|
|
7
|
+
|
|
8
|
+
Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
|
|
9
|
+
|
|
10
|
+
spawnllm centralizes the LLM-calling plumbing that small tools keep re-inventing: driving the
|
|
11
|
+
`claude` and `codex` CLIs as subshells — with structured Pydantic output, model tiers, and
|
|
12
|
+
faithful error capture — and running local Apple-Silicon MLX models with adapter fusion,
|
|
13
|
+
prompt-cache reuse, and batched generation. Depend on it once and each tool keeps only its
|
|
14
|
+
domain logic instead of its own copy of the backends.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
No install needed — run everything through [uvx](https://docs.astral.sh/uv/):
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
uvx spawnllm --help
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
`uvx` fetches spawnllm into a throwaway environment and runs it. To add it
|
|
25
|
+
to a project instead:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
uv add spawnllm
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
For the local MLX engine (Apple Silicon only), pull the extra:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
uv add "spawnllm[mlx]"
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Quickstart
|
|
38
|
+
|
|
39
|
+
List the backends spawnllm can drive:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
uvx spawnllm backends
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
claude
|
|
47
|
+
codex
|
|
48
|
+
mlx
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## What problems does this solve?
|
|
52
|
+
|
|
53
|
+
- **Duplicate subshell plumbing.** Building `claude`/`codex` argv, piping stdin/stdout, teeing
|
|
54
|
+
stderr, and turning non-zero exits into useful errors — written once, not re-derived per tool.
|
|
55
|
+
- **Structured-output boilerplate.** A Pydantic model becomes a JSON-schema constraint and a
|
|
56
|
+
parsed, validated result the same way for every backend.
|
|
57
|
+
- **Local MLX is fiddly.** Adapter fusion, prompt-cache reuse, worker-thread lifecycle, and
|
|
58
|
+
batched single-token generation live behind one engine instead of in every consumer.
|
|
59
|
+
- **Behavior drift.** Two tools that call the same models stay byte-for-byte consistent because
|
|
60
|
+
they share the backend layer rather than each maintaining a copy.
|
|
61
|
+
|
|
62
|
+
## Docs
|
|
63
|
+
|
|
64
|
+
[Read the docs](https://yasyf.github.io/spawnllm/) for the full guide and API reference.
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "spawnllm"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
license-files = ["LICENSE"]
|
|
8
|
+
authors = [{ name = "Yasyf Mohamedali", email = "yasyfm@gmail.com" }]
|
|
9
|
+
keywords = []
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 3 - Alpha",
|
|
12
|
+
"Intended Audience :: Developers",
|
|
13
|
+
"Operating System :: OS Independent",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
16
|
+
"Typing :: Typed",
|
|
17
|
+
]
|
|
18
|
+
requires-python = ">=3.13"
|
|
19
|
+
dependencies = [
|
|
20
|
+
"click>=8",
|
|
21
|
+
"loguru>=0.7",
|
|
22
|
+
"pydantic>=2",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.optional-dependencies]
|
|
26
|
+
dev = [
|
|
27
|
+
"anyio>=4",
|
|
28
|
+
"pytest>=8.0",
|
|
29
|
+
"ruff>=0.8",
|
|
30
|
+
"ty>=0.0.44",
|
|
31
|
+
# Codec tests exercise zstandard/numpy/orjson.
|
|
32
|
+
"zstandard>=0.25.0",
|
|
33
|
+
"numpy>=1.26",
|
|
34
|
+
"orjson>=3.10",
|
|
35
|
+
]
|
|
36
|
+
# Cross-platform LoRA adapter codec (zstd + numpy); imported without mlx.
|
|
37
|
+
adapter = [
|
|
38
|
+
"zstandard>=0.25.0",
|
|
39
|
+
"numpy>=1.26",
|
|
40
|
+
"orjson>=3.10",
|
|
41
|
+
]
|
|
42
|
+
# Local Apple-Silicon MLX engine. mlx-lm carries the darwin/arm64 marker; the
|
|
43
|
+
# codec libs and anyio are cross-platform but only meaningful alongside it.
|
|
44
|
+
mlx = [
|
|
45
|
+
"zstandard>=0.25.0",
|
|
46
|
+
"numpy>=1.26",
|
|
47
|
+
"orjson>=3.10",
|
|
48
|
+
"anyio>=4.4",
|
|
49
|
+
"huggingface-hub>=0.25",
|
|
50
|
+
"mlx-lm>=0.31.3; sys_platform == 'darwin' and platform_machine == 'arm64'",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[project.scripts]
|
|
54
|
+
spawnllm = "spawnllm.cli:main"
|
|
55
|
+
|
|
56
|
+
[project.urls]
|
|
57
|
+
Homepage = "https://github.com/yasyf/spawnllm"
|
|
58
|
+
Documentation = "https://yasyf.github.io/spawnllm/"
|
|
59
|
+
Repository = "https://github.com/yasyf/spawnllm"
|
|
60
|
+
Issues = "https://github.com/yasyf/spawnllm/issues"
|
|
61
|
+
Changelog = "https://github.com/yasyf/spawnllm/blob/main/CHANGELOG.md"
|
|
62
|
+
|
|
63
|
+
[build-system]
|
|
64
|
+
requires = ["uv_build>=0.11,<0.12"]
|
|
65
|
+
build-backend = "uv_build"
|
|
66
|
+
|
|
67
|
+
[tool.uv.build-backend]
|
|
68
|
+
module-name = "spawnllm"
|
|
69
|
+
module-root = ""
|
|
70
|
+
|
|
71
|
+
[tool.pytest.ini_options]
|
|
72
|
+
testpaths = ["tests"]
|
|
73
|
+
anyio_mode = "auto"
|
|
74
|
+
addopts = ["-ra", "--strict-markers", "--tb=short", "-q"]
|
|
75
|
+
markers = [
|
|
76
|
+
"unit: Pure unit tests",
|
|
77
|
+
"integration: Integration tests",
|
|
78
|
+
]
|
|
79
|
+
|
|
80
|
+
# ty (Astral) is the default type checker — run `uv run ty check spawnllm`.
|
|
81
|
+
# It is fast, understands modern syntax, and avoids the strict-pyright false
|
|
82
|
+
# positives on pydantic/attrs-style dynamic defaults and PK-type overrides.
|
|
83
|
+
[tool.ty.rules]
|
|
84
|
+
# Keep cross-checker `# type: ignore` / `# pyright: ignore` comments from tripping ty.
|
|
85
|
+
unused-type-ignore-comment = "ignore"
|
|
86
|
+
# The MLX engine lazily imports optional native deps (mlx_lm, mlx, huggingface_hub)
|
|
87
|
+
# that are absent off Apple Silicon and in the dev/CI environment.
|
|
88
|
+
unresolved-import = "ignore"
|
|
89
|
+
|
|
90
|
+
# pyright is kept as a secondary checker (editors / `uvx pyright`). Basic mode plus
|
|
91
|
+
# a few disables covers the noise; ty is the gate that runs in CI.
|
|
92
|
+
[tool.pyright]
|
|
93
|
+
pythonVersion = "3.13"
|
|
94
|
+
typeCheckingMode = "basic"
|
|
95
|
+
include = ["spawnllm"]
|
|
96
|
+
venvPath = "."
|
|
97
|
+
venv = ".venv"
|
|
98
|
+
reportImplicitOverride = "none"
|
|
99
|
+
reportIncompatibleVariableOverride = "none"
|
|
100
|
+
reportUnknownVariableType = "none"
|
|
101
|
+
reportUnknownMemberType = "none"
|
|
102
|
+
reportUnknownArgumentType = "none"
|
|
103
|
+
reportUnknownParameterType = "none"
|
|
104
|
+
reportUnknownLambdaType = "none"
|
|
105
|
+
reportMissingTypeArgument = "none"
|
|
106
|
+
reportPrivateImportUsage = "none"
|
|
107
|
+
reportUnusedCallResult = "none"
|
|
108
|
+
|
|
109
|
+
[tool.ruff]
|
|
110
|
+
line-length = 120
|
|
111
|
+
target-version = "py313"
|
|
112
|
+
src = [".", "tests"]
|
|
113
|
+
|
|
114
|
+
[tool.ruff.lint]
|
|
115
|
+
select = ["E", "F", "I", "UP"]
|
|
116
|
+
|
|
117
|
+
[dependency-groups]
|
|
118
|
+
docs = [
|
|
119
|
+
# great-docs imports griffe's 2.x module layout; griffelib is the modern
|
|
120
|
+
# griffe (2.x) distribution, overriding the legacy griffe<2 pin great-docs
|
|
121
|
+
# itself still declares.
|
|
122
|
+
"griffelib>=2.0",
|
|
123
|
+
# Tracking great-docs main until a release newer than 0.13.0: main carries
|
|
124
|
+
# build-time GitHub widget stats (embedded via the CI GITHUB_TOKEN), which
|
|
125
|
+
# drop the navbar widget's client-side API calls — the source of GitHub 403
|
|
126
|
+
# errors on the published site.
|
|
127
|
+
# TODO(bootstrap): revert to a PyPI pin (`great-docs>=0.14`) once released.
|
|
128
|
+
"great-docs @ git+https://github.com/posit-dev/great-docs@main",
|
|
129
|
+
]
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
|
|
2
|
+
|
|
3
|
+
The top-level namespace exposes the CLI backends, subprocess transport, and
|
|
4
|
+
structured-output helpers. The MLX engine lives under :mod:`spawnllm.mlx` and is
|
|
5
|
+
imported lazily so that ``import spawnllm`` never pulls ``mlx_lm``/``zstandard``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from spawnllm.backends import (
|
|
11
|
+
ClaudeCliBackend,
|
|
12
|
+
ClaudeNotAuthenticated,
|
|
13
|
+
ClaudeNotInstalled,
|
|
14
|
+
ClaudeReady,
|
|
15
|
+
ClaudeStatus,
|
|
16
|
+
CodexCliBackend,
|
|
17
|
+
LlmBackend,
|
|
18
|
+
LlmBackends,
|
|
19
|
+
check_status,
|
|
20
|
+
)
|
|
21
|
+
from spawnllm.call import call
|
|
22
|
+
from spawnllm.proc import arun_cli, collect_process, map_concurrent, run_cli
|
|
23
|
+
from spawnllm.structured import (
|
|
24
|
+
extract_structured,
|
|
25
|
+
parse_result_envelope,
|
|
26
|
+
parse_structured_output,
|
|
27
|
+
resolve_schema_path,
|
|
28
|
+
schema_for,
|
|
29
|
+
)
|
|
30
|
+
from spawnllm.types import TModel, TSpecialty
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
"ClaudeCliBackend",
|
|
34
|
+
"ClaudeNotAuthenticated",
|
|
35
|
+
"ClaudeNotInstalled",
|
|
36
|
+
"ClaudeReady",
|
|
37
|
+
"ClaudeStatus",
|
|
38
|
+
"CodexCliBackend",
|
|
39
|
+
"LlmBackend",
|
|
40
|
+
"LlmBackends",
|
|
41
|
+
"TModel",
|
|
42
|
+
"TSpecialty",
|
|
43
|
+
"arun_cli",
|
|
44
|
+
"call",
|
|
45
|
+
"check_status",
|
|
46
|
+
"collect_process",
|
|
47
|
+
"extract_structured",
|
|
48
|
+
"map_concurrent",
|
|
49
|
+
"parse_result_envelope",
|
|
50
|
+
"parse_structured_output",
|
|
51
|
+
"resolve_schema_path",
|
|
52
|
+
"run_cli",
|
|
53
|
+
"schema_for",
|
|
54
|
+
]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""LLM CLI backends (Claude/Codex) and the specialty registry."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from spawnllm.backends.base import LlmBackend
|
|
6
|
+
from spawnllm.backends.claude import (
|
|
7
|
+
ClaudeCliBackend,
|
|
8
|
+
ClaudeNotAuthenticated,
|
|
9
|
+
ClaudeNotInstalled,
|
|
10
|
+
ClaudeReady,
|
|
11
|
+
ClaudeStatus,
|
|
12
|
+
check_status,
|
|
13
|
+
)
|
|
14
|
+
from spawnllm.backends.codex import CodexCliBackend
|
|
15
|
+
from spawnllm.backends.registry import LlmBackends
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"ClaudeCliBackend",
|
|
19
|
+
"ClaudeNotAuthenticated",
|
|
20
|
+
"ClaudeNotInstalled",
|
|
21
|
+
"ClaudeReady",
|
|
22
|
+
"ClaudeStatus",
|
|
23
|
+
"CodexCliBackend",
|
|
24
|
+
"LlmBackend",
|
|
25
|
+
"LlmBackends",
|
|
26
|
+
"check_status",
|
|
27
|
+
]
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Abstract interface for an LLM CLI backend."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import TYPE_CHECKING, ClassVar
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
from spawnllm.types import TModel
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class LlmBackend(ABC):
    """Contract that every CLI-driven LLM backend implements.

    A concrete backend declares how abstract model sizes translate to the
    provider's own model names, how to assemble the argv for the provider's
    CLI, how to interpret that CLI's raw stdout, and which extra environment
    variables the invocation needs.

    Attributes:
        models: Provider model name for each abstract model size.
    """

    models: ClassVar[dict[TModel, str]]

    @abstractmethod
    def build_command(self, model: str, schema_path: str | None, agent: bool) -> list[str]:
        """Assemble the argv for one CLI invocation; the prompt arrives on stdin.

        Args:
            model: Model name in the provider's own vocabulary.
            schema_path: Schema argument to request structured output, or
                ``None`` when plain text is wanted.
            agent: ``True`` if the invocation is allowed tools / agent
                capabilities.

        Returns:
            The list of argv entries to execute.
        """

    @abstractmethod
    def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
        """Turn the CLI's raw stdout into plain text or a validated model.

        Args:
            raw: Stdout captured from the backend CLI.
            response_model: Model class to validate against; ``None`` asks for
                the raw text.

        Returns:
            ``raw`` itself when ``response_model`` is ``None``; otherwise a
            validated ``response_model`` instance.
        """

    @abstractmethod
    def env(self) -> dict[str, str]:
        """Return the additional environment variables for the CLI invocation."""
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""LlmBackend for the Anthropic ``claude`` CLI, plus install/auth status checks."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import shutil
|
|
6
|
+
import subprocess
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import TYPE_CHECKING, ClassVar
|
|
9
|
+
|
|
10
|
+
from spawnllm.backends.base import LlmBackend
|
|
11
|
+
from spawnllm.structured import parse_result_envelope, parse_structured_output
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from pydantic import BaseModel
|
|
15
|
+
|
|
16
|
+
from spawnllm.types import TModel
|
|
17
|
+
|
|
18
|
+
CLAUDE_MODELS: dict[TModel, str] = {"small": "haiku", "medium": "sonnet", "large": "opus"}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True)
|
|
22
|
+
class ClaudeReady:
|
|
23
|
+
"""The ``claude`` CLI is installed and authenticated."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
|
|
27
|
+
class ClaudeNotInstalled:
|
|
28
|
+
"""The ``claude`` CLI is not on PATH.
|
|
29
|
+
|
|
30
|
+
Attributes:
|
|
31
|
+
brew_available: Whether Homebrew is available to install it.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
brew_available: bool
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
|
|
38
|
+
class ClaudeNotAuthenticated:
|
|
39
|
+
"""The ``claude`` CLI is installed but not authenticated."""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
ClaudeStatus = ClaudeReady | ClaudeNotInstalled | ClaudeNotAuthenticated
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def check_status(timeout: int = 10) -> ClaudeStatus:
    """Report whether the ``claude`` CLI is installed and authenticated.

    Args:
        timeout: Seconds allowed for the ``claude auth status`` subprocess.

    Returns:
        ``ClaudeNotInstalled`` when ``claude`` is not found on PATH,
        ``ClaudeReady`` when ``claude auth status`` exits with code 0, and
        ``ClaudeNotAuthenticated`` for any other exit code.
    """
    claude_found = bool(shutil.which("claude"))
    if not claude_found:
        has_brew = bool(shutil.which("brew"))
        return ClaudeNotInstalled(brew_available=has_brew)

    # check=False: a non-zero exit is an expected answer here, not an error.
    probe = subprocess.run(
        ["claude", "auth", "status"],
        capture_output=True,
        text=True,
        timeout=timeout,
        check=False,
    )
    return ClaudeReady() if probe.returncode == 0 else ClaudeNotAuthenticated()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(frozen=True)
class ClaudeCliBackend(LlmBackend):
    """:class:`LlmBackend` implementation that drives the Anthropic ``claude`` CLI.

    Two invocation shapes are offered. :meth:`build_command` produces an argv
    without the prompt (the prompt is delivered over stdin) and pairs with
    :meth:`parse_response`. :meth:`build_argv` embeds the prompt after ``-p``
    together with :attr:`inline_system_prompt` and pairs with
    :meth:`parse_result_envelope`; the :meth:`cc_sentiment` preset constructs a
    backend for that second shape.

    Attributes:
        models: Abstract model size to ``claude`` model name.
        inline_system_prompt: System prompt passed by :meth:`build_argv`.
        verbose: When true, :meth:`build_argv` appends ``--verbose``.

    Example:
        >>> ClaudeCliBackend().build_command("haiku", None, agent=False)[0]
        'claude'
    """

    models: ClassVar[dict[TModel, str]] = CLAUDE_MODELS

    inline_system_prompt: str = ""
    verbose: bool = False

    @classmethod
    def cc_sentiment(cls, *, system_prompt: str, verbose: bool = False) -> ClaudeCliBackend:
        """Build a backend preset for inline ``-p`` prompting and envelope parsing."""
        return cls(verbose=verbose, inline_system_prompt=system_prompt)

    def build_command(self, model: str, schema_path: str | None, agent: bool) -> list[str]:
        """Return the stdin-prompt argv for ``claude`` (see :class:`LlmBackend`)."""
        argv = ["claude", "-p", "--no-session-persistence", "--model", model]
        if agent:
            argv += ["--permission-mode", "auto", "--max-budget-usd", "1"]
        else:
            # Non-agent calls pass an empty system prompt and empty setting sources.
            argv += ["--system-prompt", "", "--setting-sources", "", "--strict-mcp-config"]
        if schema_path:
            argv += ["--json-schema", schema_path, "--output-format", "json"]
        return argv

    def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
        """Delegate parsing of ``raw`` to :func:`parse_structured_output`."""
        return parse_structured_output(raw, response_model)

    def env(self) -> dict[str, str]:
        """Return the environment overrides applied to every ``claude`` call."""
        return {"CLAUDE_CODE_SIMPLE": "1"}

    def build_argv(self, content: str, *, model: str) -> list[str]:
        """Return the inline ``-p`` argv used by the sentiment/pushback scoring path.

        Args:
            content: Prompt text, placed directly after ``-p``.
            model: Model name passed to ``--model``.

        Returns:
            The argv list, ending in ``--verbose`` when :attr:`verbose` is set.
        """
        trailing = ["--verbose"] if self.verbose else []
        return [
            "claude",
            "-p",
            content,
            "--model",
            model,
            "--system-prompt",
            self.inline_system_prompt,
            "--output-format",
            "json",
            "--max-turns",
            "1",
            "--tools",
            "",
            "--disable-slash-commands",
            *trailing,
        ]

    @staticmethod
    def parse_result_envelope(stdout: bytes, *, argv: list[str], stderr: bytes) -> str:
        """Parse the ``{is_error, result}`` JSON envelope; raise ``CalledProcessError`` on error."""
        # Inside the method body this name resolves to the module-level helper,
        # not to this static method.
        return parse_result_envelope(stdout, argv=argv, stderr=stderr)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""LlmBackend for the OpenAI ``codex`` CLI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, ClassVar
|
|
6
|
+
|
|
7
|
+
from spawnllm.backends.base import LlmBackend
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from pydantic import BaseModel
|
|
11
|
+
|
|
12
|
+
from spawnllm.types import TModel
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CodexCliBackend(LlmBackend):
    """:class:`LlmBackend` implementation that drives the OpenAI ``codex`` CLI.

    Attributes:
        models: Abstract model size to ``codex`` model name.
    """

    models: ClassVar[dict[TModel, str]] = {
        "small": "gpt-5.3-codex-spark",
        "medium": "gpt-5.4-mini",
        "large": "gpt-5.5",
    }

    def build_command(self, model: str, schema_path: str | None, agent: bool) -> list[str]:
        """Return the ``codex exec`` argv (see :class:`LlmBackend`)."""
        argv = ["codex", "exec", "--ephemeral", "--sandbox", "read-only", "--model", model]
        if not agent:
            # Non-agent calls switch off the hooks and MCP-server features.
            argv.extend(["-c", "features.codex_hooks=false", "-c", "features.mcp_servers=false"])
        if schema_path:
            argv.extend(["--output-schema", schema_path])
        return argv

    def parse_response(self, raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
        """Return ``raw`` as-is, or validate it as JSON against ``response_model``."""
        if response_model:
            return response_model.model_validate_json(raw)
        return raw

    def env(self) -> dict[str, str]:
        """Return the environment overrides for ``codex`` calls (none)."""
        return {}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Specialty → backend registry."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, ClassVar
|
|
6
|
+
|
|
7
|
+
from spawnllm.backends.claude import ClaudeCliBackend
|
|
8
|
+
from spawnllm.backends.codex import CodexCliBackend
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from spawnllm.backends.base import LlmBackend
|
|
12
|
+
from spawnllm.types import TSpecialty
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LlmBackends:
    """Lookup table from a specialty to the :class:`LlmBackend` that handles it.

    Attributes:
        LLM_BACKENDS: One backend instance per specialty, created when the
            class body is executed.
    """

    LLM_BACKENDS: ClassVar[dict[TSpecialty, LlmBackend]] = {
        "debugging": CodexCliBackend(),
        "review": CodexCliBackend(),
        "general": ClaudeCliBackend(),
    }

    @classmethod
    def for_specialty(cls, specialty: TSpecialty) -> LlmBackend:
        """Look up the backend registered under ``specialty``.

        Raises:
            KeyError: If ``specialty`` has no registered backend.
        """
        registry = cls.LLM_BACKENDS
        return registry[specialty]
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""High-level one-shot sync LLM call used by the debugging CLI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from spawnllm.proc import run_cli
|
|
9
|
+
from spawnllm.structured import resolve_schema_path, schema_for
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from pydantic import BaseModel
|
|
13
|
+
|
|
14
|
+
from spawnllm.backends.base import LlmBackend
|
|
15
|
+
from spawnllm.types import TModel
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def call(
    prompt: str,
    *,
    backend: LlmBackend,
    model: TModel = "small",
    agent: bool = False,
    response_model: type[BaseModel] | None = None,
) -> str | BaseModel:
    """Execute a single CLI-backed LLM call and parse what comes back.

    Args:
        prompt: User prompt; it is handed to the backend CLI as its input.
        backend: The :class:`~spawnllm.backends.base.LlmBackend` to invoke.
        model: Abstract model tier (``small``/``medium``/``large``), resolved
            through ``backend.models``.
        agent: Whether the call may use tools / agent capabilities.
        response_model: Pydantic model for structured output, or ``None`` for text.

    Returns:
        Whatever ``backend.parse_response`` yields: the raw text response, or a
        validated ``response_model`` instance.
    """
    schema = None if response_model is None else schema_for(response_model)
    schema_path = resolve_schema_path(backend, schema)
    provider_model = backend.models[model]
    argv = backend.build_command(provider_model, schema_path, agent)
    # Backend-specific variables take precedence over the inherited environment.
    merged_env = {**os.environ, **backend.env()}
    stdout = run_cli(argv, input=prompt, env=merged_env)
    return backend.parse_response(stdout, response_model)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from typing import cast
|
|
5
|
+
|
|
6
|
+
import click
|
|
7
|
+
from loguru import logger
|
|
8
|
+
|
|
9
|
+
from spawnllm.backends import ClaudeCliBackend, CodexCliBackend
|
|
10
|
+
from spawnllm.call import call as call_backend
|
|
11
|
+
from spawnllm.types import TModel
|
|
12
|
+
|
|
13
|
+
BACKENDS = ("claude", "codex", "mlx")
|
|
14
|
+
CLI_BACKENDS = {"claude": ClaudeCliBackend, "codex": CodexCliBackend}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Root command group; the docstring below is what click prints as the --help text.
@click.group()
@click.version_option(package_name="spawnllm")
def main() -> None:
    """Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@main.command()
def backends() -> None:
    """List the LLM backends spawnllm can drive."""
    logger.debug("backends invoked")
    # One backend name per output line, in the order BACKENDS declares them.
    for backend_name in BACKENDS:
        click.echo(backend_name)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@main.command()
@click.option("--backend", type=click.Choice(["claude", "codex"]), required=True)
@click.option("--model", type=click.Choice(["small", "medium", "large"]), default="small")
@click.option("--agent", is_flag=True, help="Allow tools / agent capabilities.")
@click.argument("prompt", required=False)
def call(backend: str, model: str, agent: bool, prompt: str | None) -> None:
    """Make a one-off LLM call (reads PROMPT or stdin) and print the response."""
    # Only a missing PROMPT argument falls back to stdin; an empty string is used as given.
    if prompt is None:
        prompt_text = sys.stdin.read()
    else:
        prompt_text = prompt
    backend_cls = CLI_BACKENDS[backend]
    # click.Choice has already restricted ``model`` to the TModel literals.
    response = call_backend(prompt_text, backend=backend_cls(), model=cast(TModel, model), agent=agent)
    click.echo(response)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Local MLX engine, adapter codec, fuser, and runtime patches.
|
|
2
|
+
|
|
3
|
+
Imports here are lazy so that ``import spawnllm`` never pulls ``mlx_lm``/``zstandard``;
|
|
4
|
+
only consumers that touch ``spawnllm.mlx`` attributes load the heavy dependencies.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from spawnllm.mlx.codec import AdapterCodec
|
|
13
|
+
from spawnllm.mlx.engine import MlxEngine
|
|
14
|
+
from spawnllm.mlx.fuse import AdapterFuser
|
|
15
|
+
from spawnllm.mlx.patches import MLXPatches
|
|
16
|
+
|
|
17
|
+
__all__ = ["AdapterCodec", "AdapterFuser", "MLXPatches", "MlxEngine"]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def __getattr__(name: str) -> object:
    """Resolve the package's public classes on first attribute access (module-level ``__getattr__``).

    Each branch imports its submodule only when that specific name is requested, so
    importing the package alone does not import the submodules.

    Raises:
        AttributeError: if ``name`` is not one of the lazily exported classes.
    """
    if name == "AdapterCodec":
        from spawnllm.mlx.codec import AdapterCodec

        return AdapterCodec
    if name == "AdapterFuser":
        from spawnllm.mlx.fuse import AdapterFuser

        return AdapterFuser
    if name == "MlxEngine":
        from spawnllm.mlx.engine import MlxEngine

        return MlxEngine
    if name == "MLXPatches":
        from spawnllm.mlx.patches import MLXPatches

        return MLXPatches
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""Cross-platform LoRA adapter codec (byte-shuffle + zstd; imports without ``mlx_lm``)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import struct
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import ClassVar
|
|
9
|
+
|
|
10
|
+
import orjson
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AdapterCodec:
    """Compress/decompress a homogeneous-dtype safetensors adapter with byte-shuffle + zstd.

    Subclasses override :attr:`DIR`/:attr:`ZST`/:attr:`CONFIG` to point at their
    shipped package data.

    Encoding keeps the safetensors header verbatim, byte-shuffles every tensor body
    (all first bytes of each element, then all second bytes, ...) and zstd-compresses
    the result into :attr:`ZST`; decoding reverses both steps.

    Malformed input (bad header, mixed or unsupported dtypes, truncated tensor data)
    raises :class:`ValueError`. These checks are explicit ``raise`` statements rather
    than ``assert`` so they still run under ``python -O``.

    Example:
        >>> AdapterCodec.encode(Path("adapters.safetensors"))
        >>> AdapterCodec.digest()
    """

    DIR: ClassVar[Path] = Path(__file__).parent
    ZST: ClassVar[Path] = DIR / "adapters.safetensors.zst"
    CONFIG: ClassVar[Path] = DIR / "adapter_config.json"
    # Bytes per element for each supported safetensors dtype tag.
    TYPESIZES: ClassVar[dict[str, int]] = {"F32": 4, "BF16": 2, "F16": 2}
    COMPRESSION_LEVEL: ClassVar[int] = 19

    @classmethod
    def digest(cls) -> str:
        """Return the first 16 hex characters of the SHA-256 of the compressed adapter file."""
        return hashlib.sha256(cls.ZST.read_bytes()).hexdigest()[:16]

    @classmethod
    def encode(cls, src: Path) -> None:
        """Read the safetensors file ``src``, shuffle + compress it, and write it to :attr:`ZST`.

        Raises:
            ValueError: if ``src`` is malformed, mixes dtypes, or uses an unsupported dtype.
        """
        import zstandard as zstd

        raw = src.read_bytes()
        cls._assert_homogeneous_dtype(raw)
        shuffled = cls._walk(raw, shuffle=True)
        cls.ZST.write_bytes(zstd.ZstdCompressor(level=cls.COMPRESSION_LEVEL).compress(shuffled))

    @classmethod
    def decode(cls, dst: Path) -> None:
        """Decompress :attr:`ZST`, undo the byte-shuffle, and write plain safetensors to ``dst``.

        Raises:
            ValueError: if the decompressed payload is malformed or truncated.
        """
        import zstandard as zstd

        shuffled = zstd.ZstdDecompressor().decompress(cls.ZST.read_bytes())
        dst.write_bytes(cls._walk(shuffled, shuffle=False))

    @classmethod
    def dtype(cls) -> str:
        """Return the single dtype tag (e.g. ``"BF16"``) shared by every tensor in :attr:`ZST`.

        Raises:
            ValueError: if the payload is malformed, mixes dtypes, or uses an unsupported dtype.
        """
        import zstandard as zstd

        raw = zstd.ZstdDecompressor().decompress(cls.ZST.read_bytes())
        cls._assert_homogeneous_dtype(raw)
        _, tensors = cls._parse_header(raw)
        return next(meta["dtype"] for _, meta in tensors)

    @classmethod
    def _parse_header(cls, raw: bytes) -> tuple[int, list[tuple[str, dict]]]:
        """Split a safetensors payload into ``(header_end, tensor_entries)``.

        The payload starts with a little-endian u64 giving the JSON header length; the
        tensor data begins at ``header_end``. The ``__metadata__`` key is not a tensor
        and is excluded from the returned entries.

        Raises:
            ValueError: if the payload is too short to contain the declared header.
        """
        if len(raw) < 8:
            raise ValueError("not a safetensors payload: missing 8-byte header length")
        header_end = 8 + struct.unpack_from("<Q", raw)[0]
        if header_end > len(raw):
            raise ValueError(f"safetensors header claims {header_end} bytes but payload has only {len(raw)}")
        header = orjson.loads(raw[8:header_end])
        return header_end, [(k, v) for k, v in header.items() if k != "__metadata__"]

    @classmethod
    def _walk(cls, raw: bytes, *, shuffle: bool) -> bytes:
        """Copy the header verbatim and (un)shuffle each tensor body in offset order.

        Args:
            raw: a complete safetensors payload (plain when shuffling, shuffled when not).
            shuffle: ``True`` to byte-shuffle the tensor bodies, ``False`` to restore them.

        Raises:
            ValueError: on an unsupported dtype or when the body is shorter than the
                header's ``data_offsets`` require.
        """
        header_end, tensors = cls._parse_header(raw)
        # A memoryview lets each tensor be sliced out without copying the whole body first.
        body = memoryview(raw)[header_end:]
        transform = cls._shuffle if shuffle else cls._unshuffle
        out = bytearray(raw[:header_end])
        cursor = 0
        for name, meta in sorted(tensors, key=lambda kv: kv[1]["data_offsets"][0]):
            if meta["dtype"] not in cls.TYPESIZES:
                raise ValueError(f"{name}: unsupported dtype {meta['dtype']}")
            typesize = cls.TYPESIZES[meta["dtype"]]
            nbytes = meta["data_offsets"][1] - meta["data_offsets"][0]
            chunk = body[cursor : cursor + nbytes]
            if len(chunk) != nbytes:
                # Slicing past the end silently shortens the chunk; without this check a
                # truncated file would round-trip into a corrupt adapter.
                raise ValueError(f"{name}: tensor data truncated (expected {nbytes} bytes, found {len(chunk)})")
            out.extend(transform(chunk, typesize))
            cursor += nbytes
        return bytes(out)

    @classmethod
    def _assert_homogeneous_dtype(cls, raw: bytes) -> None:
        """Validate that every tensor in ``raw`` shares one dtype listed in :attr:`TYPESIZES`.

        Raises:
            ValueError: if the tensors use zero or several dtypes, or an unsupported one.
        """
        _, tensors = cls._parse_header(raw)
        dtypes = {meta["dtype"] for _, meta in tensors}
        if len(dtypes) != 1:
            raise ValueError(f"adapter must be homogeneous-dtype, got {dtypes}")
        if not dtypes.issubset(cls.TYPESIZES.keys()):
            raise ValueError(f"unsupported dtype: {dtypes}")

    @classmethod
    def _shuffle(cls, chunk: bytes, typesize: int) -> bytes:
        """Regroup ``chunk`` so byte 0 of every element comes first, then byte 1, and so on."""
        import numpy as np

        # View as (n_elements, typesize) and transpose: row i then holds byte i of each element.
        return np.frombuffer(chunk, dtype=np.uint8).reshape(-1, typesize).T.tobytes()

    @classmethod
    def _unshuffle(cls, chunk: bytes, typesize: int) -> bytes:
        """Inverse of :meth:`_shuffle`: restore the element-interleaved byte order."""
        import numpy as np

        return np.frombuffer(chunk, dtype=np.uint8).reshape(typesize, -1).T.tobytes()
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""Domain-agnostic MLX batch-inference engine running on a dedicated worker thread."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import copy
|
|
7
|
+
import platform
|
|
8
|
+
import queue
|
|
9
|
+
import sys
|
|
10
|
+
import threading
|
|
11
|
+
from typing import TYPE_CHECKING, Any
|
|
12
|
+
|
|
13
|
+
import anyio.to_thread
|
|
14
|
+
|
|
15
|
+
from spawnllm.mlx.patches import MLXPatches
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Callable
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
WORKER_STOP = object()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class MlxEngine:
|
|
25
|
+
"""Run batched MLX inference on a dedicated worker thread.
|
|
26
|
+
|
|
27
|
+
Sentiment/domain specifics are injected: ``logits_processor_factory`` builds the
|
|
28
|
+
per-model logit processor from the loaded tokenizer, and ``prefix_messages`` is
|
|
29
|
+
the cached system/demo prefix shared across a batch.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
def __init__(
|
|
33
|
+
self,
|
|
34
|
+
fused_dir: Path,
|
|
35
|
+
*,
|
|
36
|
+
logits_processor_factory: Callable[[Any], Callable[..., Any]],
|
|
37
|
+
prefix_messages: list[dict[str, str]],
|
|
38
|
+
batch_size: int,
|
|
39
|
+
worker_name: str = "mlx",
|
|
40
|
+
) -> None:
|
|
41
|
+
if sys.platform != "darwin" or platform.machine() != "arm64":
|
|
42
|
+
raise RuntimeError("The MLX engine requires macOS on Apple Silicon. Use a CLI backend on this platform.")
|
|
43
|
+
self._fused_dir = fused_dir
|
|
44
|
+
self._logits_processor_factory = logits_processor_factory
|
|
45
|
+
self._prefix_messages = prefix_messages
|
|
46
|
+
self._batch_size = batch_size
|
|
47
|
+
self._inbox: queue.SimpleQueue = queue.SimpleQueue()
|
|
48
|
+
self._loaded = threading.Event()
|
|
49
|
+
self._init_error: BaseException | None = None
|
|
50
|
+
self._thread = threading.Thread(target=self._worker, daemon=True, name=worker_name)
|
|
51
|
+
self._thread.start()
|
|
52
|
+
|
|
53
|
+
def _worker(self) -> None:
|
|
54
|
+
try:
|
|
55
|
+
self._load()
|
|
56
|
+
except BaseException as exc:
|
|
57
|
+
self._init_error = exc
|
|
58
|
+
self._loaded.set()
|
|
59
|
+
return
|
|
60
|
+
self._loaded.set()
|
|
61
|
+
while True:
|
|
62
|
+
job = self._inbox.get()
|
|
63
|
+
if job is WORKER_STOP:
|
|
64
|
+
return
|
|
65
|
+
fn, args, on_result, on_error = job
|
|
66
|
+
try:
|
|
67
|
+
on_result(fn(*args))
|
|
68
|
+
except BaseException as exc:
|
|
69
|
+
on_error(exc)
|
|
70
|
+
|
|
71
|
+
def _load(self) -> None:
|
|
72
|
+
from mlx_lm import batch_generate, load
|
|
73
|
+
|
|
74
|
+
MLXPatches.apply()
|
|
75
|
+
self.model, self.tokenizer = load(str(self._fused_dir))
|
|
76
|
+
self.logit_processor = self._logits_processor_factory(self.tokenizer)
|
|
77
|
+
self.prefix_messages = self._prefix_messages
|
|
78
|
+
self.prefix_tokens = self.tokenizer.apply_chat_template(
|
|
79
|
+
self.prefix_messages, tokenize=True, add_generation_prompt=False
|
|
80
|
+
)
|
|
81
|
+
self.base_cache = batch_generate(
|
|
82
|
+
self.model,
|
|
83
|
+
self.tokenizer,
|
|
84
|
+
[self.prefix_tokens],
|
|
85
|
+
max_tokens=1,
|
|
86
|
+
logits_processors=[self.logit_processor],
|
|
87
|
+
return_prompt_caches=True,
|
|
88
|
+
).caches[0]
|
|
89
|
+
|
|
90
|
+
async def ensure_loaded(self) -> None:
|
|
91
|
+
await anyio.to_thread.run_sync(self._loaded.wait)
|
|
92
|
+
if self._init_error is not None:
|
|
93
|
+
raise self._init_error
|
|
94
|
+
|
|
95
|
+
async def submit[R](self, fn: Callable[..., R], *args: Any) -> R:
|
|
96
|
+
loop = asyncio.get_running_loop()
|
|
97
|
+
fut: asyncio.Future = loop.create_future()
|
|
98
|
+
self._inbox.put(
|
|
99
|
+
(
|
|
100
|
+
fn,
|
|
101
|
+
args,
|
|
102
|
+
lambda value: loop.call_soon_threadsafe(fut.set_result, value),
|
|
103
|
+
lambda exc: loop.call_soon_threadsafe(fut.set_exception, exc),
|
|
104
|
+
)
|
|
105
|
+
)
|
|
106
|
+
return await fut
|
|
107
|
+
|
|
108
|
+
def _generate_chunk(self, chunk: list[list[dict[str, str]]]) -> list[str]:
|
|
109
|
+
from mlx_lm import batch_generate
|
|
110
|
+
|
|
111
|
+
suffixes = [
|
|
112
|
+
self.tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True)[
|
|
113
|
+
len(self.prefix_tokens) :
|
|
114
|
+
]
|
|
115
|
+
for messages in chunk
|
|
116
|
+
]
|
|
117
|
+
return batch_generate(
|
|
118
|
+
self.model,
|
|
119
|
+
self.tokenizer,
|
|
120
|
+
suffixes,
|
|
121
|
+
max_tokens=1,
|
|
122
|
+
logits_processors=[self.logit_processor],
|
|
123
|
+
prompt_caches=[copy.deepcopy(self.base_cache) for _ in suffixes],
|
|
124
|
+
).texts
|
|
125
|
+
|
|
126
|
+
async def generate(
|
|
127
|
+
self,
|
|
128
|
+
message_lists: list[list[dict[str, str]]],
|
|
129
|
+
on_progress: Callable[[int], None],
|
|
130
|
+
) -> list[str]:
|
|
131
|
+
order = sorted(range(len(message_lists)), key=lambda i: len(message_lists[i][-1]["content"]))
|
|
132
|
+
responses: list[str] = [""] * len(message_lists)
|
|
133
|
+
for start in range(0, len(order), self._batch_size):
|
|
134
|
+
slice_ = order[start : start + self._batch_size]
|
|
135
|
+
chunk = [message_lists[i] for i in slice_]
|
|
136
|
+
chunk_responses = await self.submit(self._generate_chunk, chunk)
|
|
137
|
+
for i, r in zip(slice_, chunk_responses, strict=True):
|
|
138
|
+
responses[i] = r
|
|
139
|
+
on_progress(len(chunk))
|
|
140
|
+
return responses
|
|
141
|
+
|
|
142
|
+
def peak_memory_gb(self) -> float:
|
|
143
|
+
import resource
|
|
144
|
+
|
|
145
|
+
return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / (1024**3)
|
|
146
|
+
|
|
147
|
+
async def close(self) -> None:
|
|
148
|
+
self._inbox.put(WORKER_STOP)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Fuse a shipped LoRA adapter into a base MLX model, cached in the HF-hub layout."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import tempfile
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from spawnllm.mlx.codec import AdapterCodec
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AdapterFuser:
    """Build, and cache, a copy of a base MLX model with a shipped LoRA adapter fused in."""

    @classmethod
    def ensure_fused(
        cls,
        model_repo: str,
        *,
        codec: AdapterCodec,
        cache_namespace: str,
        tqdm_class: type | None = None,
    ) -> Path:
        """Return the directory of the fused model, creating it on first use.

        The result lives under the Hugging Face hub cache at
        ``models--{cache_namespace}-{digest}/snapshots/{digest}``, where ``digest`` is
        ``codec.digest()``. If that directory already contains ``config.json`` it is
        returned without importing ``mlx``/``mlx_lm`` or downloading anything.

        Args:
            model_repo: repository id passed to ``snapshot_download`` for the base model.
            codec: supplies the adapter digest, its config file, and the adapter weights.
            cache_namespace: prefix of the cache directory name.
            tqdm_class: forwarded to ``snapshot_download``.

        NOTE(review): the model is saved straight into the final directory and the cache
        hit is decided by ``config.json`` alone, so an interrupted save could look like
        a complete snapshot on the next call. Consider saving to a sibling directory
        and renaming it into place.
        """
        from huggingface_hub.constants import HF_HUB_CACHE

        adapter_digest = codec.digest()
        cache_root = Path(HF_HUB_CACHE) / f"models--{cache_namespace}-{adapter_digest}"
        snapshot_dir = cache_root / "snapshots" / adapter_digest
        config_marker = snapshot_dir / "config.json"
        if config_marker.exists():
            return snapshot_dir

        # Heavy imports are deferred until a fuse is actually required.
        from huggingface_hub import snapshot_download
        from mlx.utils import tree_unflatten
        from mlx_lm.utils import load_adapters, load_model, load_tokenizer, save

        base_path = Path(snapshot_download(model_repo, tqdm_class=tqdm_class))
        with tempfile.TemporaryDirectory() as scratch:
            # Lay the adapter out as ``adapter_config.json`` + ``adapters.safetensors``
            # in a scratch directory that is then handed to ``load_adapters``.
            adapter_dir = Path(scratch)
            adapter_config = adapter_dir / "adapter_config.json"
            adapter_config.write_bytes(codec.CONFIG.read_bytes())
            codec.decode(adapter_dir / "adapters.safetensors")

            model, config = load_model(base_path, lazy=False, strict=False)
            model = load_adapters(model, str(adapter_dir))
            model.eval()
            tokenizer = load_tokenizer(base_path, eos_token_ids=config.get("eos_token_id"))

            # Replace every module that exposes ``fuse()`` with its fused counterpart.
            fused_modules = [
                (module_name, module.fuse())
                for module_name, module in model.named_modules()
                if hasattr(module, "fuse")
            ]
            model.update_modules(tree_unflatten(fused_modules))

            snapshot_dir.mkdir(parents=True, exist_ok=True)
            save(snapshot_dir, base_path, model, tokenizer, config, donate_model=True)

        # Record the digest under ``refs/main`` next to the snapshot.
        refs_dir = cache_root / "refs"
        refs_dir.mkdir(parents=True, exist_ok=True)
        (refs_dir / "main").write_text(adapter_digest)
        return snapshot_dir
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Runtime patches applied in-worker before the first ``batch_generate``."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class MLXPatches:
    """One-shot runtime patches for ``mlx_lm``, installed at most once per process."""

    # True once every patch has been installed successfully.
    applied: bool = False

    @classmethod
    def apply(cls) -> None:
        """Install the patches unless a previous call already succeeded.

        The flag is set only after installation succeeds, so a failed attempt (for
        example an import error) is retried on the next call instead of leaving the
        process unpatched while claiming otherwise.
        """
        if cls.applied:
            return
        cls._apply_batchstats_zerodiv_guard()
        cls.applied = True

    @staticmethod
    def _apply_batchstats_zerodiv_guard() -> None:
        """Replace ``BatchGenerator.stats`` with a version that cannot divide by zero.

        The replacement computes tokens-per-second against ``max(elapsed, 1e-9)``, so a
        run that records no prompt or generation time yields a finite rate instead of
        raising ``ZeroDivisionError``.
        """
        import mlx.core as mx
        from mlx_lm.generate import BatchGenerator, BatchStats

        @contextlib.contextmanager
        def stats(self, stats: BatchStats | None = None):
            if stats is None:
                stats = BatchStats()
            # Reset the per-run counters read in the ``finally`` block below.
            self._prompt_tokens_counter = 0
            self._prompt_time_counter = 0
            self._gen_tokens_counter = 0
            tic = time.perf_counter()
            try:
                yield stats
            finally:
                total_time = time.perf_counter() - tic
                stats.prompt_tokens += self._prompt_tokens_counter
                stats.prompt_time += self._prompt_time_counter
                stats.prompt_tps = stats.prompt_tokens / max(stats.prompt_time, 1e-9)
                stats.generation_tokens += self._gen_tokens_counter
                # Generation time is whatever part of the wall clock was not prompt time.
                stats.generation_time += total_time - self._prompt_time_counter
                stats.generation_tps = stats.generation_tokens / max(stats.generation_time, 1e-9)
                stats.peak_memory = max(stats.peak_memory, mx.get_peak_memory() / 1e9)

        BatchGenerator.stats = stats
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Subprocess transport for CLI-backed LLM calls (sync ``run_cli`` + async ``arun_cli``)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import subprocess
|
|
7
|
+
from collections.abc import Awaitable, Callable, Sequence
|
|
8
|
+
|
|
9
|
+
__all__ = ["arun_cli", "collect_process", "map_concurrent", "run_cli"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def run_cli(
|
|
13
|
+
argv: list[str],
|
|
14
|
+
*,
|
|
15
|
+
input: str | None = None,
|
|
16
|
+
timeout: int = 30,
|
|
17
|
+
env: dict[str, str] | None = None,
|
|
18
|
+
cwd: str | None = None,
|
|
19
|
+
) -> str:
|
|
20
|
+
result = subprocess.run(
|
|
21
|
+
argv,
|
|
22
|
+
input=input,
|
|
23
|
+
capture_output=True,
|
|
24
|
+
text=True,
|
|
25
|
+
timeout=timeout,
|
|
26
|
+
env=env,
|
|
27
|
+
cwd=cwd,
|
|
28
|
+
)
|
|
29
|
+
if result.returncode != 0:
|
|
30
|
+
err = subprocess.CalledProcessError(result.returncode, argv, output=result.stdout, stderr=result.stderr)
|
|
31
|
+
err.add_note(f"argv: {argv}")
|
|
32
|
+
err.add_note(f"exit_code: {result.returncode}")
|
|
33
|
+
err.add_note(f"stderr: {result.stderr[-4096:]}")
|
|
34
|
+
err.add_note(f"stdout: {result.stdout[-4096:]}")
|
|
35
|
+
raise err
|
|
36
|
+
return result.stdout
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
async def collect_process(
|
|
40
|
+
proc: asyncio.subprocess.Process,
|
|
41
|
+
*,
|
|
42
|
+
stderr_tee: Callable[[bytes], None] | None = None,
|
|
43
|
+
) -> tuple[bytes, bytes, int]:
|
|
44
|
+
assert proc.stderr is not None, "create_subprocess_exec was called with stderr=PIPE"
|
|
45
|
+
assert proc.stdout is not None, "create_subprocess_exec was called with stdout=PIPE"
|
|
46
|
+
stderr_buf = bytearray()
|
|
47
|
+
async with asyncio.TaskGroup() as tg:
|
|
48
|
+
tg.create_task(_tee_stderr(proc.stderr, stderr_buf, stderr_tee))
|
|
49
|
+
stdout_task = tg.create_task(proc.stdout.read())
|
|
50
|
+
rc_task = tg.create_task(proc.wait())
|
|
51
|
+
return stdout_task.result(), bytes(stderr_buf), rc_task.result()
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
async def _tee_stderr(
|
|
55
|
+
stream: asyncio.StreamReader,
|
|
56
|
+
buf: bytearray,
|
|
57
|
+
stderr_tee: Callable[[bytes], None] | None,
|
|
58
|
+
) -> None:
|
|
59
|
+
async for raw in stream:
|
|
60
|
+
buf.extend(raw)
|
|
61
|
+
if stderr_tee is not None:
|
|
62
|
+
stderr_tee(raw)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
async def arun_cli(
|
|
66
|
+
argv: list[str],
|
|
67
|
+
*,
|
|
68
|
+
input: str | None = None,
|
|
69
|
+
env: dict[str, str] | None = None,
|
|
70
|
+
cwd: str | None = None,
|
|
71
|
+
stderr_tee: Callable[[bytes], None] | None = None,
|
|
72
|
+
) -> bytes:
|
|
73
|
+
proc = await asyncio.create_subprocess_exec(
|
|
74
|
+
*argv,
|
|
75
|
+
stdin=asyncio.subprocess.PIPE if input is not None else None,
|
|
76
|
+
stdout=asyncio.subprocess.PIPE,
|
|
77
|
+
stderr=asyncio.subprocess.PIPE,
|
|
78
|
+
env=env,
|
|
79
|
+
cwd=cwd,
|
|
80
|
+
)
|
|
81
|
+
if input is not None:
|
|
82
|
+
assert proc.stdin is not None, "create_subprocess_exec was called with stdin=PIPE"
|
|
83
|
+
proc.stdin.write(input.encode())
|
|
84
|
+
await proc.stdin.drain()
|
|
85
|
+
proc.stdin.close()
|
|
86
|
+
stdout, stderr, rc = await collect_process(proc, stderr_tee=stderr_tee)
|
|
87
|
+
if rc != 0:
|
|
88
|
+
raise subprocess.CalledProcessError(rc, argv, output=stdout, stderr=stderr)
|
|
89
|
+
return stdout
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
async def map_concurrent[T, R](
|
|
93
|
+
items: Sequence[T],
|
|
94
|
+
fn: Callable[[T], Awaitable[R]],
|
|
95
|
+
*,
|
|
96
|
+
limit: int,
|
|
97
|
+
on_done: Callable[[int], None] | None = None,
|
|
98
|
+
) -> list[R]:
|
|
99
|
+
sem = asyncio.Semaphore(limit)
|
|
100
|
+
|
|
101
|
+
async def one(item: T) -> R:
|
|
102
|
+
async with sem:
|
|
103
|
+
result = await fn(item)
|
|
104
|
+
if on_done is not None:
|
|
105
|
+
on_done(1)
|
|
106
|
+
return result
|
|
107
|
+
|
|
108
|
+
return list(await asyncio.gather(*(one(item) for item in items)))
|
|
File without changes
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Structured-output helpers: JSON-schema build, schema-path resolution, response parsing."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import subprocess
|
|
8
|
+
import tempfile
|
|
9
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
10
|
+
|
|
11
|
+
from spawnllm.backends.codex import CodexCliBackend
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from pydantic import BaseModel
|
|
15
|
+
|
|
16
|
+
from spawnllm.backends.base import LlmBackend
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"extract_structured",
|
|
20
|
+
"parse_result_envelope",
|
|
21
|
+
"parse_structured_output",
|
|
22
|
+
"resolve_schema_path",
|
|
23
|
+
"schema_for",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def schema_for(model: type[BaseModel]) -> str:
    """Return ``model``'s JSON schema as a JSON string with top-level ``additionalProperties`` set to false."""
    schema = dict(model.model_json_schema())
    # Only the top level is overridden; nested object schemas are left as generated.
    schema["additionalProperties"] = False
    return json.dumps(schema)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def resolve_schema_path(backend: LlmBackend, schema: str | None) -> str | None:
    """Return the schema in the form ``backend`` takes it: a file path for Codex, else the string.

    Args:
        backend: the backend the schema is destined for.
        schema: JSON-schema text, or ``None``/empty for "no schema".

    Returns:
        ``None`` when there is no schema; for a :class:`CodexCliBackend`, the path of a
        freshly written ``.json`` temp file holding the schema; otherwise ``schema``
        unchanged. This function never deletes the temp file.

    Raises:
        OSError: if the temp file cannot be created or written.
    """
    if not schema:
        return None
    if not isinstance(backend, CodexCliBackend):
        return schema

    fd, path = tempfile.mkstemp(suffix=".json")
    try:
        # A file object closes the descriptor even when the write fails and, unlike a
        # bare os.write(), keeps writing until the whole schema has been stored.
        with os.fdopen(fd, "wb") as handle:
            handle.write(schema.encode())
    except BaseException:
        # Do not leave a partially written schema file behind.
        try:
            os.unlink(path)
        except OSError:
            pass
        raise
    return path
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def extract_structured(events: list[dict[str, Any]], model: type[BaseModel]) -> BaseModel | None:
    """Return the validated ``structured_output`` from a stream-json event list, if present.

    The first event whose ``type`` is ``"result"`` and which carries a
    ``structured_output`` key is validated with ``model.model_validate``.

    Entries that are not dicts are skipped. Callers pass arbitrary decoded JSON lists
    here, and a list of scalars must yield ``None`` so they can fall back to
    validating the raw text, rather than failing with ``AttributeError``.

    Returns:
        The validated model instance, or ``None`` when no such event exists.
    """
    for event in events:
        if not isinstance(event, dict):
            continue
        if event.get("type") == "result" and "structured_output" in event:
            return model.model_validate(event["structured_output"])
    return None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def parse_structured_output(raw: str, response_model: type[BaseModel] | None) -> str | BaseModel:
    """Turn a backend's raw text into ``response_model``, or return it untouched without a model.

    A non-empty JSON list is treated as a stream-json event list and searched for a
    structured result first; in every other case, and when that search finds nothing,
    the raw text itself is validated as JSON for ``response_model``.

    Raises:
        json.JSONDecodeError: if a model is requested and ``raw`` is not valid JSON.
    """
    if not response_model:
        return raw

    decoded = json.loads(raw)
    looks_like_events = isinstance(decoded, list) and bool(decoded)
    if not looks_like_events:
        return response_model.model_validate_json(raw)

    event_list = cast(list[dict[str, Any]], decoded)
    from_events = extract_structured(event_list, response_model)
    return from_events or response_model.model_validate_json(raw)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def parse_result_envelope(stdout: bytes, *, argv: list[str], stderr: bytes) -> str:
    """Decode a JSON result envelope from ``stdout`` and return its ``result`` field.

    Args:
        stdout: JSON object containing at least ``is_error`` and ``result``.
        argv: command line that produced the output, used only for error reporting.
        stderr: captured stderr, used only for error reporting.

    Raises:
        subprocess.CalledProcessError: when the envelope's ``is_error`` is truthy. It is
            built with return code ``0``, ``argv`` as the command, and the captured
            ``stdout``/``stderr`` attached.
    """
    envelope = json.loads(stdout)
    if not envelope["is_error"]:
        return envelope["result"]
    raise subprocess.CalledProcessError(0, argv, output=stdout, stderr=stderr)
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Shared type aliases for the LLM-calling surface."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
__all__ = ["TModel", "TSpecialty"]
|
|
8
|
+
|
|
9
|
+
TSpecialty = Literal["debugging", "review", "general"]
|
|
10
|
+
TModel = Literal["small", "medium", "large"]
|