spawnllm 0.3.1__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {spawnllm-0.3.1 → spawnllm-0.5.0}/PKG-INFO +38 -34
- {spawnllm-0.3.1 → spawnllm-0.5.0}/README.md +37 -33
- {spawnllm-0.3.1 → spawnllm-0.5.0}/pyproject.toml +2 -1
- spawnllm-0.5.0/spawnllm/__init__.py +66 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/backends/__init__.py +6 -2
- spawnllm-0.5.0/spawnllm/backends/base.py +313 -0
- spawnllm-0.5.0/spawnllm/backends/claude.py +173 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/backends/codex.py +48 -39
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/backends/gemini.py +54 -55
- spawnllm-0.5.0/spawnllm/backends/mlx.py +65 -0
- spawnllm-0.5.0/spawnllm/call.py +100 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/cli.py +2 -2
- spawnllm-0.5.0/spawnllm/extract.py +122 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/mlx/engine.py +8 -4
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/proc.py +99 -1
- spawnllm-0.5.0/spawnllm/response.py +29 -0
- spawnllm-0.5.0/spawnllm/run.py +65 -0
- spawnllm-0.5.0/spawnllm/spec.py +96 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/structured.py +37 -41
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/types.py +4 -1
- spawnllm-0.3.1/spawnllm/__init__.py +0 -60
- spawnllm-0.3.1/spawnllm/backends/base.py +0 -184
- spawnllm-0.3.1/spawnllm/backends/claude.py +0 -194
- spawnllm-0.3.1/spawnllm/call.py +0 -58
- {spawnllm-0.3.1 → spawnllm-0.5.0}/LICENSE +0 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/__main__.py +0 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/backends/registry.py +0 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/mlx/__init__.py +0 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/mlx/codec.py +0 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/mlx/fuse.py +0 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/mlx/patches.py +0 -0
- {spawnllm-0.3.1 → spawnllm-0.5.0}/spawnllm/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spawnllm
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
|
|
5
5
|
Keywords:
|
|
6
6
|
Author: Yasyf Mohamedali
|
|
@@ -64,24 +64,13 @@ domain logic instead of its own copy of the backends.
|
|
|
64
64
|
|
|
65
65
|
## Install
|
|
66
66
|
|
|
67
|
-
|
|
67
|
+
Run the CLI with [uvx](https://docs.astral.sh/uv/):
|
|
68
68
|
|
|
69
69
|
```bash
|
|
70
70
|
uvx spawnllm --help
|
|
71
71
|
```
|
|
72
72
|
|
|
73
|
-
|
|
74
|
-
to a project instead:
|
|
75
|
-
|
|
76
|
-
```bash
|
|
77
|
-
uv add spawnllm
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
For the local MLX engine (Apple Silicon only), pull the extra:
|
|
81
|
-
|
|
82
|
-
```bash
|
|
83
|
-
uv add "spawnllm[mlx]"
|
|
84
|
-
```
|
|
73
|
+
For the local MLX engine (Apple Silicon only), pull the extra: `uv add "spawnllm[mlx]"`.
|
|
85
74
|
|
|
86
75
|
## Quickstart
|
|
87
76
|
|
|
@@ -107,19 +96,20 @@ uvx spawnllm call --backend claude "What is 2+2? Reply with just the number."
|
|
|
107
96
|
4
|
|
108
97
|
```
|
|
109
98
|
|
|
110
|
-
`--model small|medium|large` swaps the tier, which each backend maps to a concrete model
|
|
111
|
-
|
|
112
|
-
`--agent` to let the call use tools.
|
|
99
|
+
`--model small|medium|large` swaps the tier, which each backend maps to a concrete model — the
|
|
100
|
+
`claude` backend resolves `small` to Haiku, `medium` to Sonnet, and `large` to Opus. Add
|
|
101
|
+
`--agent` to let the call use tools. Run `uvx spawnllm --help` for the full flag list.
|
|
113
102
|
|
|
114
103
|
### From Python
|
|
115
104
|
|
|
116
|
-
`
|
|
117
|
-
first installed, authenticated CLI
|
|
105
|
+
`call_sync` runs one request and returns the response. With no `backend`, it auto-selects
|
|
106
|
+
the first installed, authenticated CLI (its async companion `call` mirrors the same
|
|
107
|
+
signature):
|
|
118
108
|
|
|
119
109
|
```python
|
|
120
|
-
from spawnllm import
|
|
110
|
+
from spawnllm import call_sync
|
|
121
111
|
|
|
122
|
-
print(
|
|
112
|
+
print(call_sync("Reply with just the word: pong"))
|
|
123
113
|
# pong
|
|
124
114
|
```
|
|
125
115
|
|
|
@@ -129,7 +119,7 @@ instead of text:
|
|
|
129
119
|
```python
|
|
130
120
|
from pydantic import BaseModel
|
|
131
121
|
|
|
132
|
-
from spawnllm import
|
|
122
|
+
from spawnllm import call_sync, ClaudeCliBackend
|
|
133
123
|
|
|
134
124
|
|
|
135
125
|
class Capital(BaseModel):
|
|
@@ -137,7 +127,7 @@ class Capital(BaseModel):
|
|
|
137
127
|
capital: str
|
|
138
128
|
|
|
139
129
|
|
|
140
|
-
result =
|
|
130
|
+
result = call_sync(
|
|
141
131
|
"What is the capital of France?",
|
|
142
132
|
backend=ClaudeCliBackend(),
|
|
143
133
|
model="large",
|
|
@@ -149,21 +139,35 @@ print(result.capital) # Paris
|
|
|
149
139
|
When you don't pin a backend, set `specialty=` to scope auto-selection by task. The
|
|
150
140
|
`debugging` and `review` specialties route to Codex, and `general` routes to Claude.
|
|
151
141
|
|
|
152
|
-
|
|
142
|
+
### Spec-driven runs
|
|
153
143
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
144
|
+
For full control, build a `RunSpec` and execute it with `run_sync` (or its async companion
|
|
145
|
+
`run`). A `RunSpec` takes a literal provider model id — no tier mapping — and per-provider
|
|
146
|
+
flag passthrough via `provider_configs`. The call returns a `RunResult` with raw stdout,
|
|
147
|
+
stderr, and exit code, retrying transient `529`/overloaded/rate-limit failures with backoff:
|
|
157
148
|
|
|
158
|
-
|
|
159
|
-
|
|
149
|
+
```python
|
|
150
|
+
from spawnllm import run_sync, RunSpec, ClaudeConfig, ClaudeCliBackend
|
|
151
|
+
|
|
152
|
+
result = run_sync(
|
|
153
|
+
RunSpec(
|
|
154
|
+
prompt="What is 2+2? Reply with just the number.",
|
|
155
|
+
model="opus",
|
|
156
|
+
provider_configs={"claude": ClaudeConfig(permission_mode="bypassPermissions")},
|
|
157
|
+
),
|
|
158
|
+
backend=ClaudeCliBackend(),
|
|
159
|
+
)
|
|
160
|
+
print(result.stdout) # 4
|
|
161
|
+
```
|
|
160
162
|
|
|
161
|
-
|
|
162
|
-
batched single-token generation live behind one engine instead of in every consumer.
|
|
163
|
+
## How it works
|
|
163
164
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
165
|
+
Each backend holds plumbing that consumers would otherwise rebuild: the CLI backends own argv
|
|
166
|
+
construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful errors,
|
|
167
|
+
and they turn a Pydantic model into a JSON-schema constraint plus a parsed, validated result. The
|
|
168
|
+
MLX engine wraps adapter fusion, prompt-cache reuse, worker-thread lifecycle, and batched
|
|
169
|
+
single-token generation. Tools that share the layer stay byte-for-byte consistent instead of
|
|
170
|
+
drifting across diverging copies.
|
|
167
171
|
|
|
168
172
|
## Docs
|
|
169
173
|
|
|
@@ -17,24 +17,13 @@ domain logic instead of its own copy of the backends.
|
|
|
17
17
|
|
|
18
18
|
## Install
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
Run the CLI with [uvx](https://docs.astral.sh/uv/):
|
|
21
21
|
|
|
22
22
|
```bash
|
|
23
23
|
uvx spawnllm --help
|
|
24
24
|
```
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
to a project instead:
|
|
28
|
-
|
|
29
|
-
```bash
|
|
30
|
-
uv add spawnllm
|
|
31
|
-
```
|
|
32
|
-
|
|
33
|
-
For the local MLX engine (Apple Silicon only), pull the extra:
|
|
34
|
-
|
|
35
|
-
```bash
|
|
36
|
-
uv add "spawnllm[mlx]"
|
|
37
|
-
```
|
|
26
|
+
For the local MLX engine (Apple Silicon only), pull the extra: `uv add "spawnllm[mlx]"`.
|
|
38
27
|
|
|
39
28
|
## Quickstart
|
|
40
29
|
|
|
@@ -60,19 +49,20 @@ uvx spawnllm call --backend claude "What is 2+2? Reply with just the number."
|
|
|
60
49
|
4
|
|
61
50
|
```
|
|
62
51
|
|
|
63
|
-
`--model small|medium|large` swaps the tier, which each backend maps to a concrete model
|
|
64
|
-
|
|
65
|
-
`--agent` to let the call use tools.
|
|
52
|
+
`--model small|medium|large` swaps the tier, which each backend maps to a concrete model — the
|
|
53
|
+
`claude` backend resolves `small` to Haiku, `medium` to Sonnet, and `large` to Opus. Add
|
|
54
|
+
`--agent` to let the call use tools. Run `uvx spawnllm --help` for the full flag list.
|
|
66
55
|
|
|
67
56
|
### From Python
|
|
68
57
|
|
|
69
|
-
`
|
|
70
|
-
first installed, authenticated CLI
|
|
58
|
+
`call_sync` runs one request and returns the response. With no `backend`, it auto-selects
|
|
59
|
+
the first installed, authenticated CLI (its async companion `call` mirrors the same
|
|
60
|
+
signature):
|
|
71
61
|
|
|
72
62
|
```python
|
|
73
|
-
from spawnllm import
|
|
63
|
+
from spawnllm import call_sync
|
|
74
64
|
|
|
75
|
-
print(
|
|
65
|
+
print(call_sync("Reply with just the word: pong"))
|
|
76
66
|
# pong
|
|
77
67
|
```
|
|
78
68
|
|
|
@@ -82,7 +72,7 @@ instead of text:
|
|
|
82
72
|
```python
|
|
83
73
|
from pydantic import BaseModel
|
|
84
74
|
|
|
85
|
-
from spawnllm import
|
|
75
|
+
from spawnllm import call_sync, ClaudeCliBackend
|
|
86
76
|
|
|
87
77
|
|
|
88
78
|
class Capital(BaseModel):
|
|
@@ -90,7 +80,7 @@ class Capital(BaseModel):
|
|
|
90
80
|
capital: str
|
|
91
81
|
|
|
92
82
|
|
|
93
|
-
result =
|
|
83
|
+
result = call_sync(
|
|
94
84
|
"What is the capital of France?",
|
|
95
85
|
backend=ClaudeCliBackend(),
|
|
96
86
|
model="large",
|
|
@@ -102,21 +92,35 @@ print(result.capital) # Paris
|
|
|
102
92
|
When you don't pin a backend, set `specialty=` to scope auto-selection by task. The
|
|
103
93
|
`debugging` and `review` specialties route to Codex, and `general` routes to Claude.
|
|
104
94
|
|
|
105
|
-
|
|
95
|
+
### Spec-driven runs
|
|
106
96
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
97
|
+
For full control, build a `RunSpec` and execute it with `run_sync` (or its async companion
|
|
98
|
+
`run`). A `RunSpec` takes a literal provider model id — no tier mapping — and per-provider
|
|
99
|
+
flag passthrough via `provider_configs`. The call returns a `RunResult` with raw stdout,
|
|
100
|
+
stderr, and exit code, retrying transient `529`/overloaded/rate-limit failures with backoff:
|
|
110
101
|
|
|
111
|
-
|
|
112
|
-
|
|
102
|
+
```python
|
|
103
|
+
from spawnllm import run_sync, RunSpec, ClaudeConfig, ClaudeCliBackend
|
|
104
|
+
|
|
105
|
+
result = run_sync(
|
|
106
|
+
RunSpec(
|
|
107
|
+
prompt="What is 2+2? Reply with just the number.",
|
|
108
|
+
model="opus",
|
|
109
|
+
provider_configs={"claude": ClaudeConfig(permission_mode="bypassPermissions")},
|
|
110
|
+
),
|
|
111
|
+
backend=ClaudeCliBackend(),
|
|
112
|
+
)
|
|
113
|
+
print(result.stdout) # 4
|
|
114
|
+
```
|
|
113
115
|
|
|
114
|
-
|
|
115
|
-
batched single-token generation live behind one engine instead of in every consumer.
|
|
116
|
+
## How it works
|
|
116
117
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
Each backend holds plumbing that consumers would otherwise rebuild: the CLI backends own argv
|
|
119
|
+
construction, stdin/stdout piping, stderr teeing, and turning non-zero exits into useful errors,
|
|
120
|
+
and they turn a Pydantic model into a JSON-schema constraint plus a parsed, validated result. The
|
|
121
|
+
MLX engine wraps adapter fusion, prompt-cache reuse, worker-thread lifecycle, and batched
|
|
122
|
+
single-token generation. Tools that share the layer stay byte-for-byte consistent instead of
|
|
123
|
+
drifting across diverging copies.
|
|
120
124
|
|
|
121
125
|
## Docs
|
|
122
126
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "spawnllm"
|
|
3
|
-
version
|
|
3
|
+
# Inert sentinel: the real version is set from the release tag (uv version --frozen).
|
|
4
|
+
version = "0.5.0"
|
|
4
5
|
description = "Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools."
|
|
5
6
|
readme = "README.md"
|
|
6
7
|
license = "MIT"
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Subshell + MLX LLM-calling backends (Claude/Codex CLI, local MLX) shared across tools.
|
|
2
|
+
|
|
3
|
+
The top-level namespace exposes the three primitives — `run`/`call`/`extract`
|
|
4
|
+
and their `_sync` companions — over a `Backend` family that fully encapsulates
|
|
5
|
+
execution and returns one shared `Response`. The MLX engine lives under
|
|
6
|
+
`spawnllm.mlx`, whose imports are lazy so that `import spawnllm` never pulls
|
|
7
|
+
`mlx_lm`/`zstandard`.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from spawnllm.backends import (
|
|
13
|
+
AntigravityCliBackend,
|
|
14
|
+
BackendCallError,
|
|
15
|
+
BackendNotAuthenticated,
|
|
16
|
+
BackendNotInstalled,
|
|
17
|
+
BackendReady,
|
|
18
|
+
BackendStatus,
|
|
19
|
+
BackendUnavailable,
|
|
20
|
+
ClaudeCliBackend,
|
|
21
|
+
CliBackend,
|
|
22
|
+
CodexCliBackend,
|
|
23
|
+
GeminiCliBackend,
|
|
24
|
+
LlmBackend,
|
|
25
|
+
LlmBackends,
|
|
26
|
+
MlxBackend,
|
|
27
|
+
select_backend,
|
|
28
|
+
)
|
|
29
|
+
from spawnllm.call import call, call_sync
|
|
30
|
+
from spawnllm.extract import extract, extract_sync
|
|
31
|
+
from spawnllm.response import Response
|
|
32
|
+
from spawnllm.run import run, run_sync
|
|
33
|
+
from spawnllm.spec import ClaudeConfig, CodexConfig, GeminiConfig, RunSpec
|
|
34
|
+
from spawnllm.types import ProviderName, TModel, TSpecialty
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"AntigravityCliBackend",
|
|
38
|
+
"BackendCallError",
|
|
39
|
+
"BackendNotAuthenticated",
|
|
40
|
+
"BackendNotInstalled",
|
|
41
|
+
"BackendReady",
|
|
42
|
+
"BackendStatus",
|
|
43
|
+
"BackendUnavailable",
|
|
44
|
+
"ClaudeCliBackend",
|
|
45
|
+
"ClaudeConfig",
|
|
46
|
+
"CliBackend",
|
|
47
|
+
"CodexCliBackend",
|
|
48
|
+
"CodexConfig",
|
|
49
|
+
"GeminiCliBackend",
|
|
50
|
+
"GeminiConfig",
|
|
51
|
+
"LlmBackend",
|
|
52
|
+
"LlmBackends",
|
|
53
|
+
"MlxBackend",
|
|
54
|
+
"ProviderName",
|
|
55
|
+
"Response",
|
|
56
|
+
"RunSpec",
|
|
57
|
+
"TModel",
|
|
58
|
+
"TSpecialty",
|
|
59
|
+
"call",
|
|
60
|
+
"call_sync",
|
|
61
|
+
"extract",
|
|
62
|
+
"extract_sync",
|
|
63
|
+
"run",
|
|
64
|
+
"run_sync",
|
|
65
|
+
"select_backend",
|
|
66
|
+
]
|
|
@@ -3,31 +3,35 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
from spawnllm.backends.base import (
|
|
6
|
+
BackendCallError,
|
|
6
7
|
BackendNotAuthenticated,
|
|
7
8
|
BackendNotInstalled,
|
|
8
9
|
BackendReady,
|
|
9
10
|
BackendStatus,
|
|
10
11
|
BackendUnavailable,
|
|
11
|
-
|
|
12
|
+
CliBackend,
|
|
12
13
|
LlmBackend,
|
|
13
14
|
)
|
|
14
15
|
from spawnllm.backends.claude import ClaudeCliBackend
|
|
15
16
|
from spawnllm.backends.codex import CodexCliBackend
|
|
16
17
|
from spawnllm.backends.gemini import AntigravityCliBackend, GeminiCliBackend
|
|
18
|
+
from spawnllm.backends.mlx import MlxBackend
|
|
17
19
|
from spawnllm.backends.registry import LlmBackends, select_backend
|
|
18
20
|
|
|
19
21
|
__all__ = [
|
|
20
22
|
"AntigravityCliBackend",
|
|
23
|
+
"BackendCallError",
|
|
21
24
|
"BackendNotAuthenticated",
|
|
22
25
|
"BackendNotInstalled",
|
|
23
26
|
"BackendReady",
|
|
24
27
|
"BackendStatus",
|
|
25
28
|
"BackendUnavailable",
|
|
26
29
|
"ClaudeCliBackend",
|
|
30
|
+
"CliBackend",
|
|
27
31
|
"CodexCliBackend",
|
|
28
32
|
"GeminiCliBackend",
|
|
29
|
-
"Invocation",
|
|
30
33
|
"LlmBackend",
|
|
31
34
|
"LlmBackends",
|
|
35
|
+
"MlxBackend",
|
|
32
36
|
"select_backend",
|
|
33
37
|
]
|
|
@@ -0,0 +1,313 @@
|
|
|
1
|
+
"""Abstract execution contract for an LLM backend and its subprocess family."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import shutil
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import TYPE_CHECKING, ClassVar
|
|
12
|
+
|
|
13
|
+
from spawnllm.proc import acapture_cli, capture_cli
|
|
14
|
+
from spawnllm.response import Response
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from pydantic import BaseModel
|
|
18
|
+
|
|
19
|
+
from spawnllm.spec import RunSpec
|
|
20
|
+
from spawnllm.types import ProviderName, TModel
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(frozen=True)
class BackendReady:
    """A backend whose CLI is installed and authenticated.

    Instances are immutable value objects; `CliBackend.check_status` returns one
    when the binary is on PATH and the authentication probe succeeds.

    Attributes:
        binary: Name of the backend's CLI executable on PATH.
    """

    binary: str
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
class BackendNotInstalled:
    """A backend whose CLI is not on PATH.

    Instances are immutable value objects; `CliBackend.check_status` returns one
    when `shutil.which` cannot find the binary.

    Attributes:
        binary: Name of the backend's CLI executable.
        install_hint: Suggested shell command to install the CLI.
    """

    binary: str
    install_hint: str
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class BackendNotAuthenticated:
    """A backend whose CLI is installed but not authenticated.

    Instances are immutable value objects; `CliBackend.check_status` returns one
    when the binary is on PATH but the authentication probe reports no session.

    Attributes:
        binary: Name of the backend's CLI executable on PATH.
    """

    binary: str
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Closed union of the three status value objects; callers narrow with `isinstance`/`match`.
BackendStatus = BackendReady | BackendNotInstalled | BackendNotAuthenticated
"""Result of `LlmBackend.check_status`: `BackendReady`, `BackendNotInstalled`, or `BackendNotAuthenticated`."""
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class BackendUnavailable(RuntimeError):
    """Raised when no backend is ready (installed and authenticated)."""
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class BackendCallError(RuntimeError):
|
|
67
|
+
"""Raised by `call`/`extract` when a backend returns a provider error.
|
|
68
|
+
|
|
69
|
+
Carries the backend's error string (a nonzero exit with stderr, or an error
|
|
70
|
+
envelope), attached both as the message and as a note for tracebacks.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
def __init__(self, error: str) -> None:
|
|
74
|
+
super().__init__(error)
|
|
75
|
+
self.add_note(error)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(frozen=True)
class Invocation:
    """A built CLI invocation: argv, optional stdin, and where to read the result.

    Attributes:
        argv: The argv list to execute.
        stdin: Prompt text delivered over stdin, or `None` when delivered inline.
        result_path: File the backend writes its final message to; when set, the
            result is read from this file instead of stdout.
        cleanup_paths: Temp files to remove once the invocation completes.
    """

    argv: list[str]
    stdin: str | None = None
    result_path: str | None = None
    cleanup_paths: tuple[str, ...] = ()
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class LlmBackend(ABC):
    """Abstract execution contract for an LLM backend.

    Concrete backends map abstract model sizes to provider-specific model names
    and encapsulate how to execute a `RunSpec` and parse the raw response.

    Subclasses must implement `aexecute`, `execute`, `env`, `is_authenticated`,
    and `check_status`. The parsing hooks (`result_text`, `result_value`,
    `envelope_error`, `schema_for`) have defaults that treat the raw capture as
    the final output and may be overridden per provider.

    Attributes:
        models: Mapping from abstract model size to the provider's model name.
        provider: Provider identifier keying a `RunSpec`'s `provider_configs`.
    """

    models: ClassVar[dict[TModel, str]]
    provider: ClassVar[ProviderName]

    @abstractmethod
    async def aexecute(self, spec: RunSpec) -> Response:
        """Execute a single run asynchronously and resolve it to a `Response`.

        The backend runs the process, reads its output wherever the provider
        writes it, detects failure, and validates against `spec.response_model`.

        Args:
            spec: The configured run to execute.

        Returns:
            The resolved `Response`.
        """

    @abstractmethod
    def execute(self, spec: RunSpec) -> Response:
        """Execute a single run synchronously and resolve it to a `Response`.

        Args:
            spec: The configured run to execute.

        Returns:
            The resolved `Response`.
        """

    @abstractmethod
    def env(self) -> dict[str, str]:
        """Return extra environment variables for the invocation, merged over the inherited environment."""

    @abstractmethod
    def is_authenticated(self, *, timeout: int) -> bool:
        """Probe whether the backend holds valid credentials for its provider.

        "Authenticated" means the backend reports an active login session for the
        provider, not merely that an executable is present on PATH.

        Args:
            timeout: Seconds to wait for the credential probe.

        Returns:
            `True` when the backend reports an authenticated session.
        """

    @abstractmethod
    def check_status(self, *, timeout: int = 10) -> BackendStatus:
        """Check whether this backend is installed and authenticated.

        Args:
            timeout: Seconds to wait for the authentication probe.

        Returns:
            `BackendReady` when authenticated, `BackendNotInstalled` when the
            backend is not available, else `BackendNotAuthenticated`.
        """

    def schema_for(self, model: type[BaseModel]) -> str:
        """Serialize a Pydantic model into the JSON-schema string this backend expects.

        The default emits the model's plain JSON schema; provider backends
        override to apply their SDK's strict-schema transform.

        Args:
            model: The Pydantic model describing the structured output.

        Returns:
            A JSON-schema string suitable for this backend's structured-output argument.
        """
        return json.dumps(model.model_json_schema())

    def schema_arg(self, spec: RunSpec) -> str | None:
        """Return the JSON-schema string for `spec`'s `response_model`, or `None` when absent."""
        if spec.response_model is None:
            return None
        return self.schema_for(spec.response_model)

    def to_response(self, raw: str, *, returncode: int, stderr: str, spec: RunSpec) -> Response:
        """Resolve a raw capture into a `Response`: detect failure, extract text, validate.

        A nonzero exit or an error envelope becomes `Response.error`; otherwise
        the text comes from `result_text` and, when `spec.response_model` is set,
        the validated model from `result_value`. A `pydantic.ValidationError`
        from a non-conforming model propagates, as does whatever `result_value`
        raises (the default raises `json.JSONDecodeError` on non-JSON output).

        Args:
            raw: The raw output read wherever the provider wrote it.
            returncode: The process exit code.
            stderr: The captured stderr.
            spec: The configured run, carrying the optional `response_model`.

        Returns:
            The resolved `Response`.
        """
        if returncode != 0:
            # Keep only the tail of stderr: the last 2000 characters after stripping.
            return Response(error=f"{self.provider} exited {returncode}: {stderr.strip()[-2000:]}", result=None)
        if (err := self.envelope_error(raw)) is not None:
            return Response(error=err, result=None)
        text = self.result_text(raw)
        if spec.response_model is None:
            return Response(error=None, result=text)
        return Response(error=None, result=text, parsed=spec.response_model.model_validate(self.result_value(raw)))

    def result_text(self, raw: str) -> str:
        """Return the final text output from a raw capture; the default is `raw` unchanged."""
        return raw

    def result_value(self, raw: str) -> object:
        """Return the JSON value to validate from a raw capture; the default parses `raw` as JSON.

        Raises:
            json.JSONDecodeError: If `raw` is not valid JSON (default implementation).
        """
        return json.loads(raw)

    def envelope_error(self, raw: str) -> str | None:
        """Return the provider's error message from an error envelope, or `None` on success.

        The default recognises no envelope and always returns `None`.
        """
        return None
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
class CliBackend(LlmBackend):
    """Execution contract for the subprocess-backed LLM family.

    Concrete CLI backends build an argv from a `RunSpec`; `aexecute`/`execute`
    run it, merge environment overrides, and resolve the result from stdout or a
    designated result file.

    Attributes:
        binary: Name of the backend's CLI executable on PATH.
        install_hint: Suggested shell command to install the CLI.
    """

    binary: ClassVar[str]
    install_hint: ClassVar[str]

    @abstractmethod
    def build_command(self, spec: RunSpec) -> list[str]:
        """Build the CLI argv for a single invocation.

        Args:
            spec: The configured run to translate into argv.

        Returns:
            The argv list to execute.
        """

    def invocation(self, spec: RunSpec) -> Invocation:
        """Build the argv, stdin, and result source for a single invocation.

        The default delivers the prompt over stdin and reads the result from
        stdout; subclasses override to deliver the prompt inline or to read the
        result from a file.

        Args:
            spec: The configured run to translate into an invocation.

        Returns:
            An `Invocation` carrying the argv, stdin text, and result source.
        """
        return Invocation(self.build_command(spec), spec.prompt)

    def _merged_env(self, spec: RunSpec) -> dict[str, str]:
        """Layer the environment: inherited, then backend `env()`, then `spec.env` (last wins)."""
        return os.environ | self.env() | (spec.env or {})

    @staticmethod
    def _read_result(inv: Invocation, *, stdout: str, returncode: int) -> str:
        """Return the raw result from `inv.result_path` when set, else from `stdout`.

        A CLI that exits nonzero may never create its result file. Falling back
        to stdout in that case lets `to_response` report the exit code and
        stderr instead of a `FileNotFoundError` hiding the real failure.

        Raises:
            FileNotFoundError: If the process exited 0 but the result file is missing.
        """
        if not inv.result_path:
            return stdout
        try:
            return Path(inv.result_path).read_text()
        except FileNotFoundError:
            if returncode == 0:
                raise
            return stdout

    @staticmethod
    def _cleanup(inv: Invocation) -> None:
        """Remove the invocation's temp files, ignoring ones that no longer exist."""
        for path in inv.cleanup_paths:
            Path(path).unlink(missing_ok=True)

    async def aexecute(self, spec: RunSpec) -> Response:
        """Run the CLI asynchronously for `spec` and resolve its output to a `Response`.

        Args:
            spec: The configured run to execute.

        Returns:
            The resolved `Response`.
        """
        inv = self.invocation(spec)
        try:
            rr = await acapture_cli(
                inv.argv,
                input=inv.stdin,
                env=self._merged_env(spec),
                cwd=spec.cwd,
                timeout=spec.timeout,
            )
            raw = self._read_result(inv, stdout=rr.stdout, returncode=rr.returncode)
        finally:
            # Temp files are removed even when the capture or the read raises.
            self._cleanup(inv)
        return self.to_response(raw, returncode=rr.returncode, stderr=rr.stderr, spec=spec)

    def execute(self, spec: RunSpec) -> Response:
        """Run the CLI synchronously for `spec` and resolve its output to a `Response`.

        Args:
            spec: The configured run to execute.

        Returns:
            The resolved `Response`.
        """
        inv = self.invocation(spec)
        try:
            rr = capture_cli(
                inv.argv,
                input=inv.stdin,
                env=self._merged_env(spec),
                cwd=spec.cwd,
                timeout=spec.timeout,
            )
            raw = self._read_result(inv, stdout=rr.stdout, returncode=rr.returncode)
        finally:
            # Temp files are removed even when the capture or the read raises.
            self._cleanup(inv)
        return self.to_response(raw, returncode=rr.returncode, stderr=rr.stderr, spec=spec)

    def check_status(self, *, timeout: int = 10) -> BackendStatus:
        """Check whether this backend's CLI is installed and authenticated.

        Args:
            timeout: Seconds to wait for the authentication probe.

        Returns:
            `BackendReady` when authenticated, `BackendNotInstalled` when the CLI
            is not on PATH, else `BackendNotAuthenticated`.

        Raises:
            subprocess.TimeoutExpired: If `is_authenticated` exceeds `timeout`.
        """
        if not shutil.which(self.binary):
            return BackendNotInstalled(binary=self.binary, install_hint=self.install_hint)
        if self.is_authenticated(timeout=timeout):
            return BackendReady(binary=self.binary)
        return BackendNotAuthenticated(binary=self.binary)
|