stapel-agent 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stapel_agent-0.1.0/LICENSE +21 -0
- stapel_agent-0.1.0/PKG-INFO +155 -0
- stapel_agent-0.1.0/README.md +135 -0
- stapel_agent-0.1.0/__init__.py +55 -0
- stapel_agent-0.1.0/admin.py +35 -0
- stapel_agent-0.1.0/apps.py +17 -0
- stapel_agent-0.1.0/cache.py +85 -0
- stapel_agent-0.1.0/checks.py +77 -0
- stapel_agent-0.1.0/conf.py +70 -0
- stapel_agent-0.1.0/conftest.py +123 -0
- stapel_agent-0.1.0/dto.py +55 -0
- stapel_agent-0.1.0/errors.py +21 -0
- stapel_agent-0.1.0/functions.py +93 -0
- stapel_agent-0.1.0/migrations/0001_initial.py +43 -0
- stapel_agent-0.1.0/migrations/__init__.py +0 -0
- stapel_agent-0.1.0/models.py +67 -0
- stapel_agent-0.1.0/parsing.py +86 -0
- stapel_agent-0.1.0/providers/__init__.py +95 -0
- stapel_agent-0.1.0/providers/anthropic.py +63 -0
- stapel_agent-0.1.0/providers/base.py +61 -0
- stapel_agent-0.1.0/providers/claude_cli.py +79 -0
- stapel_agent-0.1.0/providers/openai_compat.py +81 -0
- stapel_agent-0.1.0/py.typed +0 -0
- stapel_agent-0.1.0/pyproject.toml +46 -0
- stapel_agent-0.1.0/schemas/functions/llm.complete.json +27 -0
- stapel_agent-0.1.0/schemas/functions/llm.translate.json +23 -0
- stapel_agent-0.1.0/serializers.py +37 -0
- stapel_agent-0.1.0/services.py +282 -0
- stapel_agent-0.1.0/setup.cfg +4 -0
- stapel_agent-0.1.0/stapel_agent.egg-info/PKG-INFO +155 -0
- stapel_agent-0.1.0/stapel_agent.egg-info/SOURCES.txt +82 -0
- stapel_agent-0.1.0/stapel_agent.egg-info/dependency_links.txt +1 -0
- stapel_agent-0.1.0/stapel_agent.egg-info/requires.txt +7 -0
- stapel_agent-0.1.0/stapel_agent.egg-info/top_level.txt +1 -0
- stapel_agent-0.1.0/tests/__init__.py +0 -0
- stapel_agent-0.1.0/tests/fakes.py +84 -0
- stapel_agent-0.1.0/tests/test_api.py +201 -0
- stapel_agent-0.1.0/tests/test_extension_points.py +210 -0
- stapel_agent-0.1.0/tests/test_functions.py +79 -0
- stapel_agent-0.1.0/tests/test_models_and_admin.py +93 -0
- stapel_agent-0.1.0/tests/test_parsing.py +83 -0
- stapel_agent-0.1.0/tests/test_providers.py +334 -0
- stapel_agent-0.1.0/tests/test_public_api.py +100 -0
- stapel_agent-0.1.0/tests/test_services.py +203 -0
- stapel_agent-0.1.0/tests/urls.py +5 -0
- stapel_agent-0.1.0/urls.py +19 -0
- stapel_agent-0.1.0/views.py +114 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 usestapel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: stapel-agent
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LLM facade Django app for the Stapel framework
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: django,stapel,llm,anthropic,openai,translation
|
|
7
|
+
Classifier: Framework :: Django
|
|
8
|
+
Classifier: Framework :: Django :: 5.2
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Requires-Python: >=3.11
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: stapel-core<0.4,>=0.3.0
|
|
15
|
+
Provides-Extra: anthropic
|
|
16
|
+
Requires-Dist: anthropic>=0.34; extra == "anthropic"
|
|
17
|
+
Provides-Extra: all
|
|
18
|
+
Requires-Dist: stapel-agent[anthropic]; extra == "all"
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# stapel-agent
|
|
22
|
+
|
|
23
|
+
[CI status](https://github.com/usestapel/stapel-agent/actions/workflows/ci.yml)
|
|
24
|
+
[Coverage on Codecov](https://codecov.io/gh/usestapel/stapel-agent)
|
|
25
|
+
|
|
26
|
+
> LLM facade — one JSON-completion/translation surface in front of swappable model providers, with a prompt cache and a token ledger
|
|
27
|
+
|
|
28
|
+
Part of the [Stapel framework](https://github.com/usestapel) — composable Django apps for building production-grade platforms.
|
|
29
|
+
|
|
30
|
+
Python port of the `iron-agent` NestJS service. Same HTTP paths and contracts
|
|
31
|
+
(`stapel-translate`'s `AgentProvider` keeps working unchanged), plus a comm
|
|
32
|
+
surface so monolith deployments call it in-process without HTTP.
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install stapel-agent # core
|
|
38
|
+
pip install stapel-agent[anthropic] # + the Anthropic SDK for the default provider
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quick start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
# settings.py
|
|
45
|
+
INSTALLED_APPS = [
|
|
46
|
+
...
|
|
47
|
+
'stapel_agent',
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
STAPEL_AGENT = {
|
|
51
|
+
"ANTHROPIC_API_KEY": "sk-ant-...",
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
# urls.py — paths stay 1:1 with iron-agent under the agent/ mount
|
|
55
|
+
urlpatterns = [
|
|
56
|
+
...
|
|
57
|
+
path("agent/", include("stapel_agent.urls")),
|
|
58
|
+
]
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Two surfaces, same contracts:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# HTTP (service-to-service: X-API-KEY, or a staff session)
|
|
65
|
+
POST /agent/api/llm/complete {"prompt": "...", "model": "small|medium|large",
|
|
66
|
+
"provider"?: "...", "system_prompt"?: "..."}
|
|
67
|
+
POST /agent/api/llm/translate {"from": "auto", "to": "de", "entries": {"key": "text"}}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
# comm (in-process in a monolith, transport chosen by STAPEL_COMM)
|
|
72
|
+
from stapel_core.comm import call
|
|
73
|
+
|
|
74
|
+
call("llm.complete", {"prompt": "...", "model": "small"})
|
|
75
|
+
call("llm.translate", {"from_lang": "auto", "to": "de", "entries": {...}})
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Responses follow the iron-agent contract: LLM failures are **HTTP 200** with
|
|
79
|
+
`{"status": "failure", "reason": ...}` — 4xx/5xx are reserved for request
|
|
80
|
+
validation and auth. Successful completions return the parsed JSON in
|
|
81
|
+
`result`, prose around it in `comment`, and snake_case `usage`
|
|
82
|
+
(`input_tokens` / `output_tokens`).
|
|
83
|
+
|
|
84
|
+
Every provider call writes a `PromptLog` row: model, size, source, status,
|
|
85
|
+
duration and the full token ledger (input / output / thinking / cache-read /
|
|
86
|
+
cache-write) — per-user and per-source cost accounting needs no other table.
|
|
87
|
+
|
|
88
|
+
## Settings — `STAPEL_AGENT`
|
|
89
|
+
|
|
90
|
+
| Key | Default | Meaning |
|
|
91
|
+
|---|---|---|
|
|
92
|
+
| `MODELS` | `{"small": "claude-haiku-4-5-20251001", "medium": "claude-sonnet-5", "large": "claude-opus-4-8"}` | Size → model-name map |
|
|
93
|
+
| `PROVIDERS` | `{}` | Overlay **merged over** the built-in registry (anthropic / openai-compat / claude-code) — add/override entries, `None` removes one; resolved lazily per request |
|
|
94
|
+
| `DEFAULT_PROVIDER` | `"anthropic"` | Provider used when a request names none |
|
|
95
|
+
| `ANTHROPIC_API_KEY` | `""` | Key for the Anthropic SDK provider (read lazily) |
|
|
96
|
+
| `OPENAI_COMPAT_BASE_URL` | `""` | Base URL of any OpenAI-compatible endpoint |
|
|
97
|
+
| `OPENAI_COMPAT_API_KEY` | `""` | Bearer token for that endpoint |
|
|
98
|
+
| `OPENAI_COMPAT_MODELS` | `{}` | Optional size → model map for openai-compat (missing sizes fall back to `MODELS`) |
|
|
99
|
+
| `CLI_BINARY` | `"claude"` | Claude Code CLI binary (opt-in provider only) |
|
|
100
|
+
| `CLI_TIMEOUT` | `120` | Provider timeout, seconds |
|
|
101
|
+
| `MAX_TOKENS` | `4096` | Completion token cap |
|
|
102
|
+
| `CACHE_LOOKUP` | `{"llm_facade": False, "translate": True}` | Per-source cache-by-prompt toggle (used by the default cache policy) |
|
|
103
|
+
| `CACHE_TTL` | `604800` | Cache window in seconds (7 days); older rows are ignored (default policy) |
|
|
104
|
+
| `CACHE_POLICY` | `"stapel_agent.cache.PromptLogCachePolicy"` | Dotted path to a `CachePolicy` subclass — swap the prompt cache (Redis, no-op, ...) without forking |
|
|
105
|
+
|
|
106
|
+
## Provider matrix
|
|
107
|
+
|
|
108
|
+
| Name | Class | Backend | Needs |
|
|
109
|
+
|---|---|---|---|
|
|
110
|
+
| `anthropic` (default) | `providers.anthropic.AnthropicProvider` | Anthropic SDK | `anthropic` extra + `ANTHROPIC_API_KEY` |
|
|
111
|
+
| `openai-compat` | `providers.openai_compat.OpenAICompatProvider` | Any `/chat/completions` dialect: OpenAI, DeepSeek, MiMo, GLM, Kimi | `OPENAI_COMPAT_BASE_URL` (+ key) |
|
|
112
|
+
| `claude-code` | `providers.claude_cli.ClaudeCodeCLIProvider` | Spawns `claude -p ... --output-format json` | The CLI in the host image |
|
|
113
|
+
|
|
114
|
+
**No CLI in any default path.** `claude-code` is strictly opt-in: it exists for
|
|
115
|
+
hosts that ship the Claude Code CLI in their image and want the CLI to handle
|
|
116
|
+
its own authentication (`provider: "claude-code"` per request, or
|
|
117
|
+
`DEFAULT_PROVIDER` override). Unlike iron-agent there is no OAuth credential
|
|
118
|
+
reading and no background token-refresh — that plumbing was deliberately
|
|
119
|
+
dropped.
|
|
120
|
+
|
|
121
|
+
### Adding, overriding and removing providers (merge semantics)
|
|
122
|
+
|
|
123
|
+
`STAPEL_AGENT["PROVIDERS"]` is an **overlay merged over the built-ins**, not a
|
|
124
|
+
replacement dict — adding your provider never requires restating the three
|
|
125
|
+
shipped ones, and setting a name to `None` removes it:
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
# settings.py — one line per change, built-ins stay available
|
|
129
|
+
STAPEL_AGENT = {
|
|
130
|
+
"PROVIDERS": {
|
|
131
|
+
"acme": "myproject.llm.AcmeProvider", # add a custom backend
|
|
132
|
+
"claude-code": None, # remove a built-in
|
|
133
|
+
},
|
|
134
|
+
"DEFAULT_PROVIDER": "acme",
|
|
135
|
+
}
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Or register at runtime from your app's `AppConfig.ready()` (highest
|
|
139
|
+
precedence):
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from stapel_agent import register_provider
|
|
143
|
+
|
|
144
|
+
register_provider("acme", AcmeProvider) # class or dotted path
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
A custom backend is just a `stapel_agent.LlmProvider` subclass returning a
|
|
148
|
+
`ProviderResult`. Django system checks (`stapel_agent.E001/W001/W002`) flag a
|
|
149
|
+
`DEFAULT_PROVIDER` that is not in the effective registry, unimportable dotted
|
|
150
|
+
paths and non-`LlmProvider` entries at startup. See [MODULE.md](MODULE.md)
|
|
151
|
+
for the full extension-point map.
|
|
152
|
+
|
|
153
|
+
## License
|
|
154
|
+
|
|
155
|
+
MIT — see [LICENSE](LICENSE)
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# stapel-agent
|
|
2
|
+
|
|
3
|
+
[CI status](https://github.com/usestapel/stapel-agent/actions/workflows/ci.yml)
|
|
4
|
+
[Coverage on Codecov](https://codecov.io/gh/usestapel/stapel-agent)
|
|
5
|
+
|
|
6
|
+
> LLM facade — one JSON-completion/translation surface in front of swappable model providers, with a prompt cache and a token ledger
|
|
7
|
+
|
|
8
|
+
Part of the [Stapel framework](https://github.com/usestapel) — composable Django apps for building production-grade platforms.
|
|
9
|
+
|
|
10
|
+
Python port of the `iron-agent` NestJS service. Same HTTP paths and contracts
|
|
11
|
+
(`stapel-translate`'s `AgentProvider` keeps working unchanged), plus a comm
|
|
12
|
+
surface so monolith deployments call it in-process without HTTP.
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install stapel-agent # core
|
|
18
|
+
pip install stapel-agent[anthropic] # + the Anthropic SDK for the default provider
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Quick start
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
# settings.py
|
|
25
|
+
INSTALLED_APPS = [
|
|
26
|
+
...
|
|
27
|
+
'stapel_agent',
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
STAPEL_AGENT = {
|
|
31
|
+
"ANTHROPIC_API_KEY": "sk-ant-...",
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
# urls.py — paths stay 1:1 with iron-agent under the agent/ mount
|
|
35
|
+
urlpatterns = [
|
|
36
|
+
...
|
|
37
|
+
path("agent/", include("stapel_agent.urls")),
|
|
38
|
+
]
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Two surfaces, same contracts:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# HTTP (service-to-service: X-API-KEY, or a staff session)
|
|
45
|
+
POST /agent/api/llm/complete {"prompt": "...", "model": "small|medium|large",
|
|
46
|
+
"provider"?: "...", "system_prompt"?: "..."}
|
|
47
|
+
POST /agent/api/llm/translate {"from": "auto", "to": "de", "entries": {"key": "text"}}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
# comm (in-process in a monolith, transport chosen by STAPEL_COMM)
|
|
52
|
+
from stapel_core.comm import call
|
|
53
|
+
|
|
54
|
+
call("llm.complete", {"prompt": "...", "model": "small"})
|
|
55
|
+
call("llm.translate", {"from_lang": "auto", "to": "de", "entries": {...}})
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Responses follow the iron-agent contract: LLM failures are **HTTP 200** with
|
|
59
|
+
`{"status": "failure", "reason": ...}` — 4xx/5xx are reserved for request
|
|
60
|
+
validation and auth. Successful completions return the parsed JSON in
|
|
61
|
+
`result`, prose around it in `comment`, and snake_case `usage`
|
|
62
|
+
(`input_tokens` / `output_tokens`).
|
|
63
|
+
|
|
64
|
+
Every provider call writes a `PromptLog` row: model, size, source, status,
|
|
65
|
+
duration and the full token ledger (input / output / thinking / cache-read /
|
|
66
|
+
cache-write) — per-user and per-source cost accounting needs no other table.
|
|
67
|
+
|
|
68
|
+
## Settings — `STAPEL_AGENT`
|
|
69
|
+
|
|
70
|
+
| Key | Default | Meaning |
|
|
71
|
+
|---|---|---|
|
|
72
|
+
| `MODELS` | `{"small": "claude-haiku-4-5-20251001", "medium": "claude-sonnet-5", "large": "claude-opus-4-8"}` | Size → model-name map |
|
|
73
|
+
| `PROVIDERS` | `{}` | Overlay **merged over** the built-in registry (anthropic / openai-compat / claude-code) — add/override entries, `None` removes one; resolved lazily per request |
|
|
74
|
+
| `DEFAULT_PROVIDER` | `"anthropic"` | Provider used when a request names none |
|
|
75
|
+
| `ANTHROPIC_API_KEY` | `""` | Key for the Anthropic SDK provider (read lazily) |
|
|
76
|
+
| `OPENAI_COMPAT_BASE_URL` | `""` | Base URL of any OpenAI-compatible endpoint |
|
|
77
|
+
| `OPENAI_COMPAT_API_KEY` | `""` | Bearer token for that endpoint |
|
|
78
|
+
| `OPENAI_COMPAT_MODELS` | `{}` | Optional size → model map for openai-compat (missing sizes fall back to `MODELS`) |
|
|
79
|
+
| `CLI_BINARY` | `"claude"` | Claude Code CLI binary (opt-in provider only) |
|
|
80
|
+
| `CLI_TIMEOUT` | `120` | Provider timeout, seconds |
|
|
81
|
+
| `MAX_TOKENS` | `4096` | Completion token cap |
|
|
82
|
+
| `CACHE_LOOKUP` | `{"llm_facade": False, "translate": True}` | Per-source cache-by-prompt toggle (used by the default cache policy) |
|
|
83
|
+
| `CACHE_TTL` | `604800` | Cache window in seconds (7 days); older rows are ignored (default policy) |
|
|
84
|
+
| `CACHE_POLICY` | `"stapel_agent.cache.PromptLogCachePolicy"` | Dotted path to a `CachePolicy` subclass — swap the prompt cache (Redis, no-op, ...) without forking |
|
|
85
|
+
|
|
86
|
+
## Provider matrix
|
|
87
|
+
|
|
88
|
+
| Name | Class | Backend | Needs |
|
|
89
|
+
|---|---|---|---|
|
|
90
|
+
| `anthropic` (default) | `providers.anthropic.AnthropicProvider` | Anthropic SDK | `anthropic` extra + `ANTHROPIC_API_KEY` |
|
|
91
|
+
| `openai-compat` | `providers.openai_compat.OpenAICompatProvider` | Any `/chat/completions` dialect: OpenAI, DeepSeek, MiMo, GLM, Kimi | `OPENAI_COMPAT_BASE_URL` (+ key) |
|
|
92
|
+
| `claude-code` | `providers.claude_cli.ClaudeCodeCLIProvider` | Spawns `claude -p ... --output-format json` | The CLI in the host image |
|
|
93
|
+
|
|
94
|
+
**No CLI in any default path.** `claude-code` is strictly opt-in: it exists for
|
|
95
|
+
hosts that ship the Claude Code CLI in their image and want the CLI to handle
|
|
96
|
+
its own authentication (`provider: "claude-code"` per request, or
|
|
97
|
+
`DEFAULT_PROVIDER` override). Unlike iron-agent there is no OAuth credential
|
|
98
|
+
reading and no background token-refresh — that plumbing was deliberately
|
|
99
|
+
dropped.
|
|
100
|
+
|
|
101
|
+
### Adding, overriding and removing providers (merge semantics)
|
|
102
|
+
|
|
103
|
+
`STAPEL_AGENT["PROVIDERS"]` is an **overlay merged over the built-ins**, not a
|
|
104
|
+
replacement dict — adding your provider never requires restating the three
|
|
105
|
+
shipped ones, and setting a name to `None` removes it:
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
# settings.py — one line per change, built-ins stay available
|
|
109
|
+
STAPEL_AGENT = {
|
|
110
|
+
"PROVIDERS": {
|
|
111
|
+
"acme": "myproject.llm.AcmeProvider", # add a custom backend
|
|
112
|
+
"claude-code": None, # remove a built-in
|
|
113
|
+
},
|
|
114
|
+
"DEFAULT_PROVIDER": "acme",
|
|
115
|
+
}
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Or register at runtime from your app's `AppConfig.ready()` (highest
|
|
119
|
+
precedence):
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from stapel_agent import register_provider
|
|
123
|
+
|
|
124
|
+
register_provider("acme", AcmeProvider) # class or dotted path
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
A custom backend is just a `stapel_agent.LlmProvider` subclass returning a
|
|
128
|
+
`ProviderResult`. Django system checks (`stapel_agent.E001/W001/W002`) flag a
|
|
129
|
+
`DEFAULT_PROVIDER` that is not in the effective registry, unimportable dotted
|
|
130
|
+
paths and non-`LlmProvider` entries at startup. See [MODULE.md](MODULE.md)
|
|
131
|
+
for the full extension-point map.
|
|
132
|
+
|
|
133
|
+
## License
|
|
134
|
+
|
|
135
|
+
MIT — see [LICENSE](LICENSE)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""stapel-agent — LLM facade: completion, translation, prompt cache/ledger.
|
|
2
|
+
|
|
3
|
+
Public API (lazily resolved, PEP 562 — importing this package pulls in
|
|
4
|
+
no Django code until an attribute is actually accessed):
|
|
5
|
+
|
|
6
|
+
agent_settings — the ``STAPEL_AGENT`` settings namespace
|
|
7
|
+
complete — raw LLM completion (cache + PromptLog ledger)
|
|
8
|
+
translate — key-value translation flow
|
|
9
|
+
LlmProvider — base class for custom LLM backends
|
|
10
|
+
ProviderResult — completion text + token accounting dataclass
|
|
11
|
+
CachePolicy — base class for custom prompt-cache policies
|
|
12
|
+
register_provider — runtime provider registration (apps.ready())
|
|
13
|
+
registered_providers — the effective provider registry mapping
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
# Public names of the package. None of them is bound at import time; each
# is resolved on first attribute access by the module-level ``__getattr__``.
__all__ = [
    "CachePolicy",
    "LlmProvider",
    "ProviderResult",
    "agent_settings",
    "complete",
    "register_provider",
    "registered_providers",
    "translate",
]

# name -> (relative module, attribute)
# The module path is relative to this package (leading dot), so it is
# imported with this package's ``__name__`` as the anchor.
_EXPORTS = {
    "agent_settings": (".conf", "agent_settings"),
    "complete": (".services", "complete"),
    "translate": (".services", "translate"),
    "LlmProvider": (".providers.base", "LlmProvider"),
    "ProviderResult": (".providers.base", "ProviderResult"),
    "CachePolicy": (".cache", "CachePolicy"),
    "register_provider": (".providers", "register_provider"),
    "registered_providers": (".providers", "registered_providers"),
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def __getattr__(name):
    """Resolve a lazily exported public name (PEP 562 module hook).

    Looks *name* up in ``_EXPORTS``, imports the submodule it lives in
    relative to this package and returns the requested attribute. The
    resolved value is stored in the module globals, so the hook is only
    invoked once per name.

    Raises:
        AttributeError: if *name* is not one of the lazy exports.
    """
    if name not in _EXPORTS:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from importlib import import_module

    submodule, attribute = _EXPORTS[name]
    resolved = getattr(import_module(submodule, __name__), attribute)
    globals()[name] = resolved  # memoise: later lookups never reach this hook
    return resolved
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def __dir__():
    """Return the sorted module attribute names, lazy exports included.

    Names listed in ``__all__`` are reported even when they have not been
    resolved (and therefore are not in the module globals) yet.
    """
    names = set(globals())
    names.update(__all__)
    return sorted(names)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from django.contrib import admin
|
|
2
|
+
|
|
3
|
+
from .models import PromptLog
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@admin.register(PromptLog)
class PromptLogAdmin(admin.ModelAdmin):
    """Read-only admin for the ``PromptLog`` ledger.

    PromptLog is an immutable ledger: rows are written by the service
    layer only. Editing one would corrupt token accounting and could
    poison the prompt cache, so add, change and delete are all refused.
    """

    # Changelist: newest rows first, with a date drill-down.
    ordering = ["-created_at"]
    date_hierarchy = "created_at"
    search_fields = ["prompt", "user_id", "model"]
    list_filter = ["source", "status", "model_size"]
    list_display = [
        "created_at",
        "source",
        "model",
        "model_size",
        "status",
        "input_tokens",
        "output_tokens",
        "duration_ms",
        "user_id",
    ]

    def has_add_permission(self, request):
        """Never allow creating ledger rows from the admin."""
        return False

    def has_change_permission(self, request, obj=None):
        """Never allow editing ledger rows from the admin."""
        return False

    def has_delete_permission(self, request, obj=None):
        """Never allow deleting ledger rows from the admin."""
        return False
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from django.apps import AppConfig
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class AgentConfig(AppConfig):
    """Django app configuration for ``stapel_agent`` (app label ``agent``)."""

    name = "stapel_agent"
    label = "agent"
    verbose_name = "Stapel Agent"
    default_auto_field = "django.db.models.BigAutoField"

    def ready(self):
        """Import the modules that register themselves as a side effect."""
        # comm Function providers: called in-process in a monolith, over the
        # transport chosen by STAPEL_COMM in microservices (same code).
        from . import functions  # noqa: F401

        # Django system checks for provider registry / DEFAULT_PROVIDER
        # misconfiguration; importing the module registers them.
        from . import checks  # noqa: F401
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Cache-policy seam — swap the prompt cache without forking.
|
|
2
|
+
|
|
3
|
+
``STAPEL_AGENT["CACHE_POLICY"]`` is a dotted path to a ``CachePolicy``
|
|
4
|
+
subclass (resolved via ``import_strings``, instantiated per call). The
|
|
5
|
+
default, ``PromptLogCachePolicy``, implements the stock behaviour: the
|
|
6
|
+
latest successful ``PromptLog`` row with an identical
|
|
7
|
+
prompt+system_prompt+source within ``CACHE_TTL``, gated per source by
|
|
8
|
+
``CACHE_LOOKUP``.
|
|
9
|
+
|
|
10
|
+
Hosts can point the setting at a Redis-backed policy, a no-op policy,
|
|
11
|
+
or anything else::
|
|
12
|
+
|
|
13
|
+
# myproject/llm_cache.py
|
|
14
|
+
from stapel_agent.cache import CachePolicy
|
|
15
|
+
|
|
16
|
+
class RedisCachePolicy(CachePolicy):
|
|
17
|
+
def should_cache(self, source): ...
|
|
18
|
+
def lookup(self, prompt, system_prompt, source): ...
|
|
19
|
+
def store(self, prompt, system_prompt, source, response): ...
|
|
20
|
+
|
|
21
|
+
# settings.py
|
|
22
|
+
STAPEL_AGENT = {"CACHE_POLICY": "myproject.llm_cache.RedisCachePolicy"}
|
|
23
|
+
|
|
24
|
+
The PromptLog *ledger* row is always written regardless of the policy —
|
|
25
|
+
caching is a read seam, accounting is not optional. ``store()`` exists
|
|
26
|
+
for policies with their own storage; the default is a no-op because the
|
|
27
|
+
ledger row IS the default policy's storage.
|
|
28
|
+
"""
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
from abc import ABC, abstractmethod
|
|
32
|
+
from datetime import timedelta
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CachePolicy(ABC):
    """Interface of a prompt-cache policy.

    A policy decides whether a given source consults the cache at all and
    answers lookups. Subclasses must implement ``should_cache`` and
    ``lookup``; ``store`` is optional.
    """

    @abstractmethod
    def should_cache(self, source: str) -> bool:
        """Tell whether *source* ("llm_facade"/"translate"/...) uses the cache."""

    @abstractmethod
    def lookup(
        self,
        prompt: str,
        system_prompt: str | None,
        source: str,
    ) -> str | None:
        """Return the cached raw response text, or ``None`` on a miss."""

    def store(
        self,
        prompt: str,
        system_prompt: str | None,
        source: str,
        response: str,
    ) -> None:
        """Persist a successful response so later lookups can find it.

        Does nothing unless overridden: the default policy reads the
        PromptLog ledger row that ``services.complete`` writes anyway.
        Policies with external storage (Redis, ...) override this.
        """
        return None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class PromptLogCachePolicy(CachePolicy):
    """Stock policy backed by PromptLog rows and the CACHE_LOOKUP/CACHE_TTL settings."""

    def should_cache(self, source: str) -> bool:
        """Return the ``CACHE_LOOKUP`` toggle for *source* (off when absent)."""
        from .conf import agent_settings

        toggles = agent_settings.CACHE_LOOKUP or {}
        return bool(toggles.get(source, False))

    def lookup(self, prompt: str, system_prompt: str | None, source: str) -> str | None:
        """Return the newest matching successful response, or ``None``.

        A row matches when it has the same prompt and source, succeeded,
        has a response, and was created within ``CACHE_TTL`` seconds. A
        falsy *system_prompt* matches only rows whose system prompt is NULL.
        """
        from django.utils import timezone

        from .conf import agent_settings
        from .models import PromptLog, PromptStatus

        window = int(agent_settings.CACHE_TTL)
        criteria = {
            "prompt": prompt,
            "source": source,
            "status": PromptStatus.SUCCESS,
            "response__isnull": False,
            "created_at__gte": timezone.now() - timedelta(seconds=window),
        }
        if system_prompt:
            criteria["system_prompt"] = system_prompt
        else:
            criteria["system_prompt__isnull"] = True

        latest = PromptLog.objects.filter(**criteria).order_by("-created_at").first()
        if latest is None:
            return None
        return latest.response
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
__all__ = ["CachePolicy", "PromptLogCachePolicy"]
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Django system checks — catch provider misconfiguration at startup.
|
|
2
|
+
|
|
3
|
+
Registered from ``AgentConfig.ready()``. IDs:
|
|
4
|
+
|
|
5
|
+
- ``stapel_agent.E001`` — ``DEFAULT_PROVIDER`` names a provider that is
|
|
6
|
+
not in the effective registry (built-ins ← settings merge ← runtime).
|
|
7
|
+
- ``stapel_agent.W001`` — a registry entry's dotted path fails to import
|
|
8
|
+
(typo, or an optional dependency missing in this image).
|
|
9
|
+
- ``stapel_agent.W002`` — a registry entry resolves to something that is
|
|
10
|
+
not an ``LlmProvider`` subclass.
|
|
11
|
+
|
|
12
|
+
Import/subclass problems are warnings, not errors, on purpose: providers
|
|
13
|
+
resolve lazily per request and degrade to ``status: "failure"`` — a
|
|
14
|
+
broken *unused* entry must not block deploys, but it should be visible.
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import inspect
|
|
19
|
+
|
|
20
|
+
from django.core import checks
|
|
21
|
+
from django.utils.module_loading import import_string
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@checks.register("stapel_agent")
def check_providers(app_configs, **kwargs):
    """Report provider-registry misconfiguration as Django check messages.

    Args:
        app_configs: supplied by the check framework; not used here.
        **kwargs: supplied by the check framework; not used here.

    Returns:
        A list of check messages: ``stapel_agent.E001`` when
        ``DEFAULT_PROVIDER`` is not in the effective registry,
        ``stapel_agent.W001`` for every entry whose dotted path cannot be
        imported, and ``stapel_agent.W002`` for every entry that is not an
        ``LlmProvider`` subclass.
    """
    from .conf import agent_settings
    from .providers import registered_providers
    from .providers.base import LlmProvider

    issues = []
    effective = registered_providers()

    default = agent_settings.DEFAULT_PROVIDER
    if default not in effective:
        issues.append(
            checks.Error(
                f"STAPEL_AGENT['DEFAULT_PROVIDER'] is {default!r}, which is "
                "not in the effective provider registry "
                f"({sorted(effective) or 'empty'}).",
                hint=(
                    "Add it via STAPEL_AGENT['PROVIDERS'] or "
                    "stapel_agent.providers.register_provider(), or point "
                    "DEFAULT_PROVIDER at an existing name."
                ),
                id="stapel_agent.E001",
            )
        )

    for name, target in effective.items():
        if isinstance(target, str):
            try:
                target = import_string(target)
            # Deliberately broad: import_string only turns a bad path or a
            # missing attribute into ImportError. Anything else raised while
            # the provider module is being imported would otherwise escape
            # and abort the whole check run, although a broken entry is
            # meant to be a warning only.
            except Exception as exc:  # noqa: BLE001
                issues.append(
                    checks.Warning(
                        f"LLM provider {name!r} cannot be imported: {exc}",
                        hint=(
                            "Fix the dotted path, install the missing "
                            "dependency, or remove the entry (set it to None)."
                        ),
                        id="stapel_agent.W001",
                    )
                )
                continue
        if not (inspect.isclass(target) and issubclass(target, LlmProvider)):
            issues.append(
                checks.Warning(
                    f"LLM provider {name!r} resolves to {target!r}, which is "
                    "not a stapel_agent.LlmProvider subclass.",
                    hint="Implement the LlmProvider ABC (see MODULE.md).",
                    id="stapel_agent.W002",
                )
            )
    return issues
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
__all__ = ["check_providers"]
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Settings namespace for stapel-agent.
|
|
2
|
+
|
|
3
|
+
All configuration is read through ``agent_settings`` (lazily, at call
|
|
4
|
+
time) instead of module-level ``os.getenv`` — so tests and host projects
|
|
5
|
+
can override any key via ``settings.STAPEL_AGENT``, a flat Django setting
|
|
6
|
+
of the same name, or an environment variable::
|
|
7
|
+
|
|
8
|
+
STAPEL_AGENT = {
|
|
9
|
+
"DEFAULT_PROVIDER": "openai-compat",
|
|
10
|
+
"OPENAI_COMPAT_BASE_URL": "https://api.deepseek.com/v1",
|
|
11
|
+
"OPENAI_COMPAT_API_KEY": "sk-...",
|
|
12
|
+
"OPENAI_COMPAT_MODELS": {"small": "deepseek-chat"},
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
``PROVIDERS`` entries are **merged over** the built-in registry
|
|
16
|
+
(``stapel_agent.providers.BUILTIN_PROVIDERS``) — adding one custom
|
|
17
|
+
provider does not require restating the built-ins, and setting a name to
|
|
18
|
+
``None``/``""`` removes it. Values are dotted paths to ``LlmProvider``
|
|
19
|
+
subclasses, resolved lazily per request in ``services.get_provider``
|
|
20
|
+
(not via ``import_strings`` — an unknown or broken provider must degrade
|
|
21
|
+
to a ``status: failure`` response, never an import-time crash).
|
|
22
|
+
"""
|
|
23
|
+
from stapel_core.conf import AppSettings
|
|
24
|
+
|
|
25
|
+
# The ``STAPEL_AGENT`` settings namespace. Every key below is a default that
# a host project can override.
agent_settings = AppSettings(
    "STAPEL_AGENT",
    defaults={
        # Size → model-name map used by the default (Anthropic-flavoured)
        # providers. OpenAI-compatible hosts override per-size names via
        # OPENAI_COMPAT_MODELS instead.
        "MODELS": {
            "small": "claude-haiku-4-5-20251001",
            "medium": "claude-sonnet-5",
            "large": "claude-opus-4-8",
        },
        # Overlay merged OVER providers.BUILTIN_PROVIDERS (anthropic /
        # openai-compat / claude-code): add or override entries here,
        # None/"" removes a name. Resolved lazily per request via
        # import_string in services.get_provider(name).
        "PROVIDERS": {},
        # Provider used when a request names none.
        "DEFAULT_PROVIDER": "anthropic",
        # Anthropic SDK (read lazily at call time, never frozen at import).
        "ANTHROPIC_API_KEY": "",
        # Any OpenAI-compatible /chat/completions endpoint
        # (OpenAI, DeepSeek, MiMo, GLM, Kimi, ...).
        "OPENAI_COMPAT_BASE_URL": "",
        "OPENAI_COMPAT_API_KEY": "",
        # Optional size → model-name map for the openai-compat provider,
        # e.g. {"small": "gpt-4o-mini", "medium": "gpt-4o"}. Missing sizes
        # fall back to MODELS[size].
        "OPENAI_COMPAT_MODELS": {},
        # Claude Code CLI provider (opt-in only, never the default).
        "CLI_BINARY": "claude",
        # Provider timeout, in seconds.
        "CLI_TIMEOUT": 120,
        # Completion token cap.
        "MAX_TOKENS": 4096,
        # Per-source cache-by-prompt toggle: a repeated identical
        # prompt+system_prompt within CACHE_TTL returns the stored response
        # without calling the provider.
        "CACHE_LOOKUP": {"llm_facade": False, "translate": True},
        # Seconds; cached rows older than this are ignored (7 days).
        "CACHE_TTL": 604800,
        # Dotted path to a stapel_agent.cache.CachePolicy subclass — the
        # cache seam. The default implements the PromptLog+TTL behaviour;
        # swap for Redis/no-op without forking.
        "CACHE_POLICY": "stapel_agent.cache.PromptLogCachePolicy",
    },
    # Only CACHE_POLICY is resolved through import_strings. PROVIDERS is
    # deliberately left out so a broken provider path degrades to a
    # failure response instead of an import-time crash.
    import_strings=("CACHE_POLICY",),
)
|
|
69
|
+
|
|
70
|
+
__all__ = ["agent_settings"]
|