token-budget-contracts 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- token_budget_contracts-0.1.0/LICENSE +21 -0
- token_budget_contracts-0.1.0/PKG-INFO +154 -0
- token_budget_contracts-0.1.0/README.md +125 -0
- token_budget_contracts-0.1.0/pyproject.toml +37 -0
- token_budget_contracts-0.1.0/src/tbcontracts/__init__.py +38 -0
- token_budget_contracts-0.1.0/src/tbcontracts/confidence_gate.py +33 -0
- token_budget_contracts-0.1.0/src/tbcontracts/contract.py +49 -0
- token_budget_contracts-0.1.0/src/tbcontracts/exceptions.py +24 -0
- token_budget_contracts-0.1.0/src/tbcontracts/ledger.py +72 -0
- token_budget_contracts-0.1.0/src/tbcontracts/manager.py +121 -0
- token_budget_contracts-0.1.0/src/tbcontracts/reallocator.py +78 -0
- token_budget_contracts-0.1.0/src/tbcontracts/tokenizer.py +44 -0
- token_budget_contracts-0.1.0/tests/test_confidence_gate.py +21 -0
- token_budget_contracts-0.1.0/tests/test_contract.py +28 -0
- token_budget_contracts-0.1.0/tests/test_ledger.py +43 -0
- token_budget_contracts-0.1.0/tests/test_manager.py +74 -0
- token_budget_contracts-0.1.0/tests/test_reallocator.py +57 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Swaranshu Borgaonkar
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: token-budget-contracts
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Priority-weighted, confidence-gated token budget governance for multi-agent LLM systems.
|
|
5
|
+
Project-URL: Homepage, https://github.com/swaranshu-borgaonkar/Research_TBC
|
|
6
|
+
Project-URL: Repository, https://github.com/swaranshu-borgaonkar/Research_TBC
|
|
7
|
+
Author: Swaranshu Borgaonkar
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: agents,budget,crewai,langgraph,llm,multi-agent,orchestration,tokens
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Provides-Extra: accurate-tokenizer
|
|
23
|
+
Requires-Dist: tiktoken>=0.5; extra == 'accurate-tokenizer'
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: build; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: twine; extra == 'dev'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# Token Budget Contracts (`tbcontracts`)
|
|
31
|
+
|
|
32
|
+
Priority-weighted, confidence-gated token budget governance for
|
|
33
|
+
multi-agent LLM systems.
|
|
34
|
+
|
|
35
|
+
When a multi-agent system (a planner spawning a researcher, a writer, a
|
|
36
|
+
critic, etc.) is running, every sub-agent burns tokens — and money. Most
|
|
37
|
+
projects today either hardcode a flat cap per agent or don't budget at
|
|
38
|
+
all, so an important agent can starve mid-task while a less important one
|
|
39
|
+
sits on unused budget. `tbcontracts` fixes that by letting unused budget
|
|
40
|
+
flow, in real time, from lower-priority or idle agents to whichever agent
|
|
41
|
+
actually needs it right now — without ever starving the donor below a
|
|
42
|
+
protected minimum reserve, and without ever letting tokens flow "downhill"
|
|
43
|
+
from an important agent to a less important one.
|
|
44
|
+
|
|
45
|
+
It also supports **confidence-gated spending**: once an agent reports a
|
|
46
|
+
high-confidence result, further spending on that agent is automatically
|
|
47
|
+
blocked, so you're not paying for retrieval the model didn't need.
|
|
48
|
+
|
|
49
|
+
This implements the governance model described in U.S. Provisional
|
|
50
|
+
Patent Application No. 64/081,925 ("Token Budget Contracts"), filed
|
|
51
|
+
June 3, 2026, and published research (Zenodo DOI:
|
|
52
|
+
10.5281/zenodo.20549509). The patent application is pending; this
|
|
53
|
+
package's code is released under the MIT license. If you plan to use
|
|
54
|
+
this commercially at scale, consult your own counsel on licensing terms.
|
|
55
|
+
|
|
56
|
+
## Install
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install token-budget-contracts
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
For more accurate token counting (OpenAI/Anthropic-style BPE tokenizer),
|
|
63
|
+
install the optional extra:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install "token-budget-contracts[accurate-tokenizer]"
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Without it, the library falls back to a lightweight word-count heuristic
|
|
70
|
+
and has zero hard dependencies.
|
|
71
|
+
|
|
72
|
+
## Quick start
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from tbcontracts import BudgetManager
|
|
76
|
+
|
|
77
|
+
manager = BudgetManager()
|
|
78
|
+
|
|
79
|
+
# Higher priority = more important. The researcher will be able to pull
|
|
80
|
+
# spare budget from the lower-priority critic agent if it runs low.
|
|
81
|
+
manager.register_agent("researcher", priority=3, max_tokens=4000)
|
|
82
|
+
manager.register_agent("critic", priority=1, max_tokens=2000)
|
|
83
|
+
|
|
84
|
+
@manager.govern("researcher")
|
|
85
|
+
def call_researcher(prompt: str) -> str:
|
|
86
|
+
return my_llm_call(prompt) # however you already call your LLM
|
|
87
|
+
|
|
88
|
+
@manager.govern("critic")
|
|
89
|
+
def call_critic(prompt: str) -> str:
|
|
90
|
+
return my_llm_call(prompt)
|
|
91
|
+
|
|
92
|
+
call_researcher("find the latest figures on X")
|
|
93
|
+
call_critic("check the researcher's claims")
|
|
94
|
+
|
|
95
|
+
print(manager.report())
|
|
96
|
+
# {'researcher': {'allocated': 4000, 'consumed': 312, 'remaining': 3688},
|
|
97
|
+
# 'critic': {'allocated': 2000, 'consumed': 88, 'remaining': 1912}}
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Confidence-gated spending
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
manager.register_agent(
|
|
104
|
+
"fact_checker",
|
|
105
|
+
priority=2,
|
|
106
|
+
max_tokens=3000,
|
|
107
|
+
confidence_threshold=0.85, # block further calls once this confident
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
@manager.govern("fact_checker", confidence_fn=lambda result: result["confidence"])
|
|
111
|
+
def check_fact(claim: str) -> dict:
|
|
112
|
+
response = my_llm_call(claim)
|
|
113
|
+
return {"answer": response, "confidence": extract_confidence(response)}
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Once `check_fact` returns a confidence at or above `0.85`, the next call
|
|
117
|
+
to `check_fact` raises `BudgetExceededError` — the agent has already done
|
|
118
|
+
its job well enough, so further spend isn't approved.
|
|
119
|
+
|
|
120
|
+
## What happens when an agent runs out of budget
|
|
121
|
+
|
|
122
|
+
1. `tbcontracts` estimates how many tokens the call used (or you can
|
|
123
|
+
call `manager.ledger.record(agent_name, exact_token_count)` directly
|
|
124
|
+
if your LLM provider returns real usage numbers).
|
|
125
|
+
2. If the agent doesn't have enough remaining budget, the `Reallocator`
|
|
126
|
+
looks for spare capacity in other agents — starting with the
|
|
127
|
+
lowest-priority, most-idle ones — and pulls just enough to cover the
|
|
128
|
+
shortfall, never dipping a donor below its own `min_reserve`.
|
|
129
|
+
3. If no combination of donors can cover the shortfall, a
|
|
130
|
+
`BudgetExceededError` is raised so you can handle it (retry, degrade
|
|
131
|
+
gracefully, alert, etc.) instead of silently overspending.
|
|
132
|
+
|
|
133
|
+
## Exact accounting
|
|
134
|
+
|
|
135
|
+
The built-in token estimate is good enough for budget *governance*
|
|
136
|
+
decisions, but if you want exact dollar accounting, bypass the estimator
|
|
137
|
+
and record real usage directly:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
response = my_llm_call(prompt)
|
|
141
|
+
manager.ledger.record("researcher", response.usage.total_tokens)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Project status
|
|
145
|
+
|
|
146
|
+
This is an early, actively developed implementation of the Token Budget
|
|
147
|
+
Contracts protocol. Issues and PRs welcome at the GitHub repository
|
|
148
|
+
linked in the project metadata.
|
|
149
|
+
|
|
150
|
+
## License
|
|
151
|
+
|
|
152
|
+
MIT for the code in this package. See `LICENSE`. The underlying
|
|
153
|
+
governance method is the subject of a pending U.S. patent application;
|
|
154
|
+
see the note above.
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# Token Budget Contracts (`tbcontracts`)
|
|
2
|
+
|
|
3
|
+
Priority-weighted, confidence-gated token budget governance for
|
|
4
|
+
multi-agent LLM systems.
|
|
5
|
+
|
|
6
|
+
When a multi-agent system (a planner spawning a researcher, a writer, a
|
|
7
|
+
critic, etc.) is running, every sub-agent burns tokens — and money. Most
|
|
8
|
+
projects today either hardcode a flat cap per agent or don't budget at
|
|
9
|
+
all, so an important agent can starve mid-task while a less important one
|
|
10
|
+
sits on unused budget. `tbcontracts` fixes that by letting unused budget
|
|
11
|
+
flow, in real time, from lower-priority or idle agents to whichever agent
|
|
12
|
+
actually needs it right now — without ever starving the donor below a
|
|
13
|
+
protected minimum reserve, and without ever letting tokens flow "downhill"
|
|
14
|
+
from an important agent to a less important one.
|
|
15
|
+
|
|
16
|
+
It also supports **confidence-gated spending**: once an agent reports a
|
|
17
|
+
high-confidence result, further spending on that agent is automatically
|
|
18
|
+
blocked, so you're not paying for retrieval the model didn't need.
|
|
19
|
+
|
|
20
|
+
This implements the governance model described in U.S. Provisional
|
|
21
|
+
Patent Application No. 64/081,925 ("Token Budget Contracts"), filed
|
|
22
|
+
June 3, 2026, and published research (Zenodo DOI:
|
|
23
|
+
10.5281/zenodo.20549509). The patent application is pending; this
|
|
24
|
+
package's code is released under the MIT license. If you plan to use
|
|
25
|
+
this commercially at scale, consult your own counsel on licensing terms.
|
|
26
|
+
|
|
27
|
+
## Install
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install token-budget-contracts
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
For more accurate token counting (OpenAI/Anthropic-style BPE tokenizer),
|
|
34
|
+
install the optional extra:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install "token-budget-contracts[accurate-tokenizer]"
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Without it, the library falls back to a lightweight word-count heuristic
|
|
41
|
+
and has zero hard dependencies.
|
|
42
|
+
|
|
43
|
+
## Quick start
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
from tbcontracts import BudgetManager
|
|
47
|
+
|
|
48
|
+
manager = BudgetManager()
|
|
49
|
+
|
|
50
|
+
# Higher priority = more important. The researcher will be able to pull
|
|
51
|
+
# spare budget from the lower-priority critic agent if it runs low.
|
|
52
|
+
manager.register_agent("researcher", priority=3, max_tokens=4000)
|
|
53
|
+
manager.register_agent("critic", priority=1, max_tokens=2000)
|
|
54
|
+
|
|
55
|
+
@manager.govern("researcher")
|
|
56
|
+
def call_researcher(prompt: str) -> str:
|
|
57
|
+
return my_llm_call(prompt) # however you already call your LLM
|
|
58
|
+
|
|
59
|
+
@manager.govern("critic")
|
|
60
|
+
def call_critic(prompt: str) -> str:
|
|
61
|
+
return my_llm_call(prompt)
|
|
62
|
+
|
|
63
|
+
call_researcher("find the latest figures on X")
|
|
64
|
+
call_critic("check the researcher's claims")
|
|
65
|
+
|
|
66
|
+
print(manager.report())
|
|
67
|
+
# {'researcher': {'allocated': 4000, 'consumed': 312, 'remaining': 3688},
|
|
68
|
+
# 'critic': {'allocated': 2000, 'consumed': 88, 'remaining': 1912}}
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Confidence-gated spending
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
manager.register_agent(
|
|
75
|
+
"fact_checker",
|
|
76
|
+
priority=2,
|
|
77
|
+
max_tokens=3000,
|
|
78
|
+
confidence_threshold=0.85, # block further calls once this confident
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
@manager.govern("fact_checker", confidence_fn=lambda result: result["confidence"])
|
|
82
|
+
def check_fact(claim: str) -> dict:
|
|
83
|
+
response = my_llm_call(claim)
|
|
84
|
+
return {"answer": response, "confidence": extract_confidence(response)}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Once `check_fact` returns a confidence at or above `0.85`, the next call
|
|
88
|
+
to `check_fact` raises `BudgetExceededError` — the agent has already done
|
|
89
|
+
its job well enough, so further spend isn't approved.
|
|
90
|
+
|
|
91
|
+
## What happens when an agent runs out of budget
|
|
92
|
+
|
|
93
|
+
1. `tbcontracts` estimates how many tokens the call used (or you can
|
|
94
|
+
call `manager.ledger.record(agent_name, exact_token_count)` directly
|
|
95
|
+
if your LLM provider returns real usage numbers).
|
|
96
|
+
2. If the agent doesn't have enough remaining budget, the `Reallocator`
|
|
97
|
+
looks for spare capacity in other agents — starting with the
|
|
98
|
+
lowest-priority, most-idle ones — and pulls just enough to cover the
|
|
99
|
+
shortfall, never dipping a donor below its own `min_reserve`.
|
|
100
|
+
3. If no combination of donors can cover the shortfall, a
|
|
101
|
+
`BudgetExceededError` is raised so you can handle it (retry, degrade
|
|
102
|
+
gracefully, alert, etc.) instead of silently overspending.
|
|
103
|
+
|
|
104
|
+
## Exact accounting
|
|
105
|
+
|
|
106
|
+
The built-in token estimate is good enough for budget *governance*
|
|
107
|
+
decisions, but if you want exact dollar accounting, bypass the estimator
|
|
108
|
+
and record real usage directly:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
response = my_llm_call(prompt)
|
|
112
|
+
manager.ledger.record("researcher", response.usage.total_tokens)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Project status
|
|
116
|
+
|
|
117
|
+
This is an early, actively developed implementation of the Token Budget
|
|
118
|
+
Contracts protocol. Issues and PRs welcome at the GitHub repository
|
|
119
|
+
linked in the project metadata.
|
|
120
|
+
|
|
121
|
+
## License
|
|
122
|
+
|
|
123
|
+
MIT for the code in this package. See `LICENSE`. The underlying
|
|
124
|
+
governance method is the subject of a pending U.S. patent application;
|
|
125
|
+
see the note above.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "token-budget-contracts"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Priority-weighted, confidence-gated token budget governance for multi-agent LLM systems."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [{ name = "Swaranshu Borgaonkar" }]
|
|
13
|
+
keywords = ["llm", "agents", "tokens", "budget", "multi-agent", "orchestration", "langgraph", "crewai"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.9",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"License :: OSI Approved :: MIT License",
|
|
23
|
+
"Operating System :: OS Independent",
|
|
24
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
25
|
+
]
|
|
26
|
+
dependencies = []
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
accurate-tokenizer = ["tiktoken>=0.5"]
|
|
30
|
+
dev = ["pytest>=7.0", "build", "twine"]
|
|
31
|
+
|
|
32
|
+
[project.urls]
|
|
33
|
+
Homepage = "https://github.com/swaranshu-borgaonkar/Research_TBC"
|
|
34
|
+
Repository = "https://github.com/swaranshu-borgaonkar/Research_TBC"
|
|
35
|
+
|
|
36
|
+
[tool.hatch.build.targets.wheel]
|
|
37
|
+
packages = ["src/tbcontracts"]
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""tbcontracts: priority-weighted, confidence-gated token budget
|
|
2
|
+
governance for multi-agent LLM systems.
|
|
3
|
+
|
|
4
|
+
Quick start:
|
|
5
|
+
|
|
6
|
+
from tbcontracts import BudgetManager
|
|
7
|
+
|
|
8
|
+
manager = BudgetManager()
|
|
9
|
+
manager.register_agent("researcher", priority=3, max_tokens=4000)
|
|
10
|
+
manager.register_agent("writer", priority=2, max_tokens=2000)
|
|
11
|
+
|
|
12
|
+
@manager.govern("researcher")
|
|
13
|
+
def call_researcher(prompt):
|
|
14
|
+
return my_llm_call(prompt)
|
|
15
|
+
|
|
16
|
+
call_researcher("find the latest figures on X")
|
|
17
|
+
print(manager.report())
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from .confidence_gate import ConfidenceGate
|
|
21
|
+
from .contract import BudgetContract
|
|
22
|
+
from .exceptions import BudgetExceededError, TBCError, UnknownAgentError
|
|
23
|
+
from .ledger import Ledger
|
|
24
|
+
from .manager import BudgetManager
|
|
25
|
+
from .reallocator import Reallocator
|
|
26
|
+
|
|
27
|
+
# Package version string; the same value appears as `version` in pyproject.toml.
__version__ = "0.1.0"

# Public API of the package: the names exported by `from tbcontracts import *`.
__all__ = [
    "BudgetContract",
    "Ledger",
    "Reallocator",
    "ConfidenceGate",
    "BudgetManager",
    "TBCError",
    "BudgetExceededError",
    "UnknownAgentError",
]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Adaptive, confidence-gated spending decisions."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from .contract import BudgetContract
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ConfidenceGate:
    """Gatekeeper that stops an agent from spending once it is confident.

    An agent whose latest output already met its confidence threshold has
    nothing to gain from more retrieval or reasoning, so further spend is
    refused. An agent that is still below the threshold may keep going.
    """

    @staticmethod
    def allow_further_retrieval(contract: BudgetContract, confidence: Optional[float]) -> bool:
        """Report whether the agent may keep spending tokens.

        Args:
            contract: The agent's budget contract; only its
                `confidence_threshold` is consulted.
            confidence: The most recent confidence score, or None when no
                score has been recorded yet.

        Returns:
            True when gating is disabled (no threshold on the contract),
            True when there is no score yet (fail open: nothing to deny
            on), otherwise True only while `confidence` is strictly below
            the threshold.
        """
        threshold = contract.confidence_threshold
        gating_disabled = threshold is None
        no_signal_yet = confidence is None
        if gating_disabled or no_signal_yet:
            return True
        # A score equal to the threshold counts as "confident enough".
        return confidence < threshold
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Budget contract definitions for individual agents."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Optional
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class BudgetContract:
    """Token budget rules attached to one agent.

    Attributes:
        agent_name: Unique identifier for the agent.
        priority: Relative importance; a larger number means a more
            important agent. Must be at least 1. The Reallocator only
            takes spare tokens from agents whose priority is less than or
            equal to that of the agent in need, so budget never flows
            "downhill" from an important agent to a less important one.
        max_tokens: Initial token allocation. Must not be negative.
        min_reserve: Tokens the agent always keeps, even when the
            Reallocator wants to draw from it. When left as None it is
            filled in as 10% of `max_tokens`, truncated to an int.
        confidence_threshold: Confidence level (0-1) at which the agent
            counts as "confident enough" and further spending is blocked.
            None disables confidence gating.
        last_confidence: Latest confidence score reported for the agent.
            Read by ConfidenceGate and written by BudgetManager.govern;
            not intended to be set by hand. Excluded from the repr.
        created_at: `time.time()` value captured when the contract is
            created.
    """

    agent_name: str
    priority: int = 1
    max_tokens: int = 1000
    min_reserve: Optional[int] = None
    confidence_threshold: Optional[float] = None
    last_confidence: Optional[float] = field(default=None, repr=False)
    created_at: float = field(default_factory=time.time)

    def __post_init__(self):
        """Validate the fields and fill in the default reserve.

        Raises:
            ValueError: If `priority` is below 1, `max_tokens` is
                negative, `min_reserve` falls outside
                ``[0, max_tokens]``, or `confidence_threshold` falls
                outside ``[0, 1]``. Checks run in that order.
        """
        if self.priority < 1:
            raise ValueError("priority must be >= 1")
        if self.max_tokens < 0:
            raise ValueError("max_tokens must be >= 0")

        # The default reserve is derived only after max_tokens is known
        # to be valid.
        if self.min_reserve is None:
            self.min_reserve = int(self.max_tokens * 0.1)
        reserve = self.min_reserve
        if reserve < 0 or reserve > self.max_tokens:
            raise ValueError("min_reserve must be between 0 and max_tokens")

        threshold = self.confidence_threshold
        if threshold is None:
            return
        if not (0 <= threshold <= 1):
            raise ValueError("confidence_threshold must be between 0 and 1")
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Exceptions raised by tbcontracts."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TBCError(Exception):
    """Base exception for all tbcontracts errors."""


class BudgetExceededError(TBCError):
    """Raised when an agent attempts to spend beyond its available budget
    and no further reallocation from other agents is possible.

    Attributes:
        agent_name: Name of the agent whose spend was refused.
        requested: Number of tokens the agent tried to spend.
        available: Number of tokens that were actually available.
    """

    def __init__(self, agent_name: str, requested: int, available: int):
        self.agent_name = agent_name
        self.requested = requested
        self.available = available
        super().__init__(
            f"Agent '{agent_name}' requested {requested} tokens but only "
            f"{available} were available even after attempting reallocation."
        )

    def __reduce__(self):
        """Support pickling and copying.

        The inherited `BaseException.__reduce__` rebuilds the object by
        calling the class with `self.args`, which here holds only the
        formatted message. That call fails because `__init__` needs three
        arguments, so the original constructor arguments are supplied
        instead. The instance `__dict__` is passed as state so any extra
        attributes set on the exception survive the round trip.
        """
        return (
            type(self),
            (self.agent_name, self.requested, self.available),
            dict(self.__dict__),
        )


class UnknownAgentError(TBCError):
    """Raised when referencing an agent that was never registered."""
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Tracks live token consumption and allocation per agent."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Dict, List, Optional
|
|
5
|
+
import time
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class LedgerEntry:
    """One recorded spend event, as appended by `Ledger.record`."""

    # Agent the tokens were charged to.
    agent_name: str
    # Number of tokens charged by this event.
    tokens: int
    # `time.time()` value taken when the event was recorded.
    timestamp: float
    # Free-form label supplied by the caller.
    note: str = ""


class Ledger:
    """Tracks consumption and remaining budget for every registered agent.

    This is the single source of truth the Reallocator and BudgetManager
    read from and write to. Allocation totals can change over time (when
    the Reallocator moves tokens between agents); consumption only ever
    goes up.
    """

    def __init__(self):
        self._allocated: Dict[str, int] = {}
        self._consumed: Dict[str, int] = {}
        self._history: List[LedgerEntry] = []

    def register(self, agent_name: str, allocated_tokens: int) -> None:
        """Start tracking `agent_name` with `allocated_tokens` of budget.

        Registering a name again overwrites its allocation but keeps the
        consumption already recorded for it.
        """
        self._allocated[agent_name] = allocated_tokens
        self._consumed.setdefault(agent_name, 0)

    def record(self, agent_name: str, tokens: int, note: str = "") -> None:
        """Charge `tokens` to `agent_name` and append a history entry.

        The allocation is not checked here, so `remaining` can go
        negative if more is recorded than was allocated.

        Raises:
            KeyError: If `agent_name` was never registered.
            ValueError: If `tokens` is negative. Consumption must never
                decrease, so a negative charge is rejected rather than
                silently lowering the total.
        """
        if agent_name not in self._allocated:
            raise KeyError(f"Unknown agent '{agent_name}'. Register it first.")
        if tokens < 0:
            raise ValueError("tokens must be >= 0")
        self._consumed[agent_name] += tokens
        self._history.append(LedgerEntry(agent_name, tokens, time.time(), note))

    def remaining(self, agent_name: str) -> int:
        """Return allocation minus consumption for `agent_name`."""
        return self._allocated[agent_name] - self._consumed[agent_name]

    def consumed(self, agent_name: str) -> int:
        """Return the total tokens recorded against `agent_name`."""
        return self._consumed[agent_name]

    def allocated(self, agent_name: str) -> int:
        """Return the current total allocation of `agent_name`."""
        return self._allocated[agent_name]

    def adjust_allocation(self, agent_name: str, delta: int) -> None:
        """Increase (delta > 0) or decrease (delta < 0) an agent's total
        allocation. Used by the Reallocator to move budget between agents.
        """
        self._allocated[agent_name] += delta

    def history(self, agent_name: Optional[str] = None) -> List[LedgerEntry]:
        """Return recorded entries in insertion order.

        With no argument every entry is returned; otherwise only those of
        `agent_name`. The result is a new list, so callers may mutate it
        without touching the ledger.
        """
        if agent_name is None:
            return list(self._history)
        return [e for e in self._history if e.agent_name == agent_name]

    def snapshot(self) -> Dict[str, Dict[str, int]]:
        """A point-in-time view of every agent's allocated/consumed/
        remaining tokens. Useful for building a dashboard or printing a
        summary report.
        """
        return {
            name: {
                "allocated": self._allocated[name],
                "consumed": self._consumed[name],
                "remaining": self.remaining(name),
            }
            for name in self._allocated
        }
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""High-level orchestrator tying contracts, ledger, reallocation, and
|
|
2
|
+
confidence gating together behind a simple decorator API.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
from typing import Callable, Dict, Optional
|
|
7
|
+
|
|
8
|
+
from .confidence_gate import ConfidenceGate
|
|
9
|
+
from .contract import BudgetContract
|
|
10
|
+
from .exceptions import BudgetExceededError, UnknownAgentError
|
|
11
|
+
from .ledger import Ledger
|
|
12
|
+
from .reallocator import Reallocator
|
|
13
|
+
from .tokenizer import estimate_tokens
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BudgetManager:
    """Front door of tbcontracts: a single object that owns the contracts,
    the ledger and the reallocator, and exposes them through a decorator.

    Example:
        manager = BudgetManager()
        manager.register_agent("researcher", priority=3, max_tokens=4000)
        manager.register_agent("writer", priority=2, max_tokens=2000)

        @manager.govern("researcher")
        def call_researcher(prompt: str) -> str:
            return my_llm_call(prompt)

        call_researcher("find the latest figures on X")
        print(manager.report())
    """

    def __init__(self):
        # The reallocator is handed these same dict and ledger objects.
        self.contracts: Dict[str, BudgetContract] = {}
        self.ledger = Ledger()
        self.reallocator = Reallocator(self.contracts, self.ledger)

    def register_agent(
        self,
        agent_name: str,
        priority: int = 1,
        max_tokens: int = 1000,
        min_reserve: Optional[int] = None,
        confidence_threshold: Optional[float] = None,
    ) -> BudgetContract:
        """Create a contract for `agent_name`, store it and open its
        ledger account with `max_tokens`.

        Returns:
            The newly created BudgetContract, for inspection if needed.
        """
        new_contract = BudgetContract(
            agent_name=agent_name,
            priority=priority,
            max_tokens=max_tokens,
            min_reserve=min_reserve,
            confidence_threshold=confidence_threshold,
        )
        self.contracts[agent_name] = new_contract
        self.ledger.register(agent_name, max_tokens)
        return new_contract

    def _cover_shortfall(self, agent_name: str, tokens_used: int) -> None:
        """Ensure `agent_name` can afford `tokens_used`, asking the
        Reallocator for the missing amount when its own budget is short.

        Raises:
            BudgetExceededError: If the Reallocator recovers less than
                the shortfall.
        """
        remaining = self.ledger.remaining(agent_name)
        if not (tokens_used > remaining):
            return
        shortfall = tokens_used - remaining
        recovered = self.reallocator.reallocate(agent_name, shortfall)
        if recovered < shortfall:
            raise BudgetExceededError(
                agent_name,
                requested=tokens_used,
                available=remaining + recovered,
            )

    def govern(self, agent_name: str, confidence_fn: Optional[Callable] = None):
        """Build a decorator that meters each call of the wrapped function
        against the budget contract of `agent_name`.

        Steps performed on every call, in order:
            1. Consult the ConfidenceGate with the contract's stored
               `last_confidence`; if it refuses, BudgetExceededError is
               raised (with `requested=0`) and the function is not run.
            2. Run the wrapped function.
            3. Estimate the tokens used from the call's arguments and
               result.
            4. If that exceeds the agent's remaining budget, ask the
               Reallocator for the shortfall; raise BudgetExceededError
               when it cannot be fully covered. The wrapped function has
               already run by this point.
            5. Record the usage in the ledger, labelled with the wrapped
               function's `__name__`.
            6. If `confidence_fn` was given, call it with the result and
               store the returned value on the contract, where the gate
               reads it on the *next* call.

        Args:
            agent_name: Name of a previously registered agent.
            confidence_fn: Optional callable mapping the wrapped
                function's return value to a confidence between 0 and 1.

        Raises:
            UnknownAgentError: Immediately, if `agent_name` is not
                registered when `govern` is called.
        """
        if agent_name not in self.contracts:
            raise UnknownAgentError(f"Agent '{agent_name}' was never registered.")

        def decorator(fn: Callable):
            @functools.wraps(fn)
            def wrapper(*args, **kwargs):
                # Looked up per call, so a re-registered contract is honoured.
                contract = self.contracts[agent_name]

                gate_open = ConfidenceGate.allow_further_retrieval(
                    contract, contract.last_confidence
                )
                if not gate_open:
                    raise BudgetExceededError(
                        agent_name,
                        requested=0,
                        available=self.ledger.remaining(agent_name),
                    )

                result = fn(*args, **kwargs)

                tokens_used = estimate_tokens(args, kwargs, result)
                self._cover_shortfall(agent_name, tokens_used)
                self.ledger.record(agent_name, tokens_used, note=fn.__name__)

                if confidence_fn is not None:
                    contract.last_confidence = confidence_fn(result)
                return result

            return wrapper

        return decorator

    def report(self) -> dict:
        """Return the ledger's snapshot: allocated/consumed/remaining
        tokens for every agent. Handy for dashboards or a printed summary.
        """
        return self.ledger.snapshot()
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Priority-weighted dynamic token reallocation across agents.
|
|
2
|
+
|
|
3
|
+
This is the core mechanism of Token Budget Contracts: instead of every
|
|
4
|
+
agent being stuck with a fixed, isolated cap, spare budget from
|
|
5
|
+
lower-priority or idle agents can flow to a higher-priority agent that's
|
|
6
|
+
running low, in real time, without ever dropping a donor below its own
|
|
7
|
+
protected minimum reserve.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from typing import Dict, List
|
|
11
|
+
|
|
12
|
+
from .contract import BudgetContract
|
|
13
|
+
from .ledger import Ledger
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Reallocator:
    """Shifts unused token budget from donor agents to an agent that ran short.

    Invariants enforced here:
      1. A donor is never taken below its contract's `min_reserve`.
      2. Only agents whose priority is <= the needy agent's priority may
         donate. Budget never moves "downhill" from a more important agent
         to a less important one.
      3. Eligible donors are drained lowest priority first and, within one
         priority level, largest remaining balance first.
    """

    def __init__(self, contracts: Dict[str, BudgetContract], ledger: Ledger):
        # Both objects are held by reference (not copied), so later changes
        # made through them are visible here.
        self.ledger = ledger
        self.contracts = contracts

    def _spare(self, name: str):
        """Tokens `name` still has left beyond its protected reserve (may be <= 0)."""
        return self.ledger.remaining(name) - self.contracts[name].min_reserve

    def find_donors(self, exclude: str) -> List[str]:
        """List every agent except `exclude` whose remaining budget exceeds
        its reserve, ordered lowest priority first and, for equal priority,
        most remaining tokens first."""
        candidates = [
            name
            for name in self.contracts
            if name != exclude and self._spare(name) > 0
        ]

        def drain_order(name):
            # Negating the balance puts the largest remaining budget first
            # within a priority level.
            return (self.contracts[name].priority, -self.ledger.remaining(name))

        return sorted(candidates, key=drain_order)

    def reallocate(self, needy_agent: str, shortfall: int) -> int:
        """Try to cover `shortfall` tokens for `needy_agent` out of the spare
        budget of eligible donors.

        Returns the number of tokens actually moved, which can be smaller
        than `shortfall` when the eligible donors cannot cover it all.
        Returns 0 without touching the ledger when `shortfall` <= 0.

        Raises:
            KeyError: if `needy_agent` has no contract.
        """
        if needy_agent not in self.contracts:
            raise KeyError(f"Unknown agent '{needy_agent}'")
        if shortfall <= 0:
            return 0

        priority_ceiling = self.contracts[needy_agent].priority
        recovered = 0

        for donor in self.find_donors(exclude=needy_agent):
            still_needed = shortfall - recovered
            if still_needed <= 0:
                break

            # Never pull tokens from a more important agent.
            if self.contracts[donor].priority > priority_ceiling:
                continue

            take = min(self._spare(donor), still_needed)
            if take > 0:
                # Shrink the donor's allocation and grow the needy agent's
                # by the same amount.
                self.ledger.adjust_allocation(donor, -take)
                self.ledger.adjust_allocation(needy_agent, take)
                recovered += take

        return recovered
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Pluggable token estimation.
|
|
2
|
+
|
|
3
|
+
Uses tiktoken's cl100k_base encoding if it's installed (exact for OpenAI models on that encoding, approximate for other BPE
|
|
4
|
+
tokenizers). Falls back to a simple word-count heuristic otherwise, so the
|
|
5
|
+
library has zero hard dependencies out of the box.
|
|
6
|
+
|
|
7
|
+
For exact accounting (e.g. real usage numbers returned by your LLM
|
|
8
|
+
provider's API response), bypass this entirely and call
|
|
9
|
+
`Ledger.record(agent_name, real_token_count)` directly instead of relying
|
|
10
|
+
on the estimate.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
try:
    import tiktoken

    # Build the encoder once at import time; _count() reuses this single
    # module-level instance on every call.
    _ENCODER = tiktoken.get_encoding("cl100k_base")
except Exception:  # pragma: no cover - exercised only when tiktoken absent
    # Deliberately broad: any failure while importing tiktoken or creating
    # the encoder leaves _ENCODER as None, which makes _count() fall back to
    # its word-count heuristic.
    _ENCODER = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _count(text: str) -> int:
    """Return an estimated token count for `text`.

    Falsy input (e.g. the empty string) counts as 0. When the module-level
    `_ENCODER` is available the count is the length of its encoding of
    `text`; otherwise a word-count heuristic is used.
    """
    if not text:
        return 0
    if _ENCODER is not None:
        # By default tiktoken raises ValueError when the text contains a
        # special-token marker such as "<|endoftext|>". Prompts and model
        # output are arbitrary text and may contain such markers; an
        # estimator must not crash on them, so encode them as ordinary text.
        return len(_ENCODER.encode(text, disallowed_special=()))
    # Fallback heuristic: roughly 1.3 tokens per whitespace-separated word,
    # with a floor of 1 for any non-empty text.
    return max(1, int(len(text.split()) * 1.3))
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def estimate_tokens(args: tuple, kwargs: dict, result: Any) -> int:
    """Estimate the tokens involved in one call: its inputs plus its output.

    Only `str` values contribute: positional arguments, keyword-argument
    values, and the result. Anything that is not a string is ignored and
    adds nothing to the total.
    """
    candidates = (*args, *kwargs.values(), result)
    return sum(_count(item) for item in candidates if isinstance(item, str))
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from tbcontracts import BudgetContract, ConfidenceGate
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_no_threshold_always_allows():
    """A contract built without a confidence threshold lets retrieval through."""
    contract = BudgetContract("a")
    allowed = ConfidenceGate.allow_further_retrieval(contract, confidence=0.1)
    assert allowed is True
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_low_confidence_allows_more_retrieval():
    """Confidence 0.5 against a 0.8 threshold still permits retrieval."""
    contract = BudgetContract("a", confidence_threshold=0.8)
    allowed = ConfidenceGate.allow_further_retrieval(contract, confidence=0.5)
    assert allowed is True
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_high_confidence_blocks_further_retrieval():
    """Confidence 0.95 against a 0.8 threshold blocks further retrieval."""
    contract = BudgetContract("a", confidence_threshold=0.8)
    allowed = ConfidenceGate.allow_further_retrieval(contract, confidence=0.95)
    assert allowed is False
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_missing_confidence_fails_open():
    """With no confidence value (None) the gate allows retrieval."""
    contract = BudgetContract("a", confidence_threshold=0.8)
    allowed = ConfidenceGate.allow_further_retrieval(contract, confidence=None)
    assert allowed is True
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from tbcontracts import BudgetContract
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_default_min_reserve_is_ten_percent():
    """Without an explicit reserve, 1000 max tokens yields a reserve of 100."""
    contract = BudgetContract(agent_name="a", max_tokens=1000)
    expected_reserve = 100
    assert contract.min_reserve == expected_reserve
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_explicit_min_reserve_respected():
    """An explicitly supplied min_reserve is stored unchanged."""
    contract = BudgetContract(agent_name="a", max_tokens=1000, min_reserve=50)
    expected_reserve = 50
    assert contract.min_reserve == expected_reserve
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_invalid_priority_raises():
    """Constructing a contract with priority=0 must raise ValueError."""
    bad_kwargs = {"agent_name": "a", "priority": 0}
    with pytest.raises(ValueError):
        BudgetContract(**bad_kwargs)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_invalid_confidence_threshold_raises():
    """A confidence_threshold of 1.5 must raise ValueError."""
    bad_kwargs = {"agent_name": "a", "confidence_threshold": 1.5}
    with pytest.raises(ValueError):
        BudgetContract(**bad_kwargs)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_min_reserve_cannot_exceed_max_tokens():
    """A reserve (200) larger than max_tokens (100) must raise ValueError."""
    bad_kwargs = {"agent_name": "a", "max_tokens": 100, "min_reserve": 200}
    with pytest.raises(ValueError):
        BudgetContract(**bad_kwargs)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from tbcontracts import Ledger
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_register_and_record():
    """Recording 200 of 1000 tokens leaves 200 consumed and 800 remaining."""
    book = Ledger()
    book.register("a", 1000)
    book.record("a", 200, note="call 1")

    consumed, remaining = book.consumed("a"), book.remaining("a")
    assert consumed == 200
    assert remaining == 800
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_record_unknown_agent_raises():
    """Recording usage for an agent that was never registered raises KeyError."""
    book = Ledger()
    with pytest.raises(KeyError):
        book.record("ghost", 10)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_adjust_allocation():
    """A -300 adjustment on a 1000-token allocation leaves 700 allocated."""
    book = Ledger()
    book.register("a", 1000)
    book.adjust_allocation("a", -300)

    allocated = book.allocated("a")
    assert allocated == 700
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_snapshot():
    """The snapshot entry for an agent reports allocated, consumed and remaining."""
    book = Ledger()
    book.register("a", 1000)
    book.record("a", 400)

    expected = {"allocated": 1000, "consumed": 400, "remaining": 600}
    snapshot = book.snapshot()
    assert snapshot["a"] == expected
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_history_filters_by_agent():
    """history("a") returns only a's entries; history() returns all of them."""
    book = Ledger()
    for agent in ("a", "b"):
        book.register(agent, 1000)

    # Two records for "a" interleaved with one for "b".
    for agent, amount in (("a", 100), ("b", 50), ("a", 25)):
        book.record(agent, amount)

    only_a = book.history("a")
    everything = book.history()
    assert len(only_a) == 2
    assert len(everything) == 3
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from tbcontracts import BudgetManager, BudgetExceededError, UnknownAgentError
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_govern_tracks_consumption():
    """One governed call shows up as consumed tokens in the manager's report."""
    mgr = BudgetManager()
    mgr.register_agent("writer", priority=2, max_tokens=100)

    @mgr.govern("writer")
    def call_writer(prompt):
        return "a short reply"

    call_writer("write something short")

    writer_stats = mgr.report()["writer"]
    assert writer_stats["consumed"] > 0
    assert writer_stats["remaining"] < 100
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_govern_reallocates_from_lower_priority_agent():
    """An over-budget high-priority call is covered by a lower-priority donor."""
    mgr = BudgetManager()
    mgr.register_agent("researcher", priority=3, max_tokens=20, min_reserve=2)
    mgr.register_agent("helper", priority=1, max_tokens=500, min_reserve=10)

    @mgr.govern("researcher")
    def call_researcher(prompt):
        return (
            "a fairly long generated response that will use more tokens "
            "than the small budget allows for this particular agent"
        )

    # Must not raise: "helper" has ample spare budget above its reserve.
    reply = call_researcher("research something")
    assert isinstance(reply, str)

    # The donor's allocation shrank, i.e. tokens were pulled from it.
    helper_stats = mgr.report()["helper"]
    assert helper_stats["allocated"] < 500
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_govern_raises_when_no_budget_anywhere():
    """With a single 5-token agent and no donors, an oversized call raises."""
    mgr = BudgetManager()
    mgr.register_agent("solo", priority=1, max_tokens=5, min_reserve=0)

    @mgr.govern("solo")
    def call_solo(prompt):
        return "a fairly long generated response with way more tokens than five"

    with pytest.raises(BudgetExceededError):
        call_solo("go")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def test_unregistered_agent_raises():
    """Governing a function for an agent that was never registered raises."""
    mgr = BudgetManager()
    with pytest.raises(UnknownAgentError):

        @mgr.govern("ghost")
        def fn(prompt):
            return "x"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_confidence_gate_blocks_second_call_once_confident():
    """After a call reports confidence 0.95 (threshold 0.8), the next call is refused."""
    mgr = BudgetManager()
    mgr.register_agent("checker", priority=1, max_tokens=1000, confidence_threshold=0.8)

    def always_confident(result):
        return 0.95

    @mgr.govern("checker", confidence_fn=always_confident)
    def call_checker(prompt):
        return "confident answer"

    # The first call goes through; its confidence (0.95) is stored afterwards.
    call_checker("question 1")

    # 0.95 >= 0.8: the agent is already confident enough, so further spend
    # is not justified and the second call must be blocked.
    with pytest.raises(BudgetExceededError):
        call_checker("question 2")
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
from tbcontracts import BudgetContract, Ledger, Reallocator
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def make_setup():
    """Build the shared fixture: two contracts and a ledger.

    Returns `(contracts, ledger)`. "researcher" has priority 3 and "critic"
    priority 1; both have 1000 max tokens and a 100-token reserve, and each
    is registered in the ledger with its `max_tokens`.
    """
    researcher = BudgetContract("researcher", priority=3, max_tokens=1000, min_reserve=100)
    critic = BudgetContract("critic", priority=1, max_tokens=1000, min_reserve=100)
    contracts = {"researcher": researcher, "critic": critic}

    ledger = Ledger()
    for agent, contract in contracts.items():
        ledger.register(agent, contract.max_tokens)
    return contracts, ledger
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_reallocate_pulls_from_lower_priority_idle_agent():
    """A 300-token shortfall is fully covered by the idle, lower-priority critic."""
    contracts, ledger = make_setup()
    ledger.record("researcher", 950)  # researcher is down to 50 remaining
    mover = Reallocator(contracts, ledger)

    got = mover.reallocate("researcher", shortfall=300)

    assert got == 300
    # 50 left over + 300 received; nothing has been consumed from it yet.
    assert ledger.remaining("researcher") == 350
    # The donor's allocation dropped by the 300 that was taken.
    assert ledger.allocated("critic") == 700
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_reallocate_never_dips_below_donor_reserve():
    """A donor gives up only what sits above its reserve, even if more is needed."""
    contracts, ledger = make_setup()
    ledger.record("researcher", 950)
    # critic has 150 remaining; with a 100 reserve only 50 of that is spare.
    ledger.record("critic", 850)
    mover = Reallocator(contracts, ledger)

    got = mover.reallocate("researcher", shortfall=300)

    assert got == 50  # 150 remaining - 100 reserve = 50 spare
    assert ledger.allocated("critic") == 950
    # The donor ends exactly at its reserve, never below it.
    assert ledger.remaining("critic") == 100
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_reallocate_does_not_pull_from_higher_priority_agent():
    """A low-priority agent in need gets nothing from a higher-priority one."""
    contracts, ledger = make_setup()
    # critic (priority 1) runs short while researcher (priority 3) has spare
    # budget; tokens must not flow from the more important agent to the less
    # important one.
    ledger.record("critic", 950)
    mover = Reallocator(contracts, ledger)

    got = mover.reallocate("critic", shortfall=300)

    assert got == 0
    assert ledger.allocated("researcher") == 1000  # left untouched
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def test_reallocate_zero_shortfall_is_noop():
    """Asking to cover a shortfall of 0 recovers 0 tokens."""
    contracts, ledger = make_setup()
    mover = Reallocator(contracts, ledger)

    got = mover.reallocate("researcher", shortfall=0)
    assert got == 0
|