tbay 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tbay-0.1.0/LICENSE +21 -0
- tbay-0.1.0/PKG-INFO +195 -0
- tbay-0.1.0/README.md +174 -0
- tbay-0.1.0/pyproject.toml +31 -0
- tbay-0.1.0/setup.cfg +4 -0
- tbay-0.1.0/src/tbay/__init__.py +25 -0
- tbay-0.1.0/src/tbay/backends/__init__.py +3 -0
- tbay-0.1.0/src/tbay/backends/base.py +156 -0
- tbay-0.1.0/src/tbay/backends/postgres_backend.py +331 -0
- tbay-0.1.0/src/tbay/backends/sqlite_backend.py +333 -0
- tbay-0.1.0/src/tbay/cli.py +87 -0
- tbay-0.1.0/src/tbay/client.py +446 -0
- tbay-0.1.0/src/tbay/decorator.py +53 -0
- tbay-0.1.0/src/tbay/exceptions.py +40 -0
- tbay-0.1.0/src/tbay/policy.py +146 -0
- tbay-0.1.0/src/tbay.egg-info/PKG-INFO +195 -0
- tbay-0.1.0/src/tbay.egg-info/SOURCES.txt +22 -0
- tbay-0.1.0/src/tbay.egg-info/dependency_links.txt +1 -0
- tbay-0.1.0/src/tbay.egg-info/entry_points.txt +2 -0
- tbay-0.1.0/src/tbay.egg-info/requires.txt +9 -0
- tbay-0.1.0/src/tbay.egg-info/top_level.txt +1 -0
- tbay-0.1.0/tests/test_policy_features.py +194 -0
- tbay-0.1.0/tests/test_postgres_backend.py +65 -0
- tbay-0.1.0/tests/test_tbay.py +188 -0
tbay-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 tbay contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tbay-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tbay
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Execution safety for AI agent tool calls: idempotency, caching, singleflight, policy, and approval gating, as a library, not a service.
|
|
5
|
+
Author: tbay contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Python: >=3.9
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: pyyaml>=6.0
|
|
14
|
+
Requires-Dist: click>=8.1
|
|
15
|
+
Provides-Extra: postgres
|
|
16
|
+
Requires-Dist: psycopg2-binary>=2.9; extra == "postgres"
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
19
|
+
Requires-Dist: psycopg2-binary>=2.9; extra == "dev"
|
|
20
|
+
Dynamic: license-file
|
|
21
|
+
|
|
22
|
+
# tbay
|
|
23
|
+
|
|
24
|
+
Execution safety for AI agent tool calls: idempotency, TTL caching,
|
|
25
|
+
singleflight deduplication, risk-tiered policy, and human approval gating,
|
|
26
|
+
as a library you install, not a service you depend on.
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from tbay import TbayClient, guarded
|
|
30
|
+
|
|
31
|
+
client = TbayClient("sqlite:///~/.tbay/db.sqlite")
|
|
32
|
+
# or: TbayClient("postgresql://user:pass@host/dbname")
|
|
33
|
+
|
|
34
|
+
@guarded(client, policy="readonly")
|
|
35
|
+
def github_search(query: str) -> dict:
|
|
36
|
+
return real_github_api_call(query)
|
|
37
|
+
|
|
38
|
+
@guarded(client, policy="destructive")
|
|
39
|
+
def refund_customer(customer_id: str, amount: float) -> dict:
|
|
40
|
+
return stripe_refund(customer_id, amount)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
`@guarded` only ever wraps a plain callable, so it drops in under
|
|
44
|
+
LangChain's `@tool`, the OpenAI Agents SDK's `@function_tool`, CrewAI tools,
|
|
45
|
+
or bare functions, with zero framework-specific code. See `examples/`.
|
|
46
|
+
|
|
47
|
+
## Why
|
|
48
|
+
|
|
49
|
+
Agent frameworks solve planning and orchestration. None of them solve
|
|
50
|
+
*execution safety*: once a tool is selected, nothing stops it from being
|
|
51
|
+
called twice, cached when it shouldn't be, called too often, or fired on a
|
|
52
|
+
destructive action without a human in the loop. tbay sits underneath any
|
|
53
|
+
framework and handles that, durably, across processes, in whatever database
|
|
54
|
+
you already run.
|
|
55
|
+
|
|
56
|
+
**This is not a hosted service.** You `pip install` it; state (idempotency
|
|
57
|
+
keys, cached results, the audit log) lives entirely in a database you own.
|
|
58
|
+
Nothing calls home.
|
|
59
|
+
|
|
60
|
+
## Install
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
pip install tbay # SQLite backend (stdlib sqlite3, no database driver to install)
|
|
64
|
+
pip install tbay[postgres] # + Postgres backend
|
|
65
|
+
|
|
66
|
+
# or, with uv:
|
|
67
|
+
uv add tbay
|
|
68
|
+
uv add "tbay[postgres]"
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## How it works
|
|
72
|
+
|
|
73
|
+
Every `@guarded` call computes an idempotency key (tool name + normalized
|
|
74
|
+
args + tenant), then atomically claims a row in your database:
|
|
75
|
+
|
|
76
|
+
- **First caller** for a given key becomes the owner and runs the real
|
|
77
|
+
function.
|
|
78
|
+
- **Concurrent callers** with the identical key block and receive the same
|
|
79
|
+
result once the owner finishes (singleflight). No in-memory daemon is
|
|
80
|
+
required; coordination happens through the database's own atomicity
|
|
81
|
+
(`INSERT ... ON CONFLICT`, plus an advisory lock for the Postgres backend
|
|
82
|
+
when `max_concurrent` is set).
|
|
83
|
+
- **Later callers**, after the owner finishes, get the stored result
|
|
84
|
+
instead of re-running: permanently for `mutating`/`destructive` policies
|
|
85
|
+
(true idempotency), or until the policy's `cache_ttl` expires for
|
|
86
|
+
`readonly` policies.
|
|
87
|
+
- **`destructive`-policy calls** pause in `WAITING_APPROVAL` until someone
|
|
88
|
+
runs `tbay approve <execution_id>` (optionally after a webhook fires).
|
|
89
|
+
- **Not every tool call is idempotent.** An LLM call used to decide
|
|
90
|
+
something, "roll a die", "get the current time": calling these twice with
|
|
91
|
+
the same arguments should *not* return the same cached answer. Use the
|
|
92
|
+
`volatile` policy (`idempotent: false`) for these; tbay then ignores
|
|
93
|
+
caching, dedup, and `key_fn` entirely and runs the call fresh every time.
|
|
94
|
+
|
|
95
|
+
## Policies
|
|
96
|
+
|
|
97
|
+
```yaml
|
|
98
|
+
policies:
|
|
99
|
+
# Safe to call repeatedly; safe to serve a slightly stale answer.
|
|
100
|
+
readonly:
|
|
101
|
+
cache_ttl: 5m
|
|
102
|
+
singleflight: true
|
|
103
|
+
max_retries: 2
|
|
104
|
+
retry_backoff: 1s
|
|
105
|
+
|
|
106
|
+
# Has a real effect, but must never double-run for the same input.
|
|
107
|
+
mutating:
|
|
108
|
+
idempotent: true
|
|
109
|
+
cache_ttl: 0 # 0/omitted means "keep the result forever"
|
|
110
|
+
max_retries: 0
|
|
111
|
+
|
|
112
|
+
# Has a real-world consequence a human should sign off on.
|
|
113
|
+
destructive:
|
|
114
|
+
idempotent: true
|
|
115
|
+
approval_required: true
|
|
116
|
+
approval_timeout: 1h
|
|
117
|
+
approval_bypass_arg: amount # optional: skip approval for small values
|
|
118
|
+
approval_bypass_max: 50
|
|
119
|
+
redact_args: [card_number] # optional: mask these args in the audit log
|
|
120
|
+
|
|
121
|
+
# Runs fresh every time, even with identical arguments: an LLM call used
|
|
122
|
+
# to decide something, a random number, "get the current time".
|
|
123
|
+
volatile:
|
|
124
|
+
idempotent: false
|
|
125
|
+
max_retries: 1
|
|
126
|
+
retry_backoff: 0.5
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Pass a policy file via `TbayClient(db_url, policy_file="policy.yaml")`, or
|
|
130
|
+
override policies in code (`client.policies["readonly"].cache_ttl = 60`).
|
|
131
|
+
See `policy.example.yaml` for every field, including the `rate_limit` and
|
|
132
|
+
`max_concurrent` throughput guardrails.
|
|
133
|
+
|
|
134
|
+
### Approval webhooks
|
|
135
|
+
|
|
136
|
+
Setting `approval_webhook` on a policy makes tbay fire an HTTP POST when a
|
|
137
|
+
call enters `WAITING_APPROVAL`, so a human finds out without polling
|
|
138
|
+
`tbay log` themselves. The body is
|
|
139
|
+
`{"execution_id": "...", "tool_name": "..."}`; look the execution up with
|
|
140
|
+
`tbay log --tool <tool_name>` to see its (possibly redacted) arguments
|
|
141
|
+
before approving or rejecting it.
|
|
142
|
+
|
|
143
|
+
The webhook is best-effort: if the URL is unreachable or returns an error,
|
|
144
|
+
tbay ignores it silently and the call still waits normally. `tbay
|
|
145
|
+
approve`/`tbay reject` always work, whether or not the webhook fired, so a
|
|
146
|
+
flaky webhook endpoint can never leave a call stuck.
|
|
147
|
+
|
|
148
|
+
### Approval bypass and other guardrails
|
|
149
|
+
|
|
150
|
+
`approval_bypass_arg`/`approval_bypass_max` let small, low-risk calls
|
|
151
|
+
through automatically while still pausing anything larger for a human:
|
|
152
|
+
a refund of $50 or less runs immediately, a refund of $500 waits for
|
|
153
|
+
`tbay approve`. `rate_limit` and `max_concurrent` protect a tool (and
|
|
154
|
+
whatever paid or rate-limited API it calls) from a runaway agent loop by
|
|
155
|
+
capping how often it can be called and how many calls can be in flight at
|
|
156
|
+
once. `execution_timeout` gives up on a hung call after a set time, though
|
|
157
|
+
this is best-effort: Python can't force-kill a thread, so the call may keep
|
|
158
|
+
running in the background even after tbay marks it failed.
|
|
159
|
+
|
|
160
|
+
## CLI
|
|
161
|
+
|
|
162
|
+
```
|
|
163
|
+
tbay log # the audit log
|
|
164
|
+
tbay log --tool refund_customer --status WAITING_APPROVAL
|
|
165
|
+
tbay approve <execution_id>
|
|
166
|
+
tbay reject <execution_id>
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Point the CLI at the same database as your app with `--db-url` or the
|
|
170
|
+
`TBAY_DB_URL` environment variable.
|
|
171
|
+
|
|
172
|
+
## Examples
|
|
173
|
+
|
|
174
|
+
- `examples/plain_python_demo.py`: no framework, just `@guarded` functions,
|
|
175
|
+
covering readonly caching, mutating idempotency, a volatile LLM call, and
|
|
176
|
+
an approval bypass threshold.
|
|
177
|
+
- `examples/langchain_demo.py`: stacks under LangChain's `@tool`.
|
|
178
|
+
- `examples/openai_agents_demo.py`: stacks under the OpenAI Agents SDK's
|
|
179
|
+
`@function_tool`.
|
|
180
|
+
|
|
181
|
+
## Development
|
|
182
|
+
|
|
183
|
+
Uses [uv](https://docs.astral.sh/uv/) for dependency management:
|
|
184
|
+
|
|
185
|
+
```
|
|
186
|
+
uv sync --extra dev
|
|
187
|
+
uv run pytest
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Postgres-backed tests are skipped unless `TBAY_TEST_PG_DSN` is set to a
|
|
191
|
+
running Postgres instance (CI provides one automatically).
|
|
192
|
+
|
|
193
|
+
## License
|
|
194
|
+
|
|
195
|
+
MIT
|
tbay-0.1.0/README.md
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# tbay
|
|
2
|
+
|
|
3
|
+
Execution safety for AI agent tool calls: idempotency, TTL caching,
|
|
4
|
+
singleflight deduplication, risk-tiered policy, and human approval gating,
|
|
5
|
+
as a library you install, not a service you depend on.
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from tbay import TbayClient, guarded
|
|
9
|
+
|
|
10
|
+
client = TbayClient("sqlite:///~/.tbay/db.sqlite")
|
|
11
|
+
# or: TbayClient("postgresql://user:pass@host/dbname")
|
|
12
|
+
|
|
13
|
+
@guarded(client, policy="readonly")
|
|
14
|
+
def github_search(query: str) -> dict:
|
|
15
|
+
return real_github_api_call(query)
|
|
16
|
+
|
|
17
|
+
@guarded(client, policy="destructive")
|
|
18
|
+
def refund_customer(customer_id: str, amount: float) -> dict:
|
|
19
|
+
return stripe_refund(customer_id, amount)
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
`@guarded` only ever wraps a plain callable, so it drops in under
|
|
23
|
+
LangChain's `@tool`, the OpenAI Agents SDK's `@function_tool`, CrewAI tools,
|
|
24
|
+
or bare functions, with zero framework-specific code. See `examples/`.
|
|
25
|
+
|
|
26
|
+
## Why
|
|
27
|
+
|
|
28
|
+
Agent frameworks solve planning and orchestration. None of them solve
|
|
29
|
+
*execution safety*: once a tool is selected, nothing stops it from being
|
|
30
|
+
called twice, cached when it shouldn't be, called too often, or fired on a
|
|
31
|
+
destructive action without a human in the loop. tbay sits underneath any
|
|
32
|
+
framework and handles that, durably, across processes, in whatever database
|
|
33
|
+
you already run.
|
|
34
|
+
|
|
35
|
+
**This is not a hosted service.** You `pip install` it; state (idempotency
|
|
36
|
+
keys, cached results, the audit log) lives entirely in a database you own.
|
|
37
|
+
Nothing calls home.
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
pip install tbay # SQLite backend (stdlib sqlite3, no database driver to install)
|
|
43
|
+
pip install tbay[postgres] # + Postgres backend
|
|
44
|
+
|
|
45
|
+
# or, with uv:
|
|
46
|
+
uv add tbay
|
|
47
|
+
uv add "tbay[postgres]"
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## How it works
|
|
51
|
+
|
|
52
|
+
Every `@guarded` call computes an idempotency key (tool name + normalized
|
|
53
|
+
args + tenant), then atomically claims a row in your database:
|
|
54
|
+
|
|
55
|
+
- **First caller** for a given key becomes the owner and runs the real
|
|
56
|
+
function.
|
|
57
|
+
- **Concurrent callers** with the identical key block and receive the same
|
|
58
|
+
result once the owner finishes (singleflight). No in-memory daemon is
|
|
59
|
+
required; coordination happens through the database's own atomicity
|
|
60
|
+
(`INSERT ... ON CONFLICT`, plus an advisory lock for the Postgres backend
|
|
61
|
+
when `max_concurrent` is set).
|
|
62
|
+
- **Later callers**, after the owner finishes, get the stored result
|
|
63
|
+
instead of re-running: permanently for `mutating`/`destructive` policies
|
|
64
|
+
(true idempotency), or until the policy's `cache_ttl` expires for
|
|
65
|
+
`readonly` policies.
|
|
66
|
+
- **`destructive`-policy calls** pause in `WAITING_APPROVAL` until someone
|
|
67
|
+
runs `tbay approve <execution_id>` (optionally after a webhook fires).
|
|
68
|
+
- **Not every tool call is idempotent.** An LLM call used to decide
|
|
69
|
+
something, "roll a die", "get the current time": calling these twice with
|
|
70
|
+
the same arguments should *not* return the same cached answer. Use the
|
|
71
|
+
`volatile` policy (`idempotent: false`) for these; tbay then ignores
|
|
72
|
+
caching, dedup, and `key_fn` entirely and runs the call fresh every time.
|
|
73
|
+
|
|
74
|
+
## Policies
|
|
75
|
+
|
|
76
|
+
```yaml
|
|
77
|
+
policies:
|
|
78
|
+
# Safe to call repeatedly; safe to serve a slightly stale answer.
|
|
79
|
+
readonly:
|
|
80
|
+
cache_ttl: 5m
|
|
81
|
+
singleflight: true
|
|
82
|
+
max_retries: 2
|
|
83
|
+
retry_backoff: 1s
|
|
84
|
+
|
|
85
|
+
# Has a real effect, but must never double-run for the same input.
|
|
86
|
+
mutating:
|
|
87
|
+
idempotent: true
|
|
88
|
+
cache_ttl: 0 # 0/omitted means "keep the result forever"
|
|
89
|
+
max_retries: 0
|
|
90
|
+
|
|
91
|
+
# Has a real-world consequence a human should sign off on.
|
|
92
|
+
destructive:
|
|
93
|
+
idempotent: true
|
|
94
|
+
approval_required: true
|
|
95
|
+
approval_timeout: 1h
|
|
96
|
+
approval_bypass_arg: amount # optional: skip approval for small values
|
|
97
|
+
approval_bypass_max: 50
|
|
98
|
+
redact_args: [card_number] # optional: mask these args in the audit log
|
|
99
|
+
|
|
100
|
+
# Runs fresh every time, even with identical arguments: an LLM call used
|
|
101
|
+
# to decide something, a random number, "get the current time".
|
|
102
|
+
volatile:
|
|
103
|
+
idempotent: false
|
|
104
|
+
max_retries: 1
|
|
105
|
+
retry_backoff: 0.5
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Pass a policy file via `TbayClient(db_url, policy_file="policy.yaml")`, or
|
|
109
|
+
override policies in code (`client.policies["readonly"].cache_ttl = 60`).
|
|
110
|
+
See `policy.example.yaml` for every field, including the `rate_limit` and
|
|
111
|
+
`max_concurrent` throughput guardrails.
|
|
112
|
+
|
|
113
|
+
### Approval webhooks
|
|
114
|
+
|
|
115
|
+
Setting `approval_webhook` on a policy makes tbay fire an HTTP POST when a
|
|
116
|
+
call enters `WAITING_APPROVAL`, so a human finds out without polling
|
|
117
|
+
`tbay log` themselves. The body is
|
|
118
|
+
`{"execution_id": "...", "tool_name": "..."}`; look the execution up with
|
|
119
|
+
`tbay log --tool <tool_name>` to see its (possibly redacted) arguments
|
|
120
|
+
before approving or rejecting it.
|
|
121
|
+
|
|
122
|
+
The webhook is best-effort: if the URL is unreachable or returns an error,
|
|
123
|
+
tbay ignores it silently and the call still waits normally. `tbay
|
|
124
|
+
approve`/`tbay reject` always work, whether or not the webhook fired, so a
|
|
125
|
+
flaky webhook endpoint can never leave a call stuck.
|
|
126
|
+
|
|
127
|
+
### Approval bypass and other guardrails
|
|
128
|
+
|
|
129
|
+
`approval_bypass_arg`/`approval_bypass_max` let small, low-risk calls
|
|
130
|
+
through automatically while still pausing anything larger for a human:
|
|
131
|
+
a refund of $50 or less runs immediately, a refund of $500 waits for
|
|
132
|
+
`tbay approve`. `rate_limit` and `max_concurrent` protect a tool (and
|
|
133
|
+
whatever paid or rate-limited API it calls) from a runaway agent loop by
|
|
134
|
+
capping how often it can be called and how many calls can be in flight at
|
|
135
|
+
once. `execution_timeout` gives up on a hung call after a set time, though
|
|
136
|
+
this is best-effort: Python can't force-kill a thread, so the call may keep
|
|
137
|
+
running in the background even after tbay marks it failed.
|
|
138
|
+
|
|
139
|
+
## CLI
|
|
140
|
+
|
|
141
|
+
```
|
|
142
|
+
tbay log # the audit log
|
|
143
|
+
tbay log --tool refund_customer --status WAITING_APPROVAL
|
|
144
|
+
tbay approve <execution_id>
|
|
145
|
+
tbay reject <execution_id>
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Point the CLI at the same database as your app with `--db-url` or the
|
|
149
|
+
`TBAY_DB_URL` environment variable.
|
|
150
|
+
|
|
151
|
+
## Examples
|
|
152
|
+
|
|
153
|
+
- `examples/plain_python_demo.py`: no framework, just `@guarded` functions,
|
|
154
|
+
covering readonly caching, mutating idempotency, a volatile LLM call, and
|
|
155
|
+
an approval bypass threshold.
|
|
156
|
+
- `examples/langchain_demo.py`: stacks under LangChain's `@tool`.
|
|
157
|
+
- `examples/openai_agents_demo.py`: stacks under the OpenAI Agents SDK's
|
|
158
|
+
`@function_tool`.
|
|
159
|
+
|
|
160
|
+
## Development
|
|
161
|
+
|
|
162
|
+
Uses [uv](https://docs.astral.sh/uv/) for dependency management:
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
uv sync --extra dev
|
|
166
|
+
uv run pytest
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Postgres-backed tests are skipped unless `TBAY_TEST_PG_DSN` is set to a
|
|
170
|
+
running Postgres instance (CI provides one automatically).
|
|
171
|
+
|
|
172
|
+
## License
|
|
173
|
+
|
|
174
|
+
MIT
|
|
tbay-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tbay"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Execution safety for AI agent tool calls: idempotency, caching, singleflight, policy, and approval gating, as a library, not a service."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "tbay contributors" }]
|
|
13
|
+
dependencies = [
|
|
14
|
+
"pyyaml>=6.0",
|
|
15
|
+
"click>=8.1",
|
|
16
|
+
]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Operating System :: OS Independent",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
postgres = ["psycopg2-binary>=2.9"]
|
|
25
|
+
dev = ["pytest>=7.4", "psycopg2-binary>=2.9"]
|
|
26
|
+
|
|
27
|
+
[project.scripts]
|
|
28
|
+
tbay = "tbay.cli:main"
|
|
29
|
+
|
|
30
|
+
[tool.setuptools.packages.find]
|
|
31
|
+
where = ["src"]
|
tbay-0.1.0/setup.cfg
ADDED
|
tbay-0.1.0/src/tbay/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from .client import TbayClient
|
|
2
|
+
from .decorator import guarded
|
|
3
|
+
from .exceptions import (
|
|
4
|
+
ApprovalRejected,
|
|
5
|
+
ApprovalTimeout,
|
|
6
|
+
ConcurrencyLimitExceeded,
|
|
7
|
+
ExecutionFailed,
|
|
8
|
+
ExecutionTimeout,
|
|
9
|
+
RateLimitExceeded,
|
|
10
|
+
TbayError,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
__version__ = "0.1.0"

# The package's public API: the client, the decorator, and the exception
# types re-exported by the imports at the top of this module.
__all__ = [
    "TbayClient",
    "guarded",
    "TbayError",
    "ApprovalRejected",
    "ApprovalTimeout",
    "ExecutionFailed",
    "ExecutionTimeout",
    "RateLimitExceeded",
    "ConcurrencyLimitExceeded",
]
|
|
tbay-0.1.0/src/tbay/backends/base.py
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import List, Optional
|
|
6
|
+
|
|
7
|
+
from ..exceptions import ApprovalTimeout, ExecutionTimeout
|
|
8
|
+
|
|
9
|
+
# Execution lifecycle statuses. wait_for_result() treats SUCCEEDED and FAILED
# as the terminal ones.
RUNNING = "RUNNING"
SUCCEEDED = "SUCCEEDED"
FAILED = "FAILED"
WAITING_APPROVAL = "WAITING_APPROVAL"

# Approval decision states. wait_for_approval() keeps polling while the state
# is APPROVAL_PENDING and returns as soon as it is anything else.
APPROVAL_PENDING = "pending"
APPROVAL_APPROVED = "approved"
APPROVAL_REJECTED = "rejected"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ExecutionRecord:
    """One row of the audit log: everything tbay knows about a single
    tool call, keyed by (tool_name, idempotency_key, tenant)."""

    # Row identity and the lookup key described in the docstring.
    id: str
    tool_name: str
    idempotency_key: str
    tenant: str
    # Compared against the module-level status constants (SUCCEEDED, FAILED, ...).
    status: str
    args_hash: str
    # NOTE(review): presumably the JSON-serialized call arguments -- confirm
    # the exact encoding against the concrete backends.
    args_json: Optional[str]
    # NOTE(review): presumably unset until the call finishes -- confirm.
    result_json: Optional[str]
    error: Optional[str]
    policy_name: str
    retry_count: int
    # NOTE(review): timestamps look like time.time() epoch seconds (see
    # StorageBackend.count_since) -- confirm against the concrete backends.
    cache_expires_at: Optional[float]
    created_at: float
    finished_at: Optional[float]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class AcquireResult:
    """What acquire_or_get() decided the caller should do next.

    Exactly one of the flags below matters:

    - owner: caller must run the real function.
    - use_cached: caller should return record.result_json right away.
    - follow_running: caller should block on wait_for_result().
    - follow_approval: caller should block on wait_for_approval(), then
      wait_for_result().
    - raise_stored_error: caller should raise the stored error right away
      (no retry left).
    - wait_for_slot: max_concurrent is full; caller should pause briefly and
      call acquire_or_get() again (record is None in this case, since
      nothing was reserved).
    """

    # Quoted so the forward reference to ExecutionRecord is never evaluated
    # when the class is created.
    record: "Optional[ExecutionRecord]"
    # Field order is part of the positional constructor signature; keep it.
    owner: bool = False
    use_cached: bool = False
    follow_running: bool = False
    wait_for_slot: bool = False
    follow_approval: bool = False
    raise_stored_error: bool = False
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class StorageBackend:
    """The interface SQLiteBackend and PostgresBackend both implement.

    Coordinating multiple callers, possibly in different processes or on
    different machines, is entirely the database's job here: acquire_or_get
    must claim a lease atomically (INSERT ... ON CONFLICT DO NOTHING,
    followed by a compare-and-swap UPDATE when reclaiming an expired or
    failed row). There's no shared in-memory state between callers, only
    whatever the database itself guarantees.

    Every storage method below raises NotImplementedError here and must be
    overridden; only the polling helpers at the bottom are shared.
    """

    def init_schema(self) -> None:
        """Create the executions/approvals tables if they don't already exist."""
        raise NotImplementedError

    def acquire_or_get(
        self,
        *,
        execution_id: str,
        tool_name: str,
        idempotency_key: str,
        tenant: str,
        policy_name: str,
        args_hash: str,
        args_json: Optional[str],
        max_retries: int,
        retry_backoff: float,
        max_concurrent: Optional[int] = None,
    ) -> "AcquireResult":
        """Try to become the owner of this (tool_name, idempotency_key, tenant).
        If someone already owns it, report back what the caller should do instead.

        If max_concurrent is set, the RUNNING count check and the insert
        happen in the same transaction, so this is atomic rather than a
        separate check-then-act: two simultaneous new callers can't both
        slip past the cap the way a check performed before this call would
        allow.
        """
        raise NotImplementedError

    def mark_waiting_approval(self, execution_id: str) -> None:
        """Flag the execution as paused for human approval.

        NOTE(review): presumably sets the WAITING_APPROVAL status and opens
        a pending approval -- confirm against the concrete backends.
        """
        raise NotImplementedError

    def complete(self, execution_id: str, result_json: str, cache_ttl_seconds: Optional[float]) -> None:
        """Store the result of a finished execution.

        NOTE(review): cache_ttl_seconds presumably bounds how long the
        stored result may be served to later callers -- confirm how None
        and 0 are treated by the concrete backends.
        """
        raise NotImplementedError

    def fail(self, execution_id: str, error: str) -> None:
        """Store the error of an execution that did not succeed."""
        raise NotImplementedError

    def get(self, execution_id: str) -> "Optional[ExecutionRecord]":
        """Return the record for execution_id, or None (wait_for_result()
        treats a falsy return as "not finished yet" and keeps polling)."""
        raise NotImplementedError

    def get_approval_status(self, execution_id: str) -> Optional[str]:
        """Return the approval state for execution_id, or None
        (wait_for_approval() keeps polling on None or APPROVAL_PENDING)."""
        raise NotImplementedError

    def resolve_approval(self, execution_id: str, approved: bool, resolver: str = "") -> None:
        """Record an approve (approved=True) or reject decision.

        NOTE(review): resolver presumably identifies who made the decision
        -- confirm against the CLI and the concrete backends.
        """
        raise NotImplementedError

    def list_executions(
        self,
        *,
        tool_name: Optional[str] = None,
        status: Optional[str] = None,
        tenant: Optional[str] = None,
        limit: int = 50,
    ) -> "List[ExecutionRecord]":
        """Return up to `limit` records, optionally filtered by tool, status
        and tenant.

        NOTE(review): result ordering is not specified here -- confirm
        against the concrete backends.
        """
        raise NotImplementedError

    def count_since(self, tool_name: str, tenant: str, since: float) -> int:
        """How many calls to this tool (for this tenant) were created at or
        after `since` (a time.time() timestamp). Backs the rate_limit guardrail."""
        raise NotImplementedError

    # -- generic polling helpers, shared by every backend --

    @staticmethod
    def _sleep_before_next_poll(deadline: float, poll_interval: float) -> bool:
        """Sleep until the next poll is due; return False once `deadline`
        (a time.monotonic() value) has passed, without sleeping."""
        remaining = deadline - time.monotonic()
        if remaining < 0:
            return False
        # Never sleep past the deadline, so the final poll happens right at
        # the timeout instead of up to one full poll_interval after it.
        time.sleep(min(poll_interval, remaining))
        return True

    def wait_for_result(self, execution_id: str, timeout: float, poll_interval: float) -> "ExecutionRecord":
        """Poll get() until the execution is SUCCEEDED or FAILED and return
        its record.

        Raises ExecutionTimeout if no terminal record shows up within
        `timeout` seconds. The record is always polled at least once, and
        once more at the deadline before giving up.
        """
        # Monotonic clock: a wall-clock adjustment must not shorten or
        # extend the wait.
        deadline = time.monotonic() + timeout
        while True:
            record = self.get(execution_id)
            if record and record.status in (SUCCEEDED, FAILED):
                return record
            if not self._sleep_before_next_poll(deadline, poll_interval):
                raise ExecutionTimeout(f"timed out waiting for execution {execution_id} to finish")

    def wait_for_approval(self, execution_id: str, timeout: float, poll_interval: float) -> str:
        """Poll get_approval_status() until it is set to anything other
        than APPROVAL_PENDING and return that status.

        Raises ApprovalTimeout if no decision shows up within `timeout`
        seconds.
        """
        # Monotonic clock, for the same reason as in wait_for_result().
        deadline = time.monotonic() + timeout
        while True:
            status = self.get_approval_status(execution_id)
            if status and status != APPROVAL_PENDING:
                return status
            if not self._sleep_before_next_poll(deadline, poll_interval):
                raise ApprovalTimeout(f"nobody approved or rejected execution {execution_id} in time")
|