tokenfence 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tokenfence-0.1.0/LICENSE +21 -0
- tokenfence-0.1.0/PKG-INFO +127 -0
- tokenfence-0.1.0/README.md +94 -0
- tokenfence-0.1.0/pyproject.toml +40 -0
- tokenfence-0.1.0/setup.cfg +4 -0
- tokenfence-0.1.0/src/tokenfence/__init__.py +7 -0
- tokenfence-0.1.0/src/tokenfence/exceptions.py +16 -0
- tokenfence-0.1.0/src/tokenfence/guard.py +353 -0
- tokenfence-0.1.0/src/tokenfence/pricing.py +80 -0
- tokenfence-0.1.0/src/tokenfence/tracker.py +72 -0
- tokenfence-0.1.0/src/tokenfence.egg-info/PKG-INFO +127 -0
- tokenfence-0.1.0/src/tokenfence.egg-info/SOURCES.txt +17 -0
- tokenfence-0.1.0/src/tokenfence.egg-info/dependency_links.txt +1 -0
- tokenfence-0.1.0/src/tokenfence.egg-info/requires.txt +14 -0
- tokenfence-0.1.0/src/tokenfence.egg-info/top_level.txt +1 -0
- tokenfence-0.1.0/tests/test_anthropic.py +266 -0
- tokenfence-0.1.0/tests/test_guard.py +198 -0
- tokenfence-0.1.0/tests/test_pricing.py +46 -0
- tokenfence-0.1.0/tests/test_tracker.py +85 -0
tokenfence-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 TokenFence Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tokenfence
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Cost circuit breaker for AI agents — guard your OpenAI spend with automatic downgrade and kill switch.
|
|
5
|
+
Author: TokenFence Team
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/tokenfence/tokenfence-python
|
|
8
|
+
Project-URL: Issues, https://github.com/tokenfence/tokenfence-python/issues
|
|
9
|
+
Keywords: openai,cost,budget,ai,llm,guardrail
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Provides-Extra: openai
|
|
23
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
24
|
+
Provides-Extra: anthropic
|
|
25
|
+
Requires-Dist: anthropic>=0.30.0; extra == "anthropic"
|
|
26
|
+
Provides-Extra: google
|
|
27
|
+
Requires-Dist: google-generativeai>=0.7.0; extra == "google"
|
|
28
|
+
Provides-Extra: all
|
|
29
|
+
Requires-Dist: openai>=1.0.0; extra == "all"
|
|
30
|
+
Requires-Dist: anthropic>=0.30.0; extra == "all"
|
|
31
|
+
Requires-Dist: google-generativeai>=0.7.0; extra == "all"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# TokenFence
|
|
35
|
+
|
|
36
|
+
Cost circuit breaker for AI agents. Guard your LLM spend with automatic model downgrade and kill switch. Supports OpenAI, Anthropic Claude, Google Gemini, and DeepSeek.
|
|
37
|
+
|
|
38
|
+
## Install
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install tokenfence[openai]
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Quick Start
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
import openai
|
|
48
|
+
from tokenfence import guard
|
|
49
|
+
|
|
50
|
+
client = guard(
|
|
51
|
+
openai.OpenAI(),
|
|
52
|
+
budget='$0.50',
|
|
53
|
+
fallback='gpt-4o-mini',
|
|
54
|
+
on_limit='stop',
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
# Use exactly like a normal OpenAI client
|
|
58
|
+
response = client.chat.completions.create(
|
|
59
|
+
model='gpt-4o',
|
|
60
|
+
messages=[{'role': 'user', 'content': 'Hello'}],
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# Check spend
|
|
64
|
+
print(client.tokenfence.spent) # 0.0023
|
|
65
|
+
print(client.tokenfence.remaining) # 0.4977
|
|
66
|
+
print(client.tokenfence.calls) # 1
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Anthropic Claude
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
import anthropic
|
|
73
|
+
from tokenfence import guard
|
|
74
|
+
|
|
75
|
+
client = guard(
|
|
76
|
+
anthropic.Anthropic(),
|
|
77
|
+
budget='$1.00',
|
|
78
|
+
fallback='claude-3-haiku-20240307',
|
|
79
|
+
on_limit='stop',
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
# Use exactly like a normal Anthropic client
|
|
83
|
+
response = client.messages.create(
|
|
84
|
+
model='claude-3-5-sonnet-20241022',
|
|
85
|
+
max_tokens=1024,
|
|
86
|
+
messages=[{'role': 'user', 'content': 'Hello'}],
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
# Check spend
|
|
90
|
+
print(client.tokenfence.spent) # 0.00105
|
|
91
|
+
print(client.tokenfence.remaining) # 0.99895
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## How It Works
|
|
95
|
+
|
|
96
|
+
1. **Track** — every `chat.completions.create()` call records token usage and calculates cost.
|
|
97
|
+
2. **Downgrade** — when cumulative spend hits the threshold (default 80% of budget), the model is transparently swapped to your fallback.
|
|
98
|
+
3. **Kill switch** — when the budget is fully consumed:
|
|
99
|
+
- `on_limit='stop'` — returns a synthetic response explaining the budget was exceeded.
|
|
100
|
+
- `on_limit='warn'` — logs a warning but allows the call through.
|
|
101
|
+
- `on_limit='raise'` — raises `BudgetExceeded`.
|
|
102
|
+
|
|
103
|
+
## API
|
|
104
|
+
|
|
105
|
+
### `guard(client, *, budget, fallback=None, on_limit='stop', threshold=0.8)`
|
|
106
|
+
|
|
107
|
+
| Parameter | Type | Description |
|
|
108
|
+
|-----------|------|-------------|
|
|
109
|
+
| `client` | `openai.OpenAI \| anthropic.Anthropic` | An OpenAI or Anthropic client instance |
|
|
110
|
+
| `budget` | `str \| float` | Max spend — `'$0.50'` or `0.50` |
|
|
111
|
+
| `fallback` | `str \| None` | Model to downgrade to when threshold is hit |
|
|
112
|
+
| `on_limit` | `str` | `'stop'`, `'warn'`, or `'raise'` |
|
|
113
|
+
| `threshold` | `float` | Fraction of budget at which downgrade kicks in (0.0–1.0) |
|
|
114
|
+
|
|
115
|
+
### `client.tokenfence`
|
|
116
|
+
|
|
117
|
+
| Attribute | Description |
|
|
118
|
+
|-----------|-------------|
|
|
119
|
+
| `.spent` | Total USD spent so far |
|
|
120
|
+
| `.remaining` | USD remaining in budget |
|
|
121
|
+
| `.calls` | Number of tracked API calls |
|
|
122
|
+
| `.budget` | The configured budget |
|
|
123
|
+
| `.reset()` | Reset spend tracking to zero |
|
|
124
|
+
|
|
125
|
+
## License
|
|
126
|
+
|
|
127
|
+
MIT
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# TokenFence
|
|
2
|
+
|
|
3
|
+
Cost circuit breaker for AI agents. Guard your LLM spend with automatic model downgrade and kill switch. Supports OpenAI, Anthropic Claude, Google Gemini, and DeepSeek.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install tokenfence[openai]
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
import openai
|
|
15
|
+
from tokenfence import guard
|
|
16
|
+
|
|
17
|
+
client = guard(
|
|
18
|
+
openai.OpenAI(),
|
|
19
|
+
budget='$0.50',
|
|
20
|
+
fallback='gpt-4o-mini',
|
|
21
|
+
on_limit='stop',
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Use exactly like a normal OpenAI client
|
|
25
|
+
response = client.chat.completions.create(
|
|
26
|
+
model='gpt-4o',
|
|
27
|
+
messages=[{'role': 'user', 'content': 'Hello'}],
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# Check spend
|
|
31
|
+
print(client.tokenfence.spent) # 0.0023
|
|
32
|
+
print(client.tokenfence.remaining) # 0.4977
|
|
33
|
+
print(client.tokenfence.calls) # 1
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Anthropic Claude
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
import anthropic
|
|
40
|
+
from tokenfence import guard
|
|
41
|
+
|
|
42
|
+
client = guard(
|
|
43
|
+
anthropic.Anthropic(),
|
|
44
|
+
budget='$1.00',
|
|
45
|
+
fallback='claude-3-haiku-20240307',
|
|
46
|
+
on_limit='stop',
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Use exactly like a normal Anthropic client
|
|
50
|
+
response = client.messages.create(
|
|
51
|
+
model='claude-3-5-sonnet-20241022',
|
|
52
|
+
max_tokens=1024,
|
|
53
|
+
messages=[{'role': 'user', 'content': 'Hello'}],
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# Check spend
|
|
57
|
+
print(client.tokenfence.spent) # 0.00105
|
|
58
|
+
print(client.tokenfence.remaining) # 0.99895
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## How It Works
|
|
62
|
+
|
|
63
|
+
1. **Track** — every `chat.completions.create()` call records token usage and calculates cost.
|
|
64
|
+
2. **Downgrade** — when cumulative spend hits the threshold (default 80% of budget), the model is transparently swapped to your fallback.
|
|
65
|
+
3. **Kill switch** — when the budget is fully consumed:
|
|
66
|
+
- `on_limit='stop'` — returns a synthetic response explaining the budget was exceeded.
|
|
67
|
+
- `on_limit='warn'` — logs a warning but allows the call through.
|
|
68
|
+
- `on_limit='raise'` — raises `BudgetExceeded`.
|
|
69
|
+
|
|
70
|
+
## API
|
|
71
|
+
|
|
72
|
+
### `guard(client, *, budget, fallback=None, on_limit='stop', threshold=0.8)`
|
|
73
|
+
|
|
74
|
+
| Parameter | Type | Description |
|
|
75
|
+
|-----------|------|-------------|
|
|
76
|
+
| `client` | `openai.OpenAI \| anthropic.Anthropic` | An OpenAI or Anthropic client instance |
|
|
77
|
+
| `budget` | `str \| float` | Max spend — `'$0.50'` or `0.50` |
|
|
78
|
+
| `fallback` | `str \| None` | Model to downgrade to when threshold is hit |
|
|
79
|
+
| `on_limit` | `str` | `'stop'`, `'warn'`, or `'raise'` |
|
|
80
|
+
| `threshold` | `float` | Fraction of budget at which downgrade kicks in (0.0–1.0) |
|
|
81
|
+
|
|
82
|
+
### `client.tokenfence`
|
|
83
|
+
|
|
84
|
+
| Attribute | Description |
|
|
85
|
+
|-----------|-------------|
|
|
86
|
+
| `.spent` | Total USD spent so far |
|
|
87
|
+
| `.remaining` | USD remaining in budget |
|
|
88
|
+
| `.calls` | Number of tracked API calls |
|
|
89
|
+
| `.budget` | The configured budget |
|
|
90
|
+
| `.reset()` | Reset spend tracking to zero |
|
|
91
|
+
|
|
92
|
+
## License
|
|
93
|
+
|
|
94
|
+
MIT
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tokenfence"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Cost circuit breaker for AI agents — guard your OpenAI spend with automatic downgrade and kill switch."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "TokenFence Team" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["openai", "cost", "budget", "ai", "llm", "guardrail"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Topic :: Software Development :: Libraries",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
openai = ["openai>=1.0.0"]
|
|
31
|
+
anthropic = ["anthropic>=0.30.0"]
|
|
32
|
+
google = ["google-generativeai>=0.7.0"]
|
|
33
|
+
all = ["openai>=1.0.0", "anthropic>=0.30.0", "google-generativeai>=0.7.0"]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/tokenfence/tokenfence-python"
|
|
37
|
+
Issues = "https://github.com/tokenfence/tokenfence-python/issues"
|
|
38
|
+
|
|
39
|
+
[tool.setuptools.packages.find]
|
|
40
|
+
where = ["src"]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""TokenFence exceptions."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TokenFenceError(Exception):
    """Root of the TokenFence exception hierarchy.

    Catch this to handle any error raised by the library.
    """
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BudgetExceeded(TokenFenceError):
    """Signals that spending consumed the whole budget (``on_limit='raise'``)."""

    def __init__(self, budget: float, spent: float) -> None:
        # Keep the raw figures available to handlers for logging/reporting.
        self.budget = budget
        self.spent = spent
        message = f"Budget of ${budget:.4f} exceeded (spent ${spent:.4f})"
        super().__init__(message)
|
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
"""Core ``guard()`` function — wraps an OpenAI client with cost controls."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any, Literal, Optional, Union
|
|
7
|
+
|
|
8
|
+
from .exceptions import BudgetExceeded, TokenFenceError
|
|
9
|
+
from .pricing import calculate_cost
|
|
10
|
+
from .tracker import CostTracker
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger("tokenfence")
|
|
13
|
+
|
|
14
|
+
OnLimit = Literal["stop", "warn", "raise"]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _parse_budget(budget: Union[str, float, int]) -> float:
|
|
18
|
+
"""Convert a budget value like ``'$0.50'`` or ``0.50`` to a float."""
|
|
19
|
+
if isinstance(budget, str):
|
|
20
|
+
cleaned = budget.strip().lstrip("$").strip()
|
|
21
|
+
try:
|
|
22
|
+
return float(cleaned)
|
|
23
|
+
except ValueError:
|
|
24
|
+
raise TokenFenceError(f"Invalid budget string: {budget!r}") from None
|
|
25
|
+
return float(budget)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def guard(
    client: Any,
    *,
    budget: Union[str, float, int],
    fallback: Optional[str] = None,
    on_limit: OnLimit = "stop",
    threshold: float = 0.8,
) -> Any:
    """Wrap an OpenAI (or compatible) client with cost tracking and budget enforcement.

    Args:
        client: An ``openai.OpenAI`` / ``anthropic.Anthropic`` (or compatible)
            client instance.
        budget: Maximum spend in USD — accepts ``'$0.50'`` or ``0.50``.
        fallback: Model name to downgrade to when the threshold is reached.
        on_limit: Behaviour when the budget is exhausted —
            ``'stop'`` returns a synthetic response,
            ``'warn'`` logs a warning and allows the call,
            ``'raise'`` raises :class:`BudgetExceeded`.
        threshold: Fraction of the budget (0.0–1.0) at which to start
            downgrading to the *fallback* model.

    Returns:
        A wrapped client that is a drop-in replacement for the original.

    Raises:
        TokenFenceError: On an invalid ``on_limit``, ``threshold``, or budget.
    """
    if on_limit not in ("stop", "warn", "raise"):
        raise TokenFenceError(f"on_limit must be 'stop', 'warn', or 'raise', got {on_limit!r}")
    if not 0.0 <= threshold <= 1.0:
        raise TokenFenceError(f"threshold must be between 0.0 and 1.0, got {threshold!r}")

    parsed_budget = _parse_budget(budget)
    # ``not x > 0`` (rather than ``x <= 0``) also rejects NaN, which compares
    # False against everything and would otherwise slip through validation and
    # disable every subsequent budget check.
    if not parsed_budget > 0:
        raise TokenFenceError(f"budget must be positive, got {parsed_budget}")

    tracker = CostTracker(budget=parsed_budget, threshold=threshold)

    return _GuardedClient(
        client=client,
        tracker=tracker,
        fallback=fallback,
        on_limit=on_limit,
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
# Internal proxy objects
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
class _GuardedClient:
|
|
76
|
+
"""Transparent proxy around an OpenAI client."""
|
|
77
|
+
|
|
78
|
+
def __init__(
|
|
79
|
+
self,
|
|
80
|
+
client: Any,
|
|
81
|
+
tracker: CostTracker,
|
|
82
|
+
fallback: Optional[str],
|
|
83
|
+
on_limit: OnLimit,
|
|
84
|
+
) -> None:
|
|
85
|
+
self._client = client
|
|
86
|
+
self._tracker = tracker
|
|
87
|
+
self._fallback = fallback
|
|
88
|
+
self._on_limit = on_limit
|
|
89
|
+
|
|
90
|
+
# Expose tracker as ``client.tokenfence``
|
|
91
|
+
@property
|
|
92
|
+
def tokenfence(self) -> CostTracker:
|
|
93
|
+
return self._tracker
|
|
94
|
+
|
|
95
|
+
# Intercept ``client.chat`` to return our guarded namespace (OpenAI)
|
|
96
|
+
@property
|
|
97
|
+
def chat(self) -> "_GuardedChat":
|
|
98
|
+
return _GuardedChat(self._client.chat, self._tracker, self._fallback, self._on_limit)
|
|
99
|
+
|
|
100
|
+
# Intercept ``client.messages`` for Anthropic-style clients
|
|
101
|
+
@property
|
|
102
|
+
def messages(self) -> "_GuardedAnthropicMessages":
|
|
103
|
+
return _GuardedAnthropicMessages(self._client.messages, self._tracker, self._fallback, self._on_limit)
|
|
104
|
+
|
|
105
|
+
# Pass everything else through to the real client
|
|
106
|
+
def __getattr__(self, name: str) -> Any:
|
|
107
|
+
return getattr(self._client, name)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class _GuardedChat:
|
|
111
|
+
"""Proxy for ``client.chat``."""
|
|
112
|
+
|
|
113
|
+
def __init__(self, chat: Any, tracker: CostTracker, fallback: Optional[str], on_limit: OnLimit) -> None:
|
|
114
|
+
self._chat = chat
|
|
115
|
+
self._tracker = tracker
|
|
116
|
+
self._fallback = fallback
|
|
117
|
+
self._on_limit = on_limit
|
|
118
|
+
|
|
119
|
+
@property
|
|
120
|
+
def completions(self) -> "_GuardedCompletions":
|
|
121
|
+
return _GuardedCompletions(self._chat.completions, self._tracker, self._fallback, self._on_limit)
|
|
122
|
+
|
|
123
|
+
def __getattr__(self, name: str) -> Any:
|
|
124
|
+
return getattr(self._chat, name)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class _GuardedCompletions:
|
|
128
|
+
"""Proxy for ``client.chat.completions`` — intercepts ``create()``."""
|
|
129
|
+
|
|
130
|
+
def __init__(self, completions: Any, tracker: CostTracker, fallback: Optional[str], on_limit: OnLimit) -> None:
|
|
131
|
+
self._completions = completions
|
|
132
|
+
self._tracker = tracker
|
|
133
|
+
self._fallback = fallback
|
|
134
|
+
self._on_limit = on_limit
|
|
135
|
+
|
|
136
|
+
def create(self, **kwargs: Any) -> Any:
|
|
137
|
+
"""Intercept ``chat.completions.create()`` with budget enforcement."""
|
|
138
|
+
tracker = self._tracker
|
|
139
|
+
|
|
140
|
+
# --- Kill switch: budget already exhausted before the call ----------
|
|
141
|
+
if tracker.budget_exceeded:
|
|
142
|
+
return self._handle_limit(kwargs)
|
|
143
|
+
|
|
144
|
+
# --- Auto-downgrade -------------------------------------------------
|
|
145
|
+
original_model = kwargs.get("model")
|
|
146
|
+
if tracker.should_downgrade and self._fallback and original_model != self._fallback:
|
|
147
|
+
logger.warning(
|
|
148
|
+
"TokenFence: spend $%.4f has reached %.0f%% of $%.4f budget — "
|
|
149
|
+
"downgrading from %s to %s",
|
|
150
|
+
tracker.spent,
|
|
151
|
+
tracker.usage_ratio * 100,
|
|
152
|
+
tracker.budget,
|
|
153
|
+
original_model,
|
|
154
|
+
self._fallback,
|
|
155
|
+
)
|
|
156
|
+
kwargs["model"] = self._fallback
|
|
157
|
+
|
|
158
|
+
# --- Make the real API call -----------------------------------------
|
|
159
|
+
response = self._completions.create(**kwargs)
|
|
160
|
+
|
|
161
|
+
# --- Track cost -----------------------------------------------------
|
|
162
|
+
model_used = kwargs.get("model", original_model) or ""
|
|
163
|
+
usage = getattr(response, "usage", None)
|
|
164
|
+
if usage is not None:
|
|
165
|
+
input_tokens = getattr(usage, "prompt_tokens", 0) or 0
|
|
166
|
+
output_tokens = getattr(usage, "completion_tokens", 0) or 0
|
|
167
|
+
cost = calculate_cost(model_used, input_tokens, output_tokens)
|
|
168
|
+
tracker.record(cost)
|
|
169
|
+
else:
|
|
170
|
+
tracker.record(0.0)
|
|
171
|
+
|
|
172
|
+
return response
|
|
173
|
+
|
|
174
|
+
# --- limit handling -----------------------------------------------------
|
|
175
|
+
|
|
176
|
+
def _handle_limit(self, kwargs: dict[str, Any]) -> Any:
|
|
177
|
+
tracker = self._tracker
|
|
178
|
+
if self._on_limit == "raise":
|
|
179
|
+
raise BudgetExceeded(budget=tracker.budget, spent=tracker.spent)
|
|
180
|
+
|
|
181
|
+
if self._on_limit == "warn":
|
|
182
|
+
logger.warning(
|
|
183
|
+
"TokenFence: budget of $%.4f exhausted (spent $%.4f) — allowing call anyway",
|
|
184
|
+
tracker.budget,
|
|
185
|
+
tracker.spent,
|
|
186
|
+
)
|
|
187
|
+
return self._completions.create(**kwargs)
|
|
188
|
+
|
|
189
|
+
# on_limit == "stop" — return a synthetic response
|
|
190
|
+
return _synthetic_response(tracker)
|
|
191
|
+
|
|
192
|
+
def __getattr__(self, name: str) -> Any:
|
|
193
|
+
return getattr(self._completions, name)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# ---------------------------------------------------------------------------
|
|
197
|
+
# Anthropic ``client.messages.create()`` proxy
|
|
198
|
+
# ---------------------------------------------------------------------------
|
|
199
|
+
|
|
200
|
+
class _GuardedAnthropicMessages:
|
|
201
|
+
"""Proxy for Anthropic ``client.messages`` — intercepts ``create()``."""
|
|
202
|
+
|
|
203
|
+
def __init__(self, messages: Any, tracker: CostTracker, fallback: Optional[str], on_limit: OnLimit) -> None:
|
|
204
|
+
self._messages = messages
|
|
205
|
+
self._tracker = tracker
|
|
206
|
+
self._fallback = fallback
|
|
207
|
+
self._on_limit = on_limit
|
|
208
|
+
|
|
209
|
+
def create(self, **kwargs: Any) -> Any:
|
|
210
|
+
"""Intercept ``messages.create()`` with budget enforcement."""
|
|
211
|
+
tracker = self._tracker
|
|
212
|
+
|
|
213
|
+
# --- Kill switch: budget already exhausted --------------------------
|
|
214
|
+
if tracker.budget_exceeded:
|
|
215
|
+
return self._handle_limit(kwargs)
|
|
216
|
+
|
|
217
|
+
# --- Auto-downgrade -------------------------------------------------
|
|
218
|
+
original_model = kwargs.get("model")
|
|
219
|
+
if tracker.should_downgrade and self._fallback and original_model != self._fallback:
|
|
220
|
+
logger.warning(
|
|
221
|
+
"TokenFence: spend $%.4f has reached %.0f%% of $%.4f budget — "
|
|
222
|
+
"downgrading from %s to %s",
|
|
223
|
+
tracker.spent,
|
|
224
|
+
tracker.usage_ratio * 100,
|
|
225
|
+
tracker.budget,
|
|
226
|
+
original_model,
|
|
227
|
+
self._fallback,
|
|
228
|
+
)
|
|
229
|
+
kwargs["model"] = self._fallback
|
|
230
|
+
|
|
231
|
+
# --- Make the real API call -----------------------------------------
|
|
232
|
+
response = self._messages.create(**kwargs)
|
|
233
|
+
|
|
234
|
+
# --- Track cost (Anthropic usage format) ----------------------------
|
|
235
|
+
model_used = kwargs.get("model", original_model) or ""
|
|
236
|
+
usage = getattr(response, "usage", None)
|
|
237
|
+
if usage is not None:
|
|
238
|
+
input_tokens = getattr(usage, "input_tokens", 0) or 0
|
|
239
|
+
output_tokens = getattr(usage, "output_tokens", 0) or 0
|
|
240
|
+
cost = calculate_cost(model_used, input_tokens, output_tokens)
|
|
241
|
+
tracker.record(cost)
|
|
242
|
+
else:
|
|
243
|
+
tracker.record(0.0)
|
|
244
|
+
|
|
245
|
+
return response
|
|
246
|
+
|
|
247
|
+
def _handle_limit(self, kwargs: dict[str, Any]) -> Any:
|
|
248
|
+
tracker = self._tracker
|
|
249
|
+
if self._on_limit == "raise":
|
|
250
|
+
raise BudgetExceeded(budget=tracker.budget, spent=tracker.spent)
|
|
251
|
+
|
|
252
|
+
if self._on_limit == "warn":
|
|
253
|
+
logger.warning(
|
|
254
|
+
"TokenFence: budget of $%.4f exhausted (spent $%.4f) — allowing call anyway",
|
|
255
|
+
tracker.budget,
|
|
256
|
+
tracker.spent,
|
|
257
|
+
)
|
|
258
|
+
return self._messages.create(**kwargs)
|
|
259
|
+
|
|
260
|
+
# on_limit == "stop" — return a synthetic Anthropic-style response
|
|
261
|
+
return _synthetic_anthropic_response(tracker)
|
|
262
|
+
|
|
263
|
+
def __getattr__(self, name: str) -> Any:
|
|
264
|
+
return getattr(self._messages, name)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
# ---------------------------------------------------------------------------
|
|
268
|
+
# Synthetic response for on_limit='stop'
|
|
269
|
+
# ---------------------------------------------------------------------------
|
|
270
|
+
|
|
271
|
+
class _SyntheticUsage:
|
|
272
|
+
prompt_tokens: int = 0
|
|
273
|
+
completion_tokens: int = 0
|
|
274
|
+
total_tokens: int = 0
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class _SyntheticMessage:
|
|
278
|
+
role: str = "assistant"
|
|
279
|
+
content: str = ""
|
|
280
|
+
|
|
281
|
+
def __init__(self, content: str) -> None:
|
|
282
|
+
self.content = content
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class _SyntheticChoice:
|
|
286
|
+
index: int = 0
|
|
287
|
+
finish_reason: str = "stop"
|
|
288
|
+
message: _SyntheticMessage
|
|
289
|
+
|
|
290
|
+
def __init__(self, message: _SyntheticMessage) -> None:
|
|
291
|
+
self.message = message
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
class _SyntheticResponse:
    """Minimal stand-in for an OpenAI ``ChatCompletion`` object.

    Produced when ``on_limit='stop'`` blocks a request: one assistant
    message explaining why, with all usage counters at zero.
    """

    id: str = "tokenfence-budget-exceeded"
    object: str = "chat.completion"
    model: str = "tokenfence"
    usage: _SyntheticUsage

    def __init__(self, tracker: CostTracker) -> None:
        notice = (
            f"[TokenFence] Budget of ${tracker.budget:.2f} exceeded "
            f"(spent ${tracker.spent:.4f}). Request blocked."
        )
        self.usage = _SyntheticUsage()
        self.choices = [_SyntheticChoice(_SyntheticMessage(notice))]
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _synthetic_response(tracker: CostTracker) -> _SyntheticResponse:
    """Build the stand-in ChatCompletion returned when on_limit='stop' blocks a call."""
    return _SyntheticResponse(tracker)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# ---------------------------------------------------------------------------
|
|
316
|
+
# Anthropic-style synthetic response for on_limit='stop'
|
|
317
|
+
# ---------------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
class _SyntheticAnthropicUsage:
|
|
320
|
+
input_tokens: int = 0
|
|
321
|
+
output_tokens: int = 0
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
class _SyntheticAnthropicContentBlock:
|
|
325
|
+
type: str = "text"
|
|
326
|
+
text: str = ""
|
|
327
|
+
|
|
328
|
+
def __init__(self, text: str) -> None:
|
|
329
|
+
self.type = "text"
|
|
330
|
+
self.text = text
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
class _SyntheticAnthropicResponse:
    """Minimal stand-in for an Anthropic ``Message`` object.

    Produced when ``on_limit='stop'`` blocks a request: one text content
    block explaining why, with all usage counters at zero.
    """

    id: str = "tokenfence-budget-exceeded"
    type: str = "message"
    role: str = "assistant"
    model: str = "tokenfence"
    stop_reason: str = "end_turn"

    def __init__(self, tracker: CostTracker) -> None:
        notice = (
            f"[TokenFence] Budget of ${tracker.budget:.2f} exceeded "
            f"(spent ${tracker.spent:.4f}). Request blocked."
        )
        self.usage = _SyntheticAnthropicUsage()
        self.content = [_SyntheticAnthropicContentBlock(notice)]
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _synthetic_anthropic_response(tracker: CostTracker) -> _SyntheticAnthropicResponse:
    """Build the stand-in Anthropic Message returned when on_limit='stop' blocks a call."""
    return _SyntheticAnthropicResponse(tracker)
|