tokenfence 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 TokenFence Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,127 @@
1
+ Metadata-Version: 2.4
2
+ Name: tokenfence
3
+ Version: 0.1.0
4
+ Summary: Cost circuit breaker for AI agents — guard your OpenAI spend with automatic downgrade and kill switch.
5
+ Author: TokenFence Team
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/tokenfence/tokenfence-python
8
+ Project-URL: Issues, https://github.com/tokenfence/tokenfence-python/issues
9
+ Keywords: openai,cost,budget,ai,llm,guardrail
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Software Development :: Libraries
19
+ Requires-Python: >=3.9
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Provides-Extra: openai
23
+ Requires-Dist: openai>=1.0.0; extra == "openai"
24
+ Provides-Extra: anthropic
25
+ Requires-Dist: anthropic>=0.30.0; extra == "anthropic"
26
+ Provides-Extra: google
27
+ Requires-Dist: google-generativeai>=0.7.0; extra == "google"
28
+ Provides-Extra: all
29
+ Requires-Dist: openai>=1.0.0; extra == "all"
30
+ Requires-Dist: anthropic>=0.30.0; extra == "all"
31
+ Requires-Dist: google-generativeai>=0.7.0; extra == "all"
32
+ Dynamic: license-file
33
+
34
+ # TokenFence
35
+
36
+ Cost circuit breaker for AI agents. Guard your LLM spend with automatic model downgrade and kill switch. Supports OpenAI, Anthropic Claude, Google Gemini, and DeepSeek.
37
+
38
+ ## Install
39
+
40
+ ```bash
41
+ pip install tokenfence[openai]
42
+ ```
43
+
44
+ ## Quick Start
45
+
46
+ ```python
47
+ import openai
48
+ from tokenfence import guard
49
+
50
+ client = guard(
51
+ openai.OpenAI(),
52
+ budget='$0.50',
53
+ fallback='gpt-4o-mini',
54
+ on_limit='stop',
55
+ )
56
+
57
+ # Use exactly like a normal OpenAI client
58
+ response = client.chat.completions.create(
59
+ model='gpt-4o',
60
+ messages=[{'role': 'user', 'content': 'Hello'}],
61
+ )
62
+
63
+ # Check spend
64
+ print(client.tokenfence.spent) # 0.0023
65
+ print(client.tokenfence.remaining) # 0.4977
66
+ print(client.tokenfence.calls) # 1
67
+ ```
68
+
69
+ ## Anthropic Claude
70
+
71
+ ```python
72
+ import anthropic
73
+ from tokenfence import guard
74
+
75
+ client = guard(
76
+ anthropic.Anthropic(),
77
+ budget='$1.00',
78
+ fallback='claude-3-haiku-20240307',
79
+ on_limit='stop',
80
+ )
81
+
82
+ # Use exactly like a normal Anthropic client
83
+ response = client.messages.create(
84
+ model='claude-3-5-sonnet-20241022',
85
+ max_tokens=1024,
86
+ messages=[{'role': 'user', 'content': 'Hello'}],
87
+ )
88
+
89
+ # Check spend
90
+ print(client.tokenfence.spent) # 0.00105
91
+ print(client.tokenfence.remaining) # 0.99895
92
+ ```
93
+
94
+ ## How It Works
95
+
96
+ 1. **Track** — every `chat.completions.create()` call records token usage and calculates cost.
97
+ 2. **Downgrade** — when cumulative spend hits the threshold (default 80% of budget), the model is transparently swapped to your fallback.
98
+ 3. **Kill switch** — when the budget is fully consumed:
99
+ - `on_limit='stop'` — returns a synthetic response explaining the budget was exceeded.
100
+ - `on_limit='warn'` — logs a warning but allows the call through.
101
+ - `on_limit='raise'` — raises `BudgetExceeded`.
102
+
103
+ ## API
104
+
105
+ ### `guard(client, *, budget, fallback=None, on_limit='stop', threshold=0.8)`
106
+
107
+ | Parameter | Type | Description |
108
+ |-----------|------|-------------|
109
+ | `client` | `openai.OpenAI \| anthropic.Anthropic` | An OpenAI or Anthropic client instance |
110
+ | `budget` | `str \| float` | Max spend — `'$0.50'` or `0.50` |
111
+ | `fallback` | `str \| None` | Model to downgrade to when threshold is hit |
112
+ | `on_limit` | `str` | `'stop'`, `'warn'`, or `'raise'` |
113
+ | `threshold` | `float` | Fraction of budget at which downgrade kicks in (0.0–1.0) |
114
+
115
+ ### `client.tokenfence`
116
+
117
+ | Attribute | Description |
118
+ |-----------|-------------|
119
+ | `.spent` | Total USD spent so far |
120
+ | `.remaining` | USD remaining in budget |
121
+ | `.calls` | Number of tracked API calls |
122
+ | `.budget` | The configured budget |
123
+ | `.reset()` | Reset spend tracking to zero |
124
+
125
+ ## License
126
+
127
+ MIT
@@ -0,0 +1,94 @@
1
+ # TokenFence
2
+
3
+ Cost circuit breaker for AI agents. Guard your LLM spend with automatic model downgrade and kill switch. Supports OpenAI, Anthropic Claude, Google Gemini, and DeepSeek.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install tokenfence[openai]
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```python
14
+ import openai
15
+ from tokenfence import guard
16
+
17
+ client = guard(
18
+ openai.OpenAI(),
19
+ budget='$0.50',
20
+ fallback='gpt-4o-mini',
21
+ on_limit='stop',
22
+ )
23
+
24
+ # Use exactly like a normal OpenAI client
25
+ response = client.chat.completions.create(
26
+ model='gpt-4o',
27
+ messages=[{'role': 'user', 'content': 'Hello'}],
28
+ )
29
+
30
+ # Check spend
31
+ print(client.tokenfence.spent) # 0.0023
32
+ print(client.tokenfence.remaining) # 0.4977
33
+ print(client.tokenfence.calls) # 1
34
+ ```
35
+
36
+ ## Anthropic Claude
37
+
38
+ ```python
39
+ import anthropic
40
+ from tokenfence import guard
41
+
42
+ client = guard(
43
+ anthropic.Anthropic(),
44
+ budget='$1.00',
45
+ fallback='claude-3-haiku-20240307',
46
+ on_limit='stop',
47
+ )
48
+
49
+ # Use exactly like a normal Anthropic client
50
+ response = client.messages.create(
51
+ model='claude-3-5-sonnet-20241022',
52
+ max_tokens=1024,
53
+ messages=[{'role': 'user', 'content': 'Hello'}],
54
+ )
55
+
56
+ # Check spend
57
+ print(client.tokenfence.spent) # 0.00105
58
+ print(client.tokenfence.remaining) # 0.99895
59
+ ```
60
+
61
+ ## How It Works
62
+
63
+ 1. **Track** — every `chat.completions.create()` call records token usage and calculates cost.
64
+ 2. **Downgrade** — when cumulative spend hits the threshold (default 80% of budget), the model is transparently swapped to your fallback.
65
+ 3. **Kill switch** — when the budget is fully consumed:
66
+ - `on_limit='stop'` — returns a synthetic response explaining the budget was exceeded.
67
+ - `on_limit='warn'` — logs a warning but allows the call through.
68
+ - `on_limit='raise'` — raises `BudgetExceeded`.
69
+
70
+ ## API
71
+
72
+ ### `guard(client, *, budget, fallback=None, on_limit='stop', threshold=0.8)`
73
+
74
+ | Parameter | Type | Description |
75
+ |-----------|------|-------------|
76
+ | `client` | `openai.OpenAI \| anthropic.Anthropic` | An OpenAI or Anthropic client instance |
77
+ | `budget` | `str \| float` | Max spend — `'$0.50'` or `0.50` |
78
+ | `fallback` | `str \| None` | Model to downgrade to when threshold is hit |
79
+ | `on_limit` | `str` | `'stop'`, `'warn'`, or `'raise'` |
80
+ | `threshold` | `float` | Fraction of budget at which downgrade kicks in (0.0–1.0) |
81
+
82
+ ### `client.tokenfence`
83
+
84
+ | Attribute | Description |
85
+ |-----------|-------------|
86
+ | `.spent` | Total USD spent so far |
87
+ | `.remaining` | USD remaining in budget |
88
+ | `.calls` | Number of tracked API calls |
89
+ | `.budget` | The configured budget |
90
+ | `.reset()` | Reset spend tracking to zero |
91
+
92
+ ## License
93
+
94
+ MIT
@@ -0,0 +1,40 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "tokenfence"
7
+ version = "0.1.0"
8
+ description = "Cost circuit breaker for AI agents — guard your OpenAI spend with automatic downgrade and kill switch."
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ { name = "TokenFence Team" },
14
+ ]
15
+ keywords = ["openai", "cost", "budget", "ai", "llm", "guardrail"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Topic :: Software Development :: Libraries",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ openai = ["openai>=1.0.0"]
31
+ anthropic = ["anthropic>=0.30.0"]
32
+ google = ["google-generativeai>=0.7.0"]
33
+ all = ["openai>=1.0.0", "anthropic>=0.30.0", "google-generativeai>=0.7.0"]
34
+
35
+ [project.urls]
36
+ Homepage = "https://github.com/tokenfence/tokenfence-python"
37
+ Issues = "https://github.com/tokenfence/tokenfence-python/issues"
38
+
39
+ [tool.setuptools.packages.find]
40
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,7 @@
1
+ """TokenFence — cost circuit breaker for AI agents."""
2
+
3
+ from .exceptions import BudgetExceeded, TokenFenceError
4
+ from .guard import guard
5
+
6
+ __all__ = ["guard", "TokenFenceError", "BudgetExceeded"]
7
+ __version__ = "0.1.0"
@@ -0,0 +1,16 @@
1
+ """TokenFence exceptions."""
2
+
3
+
4
class TokenFenceError(Exception):
    """Base class for every error raised by TokenFence."""


class BudgetExceeded(TokenFenceError):
    """Signals that spending has consumed the entire budget (on_limit='raise').

    Attributes:
        budget: The configured budget in USD.
        spent: Total USD spent at the moment the limit was hit.
    """

    def __init__(self, budget: float, spent: float) -> None:
        self.budget = budget
        self.spent = spent
        message = f"Budget of ${budget:.4f} exceeded (spent ${spent:.4f})"
        super().__init__(message)
@@ -0,0 +1,353 @@
1
+ """Core ``guard()`` function — wraps an OpenAI client with cost controls."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import Any, Literal, Optional, Union
7
+
8
+ from .exceptions import BudgetExceeded, TokenFenceError
9
+ from .pricing import calculate_cost
10
+ from .tracker import CostTracker
11
+
12
+ logger = logging.getLogger("tokenfence")
13
+
14
+ OnLimit = Literal["stop", "warn", "raise"]
15
+
16
+
17
+ def _parse_budget(budget: Union[str, float, int]) -> float:
18
+ """Convert a budget value like ``'$0.50'`` or ``0.50`` to a float."""
19
+ if isinstance(budget, str):
20
+ cleaned = budget.strip().lstrip("$").strip()
21
+ try:
22
+ return float(cleaned)
23
+ except ValueError:
24
+ raise TokenFenceError(f"Invalid budget string: {budget!r}") from None
25
+ return float(budget)
26
+
27
+
28
def guard(
    client: Any,
    *,
    budget: Union[str, float, int],
    fallback: Optional[str] = None,
    on_limit: OnLimit = "stop",
    threshold: float = 0.8,
) -> Any:
    """Wrap an LLM client with spend tracking and budget enforcement.

    Args:
        client: An ``openai.OpenAI``-compatible (or Anthropic-style) client.
        budget: Maximum spend in USD — accepts ``'$0.50'``, ``0.50``, or ``1``.
        fallback: Model to transparently swap in once *threshold* is reached.
        on_limit: What happens once the budget is gone —
            ``'stop'`` returns a synthetic response,
            ``'warn'`` logs a warning and lets the call through,
            ``'raise'`` raises :class:`BudgetExceeded`.
        threshold: Fraction of the budget (0.0–1.0) that triggers downgrading.

    Returns:
        A proxy object that is a drop-in replacement for *client*.

    Raises:
        TokenFenceError: On an invalid ``on_limit``, ``threshold``, or budget.
    """
    if on_limit not in ("stop", "warn", "raise"):
        raise TokenFenceError(f"on_limit must be 'stop', 'warn', or 'raise', got {on_limit!r}")
    if not 0.0 <= threshold <= 1.0:
        raise TokenFenceError(f"threshold must be between 0.0 and 1.0, got {threshold!r}")

    usd_budget = _parse_budget(budget)
    if usd_budget <= 0:
        raise TokenFenceError(f"budget must be positive, got {usd_budget}")

    return _GuardedClient(
        client=client,
        tracker=CostTracker(budget=usd_budget, threshold=threshold),
        fallback=fallback,
        on_limit=on_limit,
    )
69
+
70
+
71
+ # ---------------------------------------------------------------------------
72
+ # Internal proxy objects
73
+ # ---------------------------------------------------------------------------
74
+
75
+ class _GuardedClient:
76
+ """Transparent proxy around an OpenAI client."""
77
+
78
+ def __init__(
79
+ self,
80
+ client: Any,
81
+ tracker: CostTracker,
82
+ fallback: Optional[str],
83
+ on_limit: OnLimit,
84
+ ) -> None:
85
+ self._client = client
86
+ self._tracker = tracker
87
+ self._fallback = fallback
88
+ self._on_limit = on_limit
89
+
90
+ # Expose tracker as ``client.tokenfence``
91
+ @property
92
+ def tokenfence(self) -> CostTracker:
93
+ return self._tracker
94
+
95
+ # Intercept ``client.chat`` to return our guarded namespace (OpenAI)
96
+ @property
97
+ def chat(self) -> "_GuardedChat":
98
+ return _GuardedChat(self._client.chat, self._tracker, self._fallback, self._on_limit)
99
+
100
+ # Intercept ``client.messages`` for Anthropic-style clients
101
+ @property
102
+ def messages(self) -> "_GuardedAnthropicMessages":
103
+ return _GuardedAnthropicMessages(self._client.messages, self._tracker, self._fallback, self._on_limit)
104
+
105
+ # Pass everything else through to the real client
106
+ def __getattr__(self, name: str) -> Any:
107
+ return getattr(self._client, name)
108
+
109
+
110
+ class _GuardedChat:
111
+ """Proxy for ``client.chat``."""
112
+
113
+ def __init__(self, chat: Any, tracker: CostTracker, fallback: Optional[str], on_limit: OnLimit) -> None:
114
+ self._chat = chat
115
+ self._tracker = tracker
116
+ self._fallback = fallback
117
+ self._on_limit = on_limit
118
+
119
+ @property
120
+ def completions(self) -> "_GuardedCompletions":
121
+ return _GuardedCompletions(self._chat.completions, self._tracker, self._fallback, self._on_limit)
122
+
123
+ def __getattr__(self, name: str) -> Any:
124
+ return getattr(self._chat, name)
125
+
126
+
127
class _GuardedCompletions:
    """Proxy for ``client.chat.completions`` — intercepts ``create()``.

    All other attributes are forwarded untouched to the wrapped
    ``completions`` object via ``__getattr__``.
    """

    def __init__(self, completions: Any, tracker: CostTracker, fallback: Optional[str], on_limit: OnLimit) -> None:
        self._completions = completions
        self._tracker = tracker
        self._fallback = fallback
        self._on_limit = on_limit

    def create(self, **kwargs: Any) -> Any:
        """Intercept ``chat.completions.create()`` with budget enforcement.

        Behaviour:
            * budget exhausted, ``on_limit='raise'`` — raises :class:`BudgetExceeded`.
            * budget exhausted, ``on_limit='stop'`` — returns a synthetic response.
            * budget exhausted, ``on_limit='warn'`` — logs, performs the call,
              and still records its cost. (Bug fix: previously warn-mode calls
              bypassed tracking entirely, so ``spent``/``calls`` froze once the
              budget was exceeded.)
            * otherwise — possibly downgrades the model to the fallback, then
              performs and records the call.
        """
        tracker = self._tracker

        if tracker.budget_exceeded:
            if self._on_limit == "raise":
                raise BudgetExceeded(budget=tracker.budget, spent=tracker.spent)
            if self._on_limit == "stop":
                # Kill switch: short-circuit without hitting the API.
                return _synthetic_response(tracker)
            # on_limit == "warn": allow the call through; cost is still
            # recorded by falling through to the shared call path below.
            logger.warning(
                "TokenFence: budget of $%.4f exhausted (spent $%.4f) — allowing call anyway",
                tracker.budget,
                tracker.spent,
            )
        elif tracker.should_downgrade and self._fallback and kwargs.get("model") != self._fallback:
            # Auto-downgrade once the spend threshold has been crossed.
            logger.warning(
                "TokenFence: spend $%.4f has reached %.0f%% of $%.4f budget — "
                "downgrading from %s to %s",
                tracker.spent,
                tracker.usage_ratio * 100,
                tracker.budget,
                kwargs.get("model"),
                self._fallback,
            )
            kwargs["model"] = self._fallback

        response = self._completions.create(**kwargs)
        self._record_cost(kwargs.get("model") or "", response)
        return response

    def _record_cost(self, model: str, response: Any) -> None:
        """Record the cost of *response* against the tracker.

        Records 0.0 when the response carries no ``usage`` attribute
        (e.g. streaming responses) so the call count still advances.
        """
        usage = getattr(response, "usage", None)
        if usage is None:
            self._tracker.record(0.0)
            return
        input_tokens = getattr(usage, "prompt_tokens", 0) or 0
        output_tokens = getattr(usage, "completion_tokens", 0) or 0
        self._tracker.record(calculate_cost(model, input_tokens, output_tokens))

    def __getattr__(self, name: str) -> Any:
        # Unwrapped attributes fall through to the real completions object.
        return getattr(self._completions, name)
194
+
195
+
196
+ # ---------------------------------------------------------------------------
197
+ # Anthropic ``client.messages.create()`` proxy
198
+ # ---------------------------------------------------------------------------
199
+
200
class _GuardedAnthropicMessages:
    """Proxy for Anthropic ``client.messages`` — intercepts ``create()``.

    All other attributes are forwarded untouched to the wrapped
    ``messages`` object via ``__getattr__``.
    """

    def __init__(self, messages: Any, tracker: CostTracker, fallback: Optional[str], on_limit: OnLimit) -> None:
        self._messages = messages
        self._tracker = tracker
        self._fallback = fallback
        self._on_limit = on_limit

    def create(self, **kwargs: Any) -> Any:
        """Intercept ``messages.create()`` with budget enforcement.

        Behaviour:
            * budget exhausted, ``on_limit='raise'`` — raises :class:`BudgetExceeded`.
            * budget exhausted, ``on_limit='stop'`` — returns a synthetic response.
            * budget exhausted, ``on_limit='warn'`` — logs, performs the call,
              and still records its cost. (Bug fix: previously warn-mode calls
              bypassed tracking entirely, so ``spent``/``calls`` froze once the
              budget was exceeded.)
            * otherwise — possibly downgrades the model to the fallback, then
              performs and records the call.
        """
        tracker = self._tracker

        if tracker.budget_exceeded:
            if self._on_limit == "raise":
                raise BudgetExceeded(budget=tracker.budget, spent=tracker.spent)
            if self._on_limit == "stop":
                # Kill switch: short-circuit with an Anthropic-style stub.
                return _synthetic_anthropic_response(tracker)
            # on_limit == "warn": allow the call through; cost is still
            # recorded by falling through to the shared call path below.
            logger.warning(
                "TokenFence: budget of $%.4f exhausted (spent $%.4f) — allowing call anyway",
                tracker.budget,
                tracker.spent,
            )
        elif tracker.should_downgrade and self._fallback and kwargs.get("model") != self._fallback:
            # Auto-downgrade once the spend threshold has been crossed.
            logger.warning(
                "TokenFence: spend $%.4f has reached %.0f%% of $%.4f budget — "
                "downgrading from %s to %s",
                tracker.spent,
                tracker.usage_ratio * 100,
                tracker.budget,
                kwargs.get("model"),
                self._fallback,
            )
            kwargs["model"] = self._fallback

        response = self._messages.create(**kwargs)
        self._record_cost(kwargs.get("model") or "", response)
        return response

    def _record_cost(self, model: str, response: Any) -> None:
        """Record the cost of *response* using Anthropic's usage field names.

        Records 0.0 when the response carries no ``usage`` attribute so the
        call count still advances.
        """
        usage = getattr(response, "usage", None)
        if usage is None:
            self._tracker.record(0.0)
            return
        input_tokens = getattr(usage, "input_tokens", 0) or 0
        output_tokens = getattr(usage, "output_tokens", 0) or 0
        self._tracker.record(calculate_cost(model, input_tokens, output_tokens))

    def __getattr__(self, name: str) -> Any:
        # Unwrapped attributes fall through to the real messages object.
        return getattr(self._messages, name)
265
+
266
+
267
+ # ---------------------------------------------------------------------------
268
+ # Synthetic response for on_limit='stop'
269
+ # ---------------------------------------------------------------------------
270
+
271
+ class _SyntheticUsage:
272
+ prompt_tokens: int = 0
273
+ completion_tokens: int = 0
274
+ total_tokens: int = 0
275
+
276
+
277
+ class _SyntheticMessage:
278
+ role: str = "assistant"
279
+ content: str = ""
280
+
281
+ def __init__(self, content: str) -> None:
282
+ self.content = content
283
+
284
+
285
+ class _SyntheticChoice:
286
+ index: int = 0
287
+ finish_reason: str = "stop"
288
+ message: _SyntheticMessage
289
+
290
+ def __init__(self, message: _SyntheticMessage) -> None:
291
+ self.message = message
292
+
293
+
294
class _SyntheticResponse:
    """Mimics the shape of an OpenAI ``ChatCompletion`` enough for most code."""

    id: str = "tokenfence-budget-exceeded"
    object: str = "chat.completion"
    model: str = "tokenfence"

    def __init__(self, tracker: CostTracker) -> None:
        notice = (
            f"[TokenFence] Budget of ${tracker.budget:.2f} exceeded "
            f"(spent ${tracker.spent:.4f}). Request blocked."
        )
        self.usage = _SyntheticUsage()
        self.choices = [_SyntheticChoice(_SyntheticMessage(notice))]
309
+
310
+
311
def _synthetic_response(tracker: CostTracker) -> _SyntheticResponse:
    """Build the stand-in ``ChatCompletion`` returned when ``on_limit='stop'`` blocks a call."""
    return _SyntheticResponse(tracker)
313
+
314
+
315
+ # ---------------------------------------------------------------------------
316
+ # Anthropic-style synthetic response for on_limit='stop'
317
+ # ---------------------------------------------------------------------------
318
+
319
+ class _SyntheticAnthropicUsage:
320
+ input_tokens: int = 0
321
+ output_tokens: int = 0
322
+
323
+
324
+ class _SyntheticAnthropicContentBlock:
325
+ type: str = "text"
326
+ text: str = ""
327
+
328
+ def __init__(self, text: str) -> None:
329
+ self.type = "text"
330
+ self.text = text
331
+
332
+
333
class _SyntheticAnthropicResponse:
    """Mimics the shape of an Anthropic ``Message`` enough for most code."""

    id: str = "tokenfence-budget-exceeded"
    type: str = "message"
    role: str = "assistant"
    model: str = "tokenfence"
    stop_reason: str = "end_turn"

    def __init__(self, tracker: CostTracker) -> None:
        notice = (
            f"[TokenFence] Budget of ${tracker.budget:.2f} exceeded "
            f"(spent ${tracker.spent:.4f}). Request blocked."
        )
        self.usage = _SyntheticAnthropicUsage()
        self.content = [_SyntheticAnthropicContentBlock(notice)]
350
+
351
+
352
def _synthetic_anthropic_response(tracker: CostTracker) -> _SyntheticAnthropicResponse:
    """Build the stand-in Anthropic ``Message`` returned when ``on_limit='stop'`` blocks a call."""
    return _SyntheticAnthropicResponse(tracker)