token-sentinel 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- token_sentinel/__init__.py +33 -0
- token_sentinel/cloud_client.py +455 -0
- token_sentinel/enrichers/__init__.py +22 -0
- token_sentinel/enrichers/langchain.py +816 -0
- token_sentinel/enrichers/otel.py +601 -0
- token_sentinel/events.py +240 -0
- token_sentinel/policy_client.py +645 -0
- token_sentinel/py.typed +0 -0
- token_sentinel/rules/__init__.py +79 -0
- token_sentinel/rules/audio_multichannel_doubling.py +207 -0
- token_sentinel/rules/base.py +31 -0
- token_sentinel/rules/context_bloat.py +63 -0
- token_sentinel/rules/embedding_waste.py +58 -0
- token_sentinel/rules/model_misroute.py +400 -0
- token_sentinel/rules/repair_loop.py +532 -0
- token_sentinel/rules/rerank_thrash.py +171 -0
- token_sentinel/rules/retrieval_thrash.py +172 -0
- token_sentinel/rules/retry_storm.py +45 -0
- token_sentinel/rules/tool_definition_bloat.py +324 -0
- token_sentinel/rules/tool_loop.py +517 -0
- token_sentinel/rules/vision_cost_concentration.py +182 -0
- token_sentinel/rules/vision_high_detail_misroute.py +154 -0
- token_sentinel/rules/vision_re_upload.py +651 -0
- token_sentinel/rules/voice_switching_loop.py +241 -0
- token_sentinel/rules/zombie.py +48 -0
- token_sentinel/sentinel.py +1251 -0
- token_sentinel/tracer.py +71 -0
- token_sentinel/wrappers/__init__.py +57 -0
- token_sentinel/wrappers/anthropic.py +629 -0
- token_sentinel/wrappers/bedrock.py +560 -0
- token_sentinel/wrappers/cohere.py +800 -0
- token_sentinel/wrappers/deepgram.py +853 -0
- token_sentinel/wrappers/elevenlabs.py +740 -0
- token_sentinel/wrappers/gemini.py +691 -0
- token_sentinel/wrappers/openai.py +1612 -0
- token_sentinel/wrappers/replicate.py +650 -0
- token_sentinel/wrappers/voyage.py +407 -0
- token_sentinel-1.0.0.dist-info/METADATA +253 -0
- token_sentinel-1.0.0.dist-info/RECORD +41 -0
- token_sentinel-1.0.0.dist-info/WHEEL +4 -0
- token_sentinel-1.0.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""TokenSentinel — predictive token-waste detection for AI agents.
|
|
2
|
+
|
|
3
|
+
Public framing is "token waste" (cost) — not "token leak" (security). The
|
|
4
|
+
internal Python API still uses ``LeakEvent`` / ``LeakDetected`` / ``on_leak``
|
|
5
|
+
for backward compatibility with installed customer code; the stable release
|
|
6
|
+
adds ``WasteEvent`` / ``WasteDetected`` / ``on_waste`` as transparent aliases
|
|
7
|
+
(same objects, not subclasses). New code may use either set of names.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from token_sentinel.events import (
|
|
11
|
+
BudgetExceeded,
|
|
12
|
+
CallRecord,
|
|
13
|
+
KillSwitchActive,
|
|
14
|
+
LeakDetected,
|
|
15
|
+
LeakEvent,
|
|
16
|
+
VelocityExceeded,
|
|
17
|
+
WasteDetected,
|
|
18
|
+
WasteEvent,
|
|
19
|
+
)
|
|
20
|
+
from token_sentinel.sentinel import Sentinel
|
|
21
|
+
|
|
22
|
+
__version__ = "1.0.0"
|
|
23
|
+
__all__ = [
|
|
24
|
+
"Sentinel",
|
|
25
|
+
"LeakEvent",
|
|
26
|
+
"WasteEvent",
|
|
27
|
+
"CallRecord",
|
|
28
|
+
"LeakDetected",
|
|
29
|
+
"WasteDetected",
|
|
30
|
+
"BudgetExceeded",
|
|
31
|
+
"VelocityExceeded",
|
|
32
|
+
"KillSwitchActive",
|
|
33
|
+
]
|
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
"""Cloud sink — ships :class:`LeakEvent` instances to the TokenSentinel
|
|
2
|
+
backend.
|
|
3
|
+
|
|
4
|
+
The sink is structured as a fire-and-forget background pipeline:
|
|
5
|
+
|
|
6
|
+
Sentinel._run_handlers(event)
|
|
7
|
+
└── self._cloud_sink.enqueue(event) # <100us hot path; queue.put_nowait
|
|
8
|
+
│
|
|
9
|
+
▼
|
|
10
|
+
bounded queue.Queue (default cloud_queue_max=1000)
|
|
11
|
+
│
|
|
12
|
+
▼
|
|
13
|
+
daemon thread loop ── batches events ──▶ POST /v1/events
|
|
14
|
+
│ flushes every cloud_flush_interval_seconds (default 5.0s)
|
|
15
|
+
│ OR once cloud_batch_size events accumulate (default 50)
|
|
16
|
+
└── HTTP retry: up to 3 attempts, exponential backoff between them (1s, then 2s); drop after.
|
|
17
|
+
|
|
18
|
+
Discipline:
|
|
19
|
+
* The agent's call path NEVER blocks on this sink.
|
|
20
|
+
* The sink NEVER raises into user code. All exceptions are either dropped
|
|
21
|
+
silently (where loud reporting would itself disturb the agent) or
|
|
22
|
+
surfaced through ``warnings.warn(..., RuntimeWarning)`` so they are
|
|
23
|
+
visible without coupling to user error handling.
|
|
24
|
+
* Stdlib only — ``urllib.request`` for HTTP. The SDK is keeping its
|
|
25
|
+
zero-dep core; adding ``requests`` here would cost users a transitive
|
|
26
|
+
dep just to enable the cloud feature.
|
|
27
|
+
|
|
28
|
+
Wire format::
|
|
29
|
+
|
|
30
|
+
POST /v1/events
|
|
31
|
+
Authorization: Bearer <api_key>
|
|
32
|
+
Content-Type: application/json
|
|
33
|
+
User-Agent: token-sentinel-py/<version>
|
|
34
|
+
|
|
35
|
+
{"project": "<project>", "events": [<event-dict>, ...]}
|
|
36
|
+
|
|
37
|
+
The wire contract is small and stable. To self-host the cloud sink, point
|
|
38
|
+
``cloud_endpoint`` at your own server and accept this POST shape.
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
from __future__ import annotations
|
|
42
|
+
|
|
43
|
+
import dataclasses
|
|
44
|
+
import json
|
|
45
|
+
import queue
|
|
46
|
+
import threading
|
|
47
|
+
import time
|
|
48
|
+
import urllib.error
|
|
49
|
+
import urllib.request
|
|
50
|
+
import warnings
|
|
51
|
+
from datetime import datetime
|
|
52
|
+
from typing import TYPE_CHECKING, Any
|
|
53
|
+
|
|
54
|
+
if TYPE_CHECKING:
|
|
55
|
+
from token_sentinel.events import LeakEvent
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# Module constants
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
# Retry policy: 3 attempts total, with exponential backoff between retries.
|
|
63
|
+
# Sleeps fire after a *failure*, not before the first attempt — so
|
|
64
|
+
# only the first two entries (1s, 2s) are ever slept, between the three attempts; the trailing 4s entry is never reached.
|
|
65
|
+
_RETRY_BACKOFFS_SECONDS = (1.0, 2.0, 4.0)
|
|
66
|
+
|
|
67
|
+
# How long the daemon thread will block on the queue before checking the
|
|
68
|
+
# flush-interval timer. Keeping this short (vs. waiting on a Condition) keeps
|
|
69
|
+
# the implementation a single Queue without extra signalling. The cost is one
|
|
70
|
+
# wakeup every 0.5s when idle — negligible.
|
|
71
|
+
_QUEUE_POLL_TIMEOUT_SECONDS = 0.5
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
# Wire serialization
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _event_to_wire(event: LeakEvent, sdk_version: str, *, mode: str = "log") -> dict[str, Any]:
    """Convert one event into the dict that is placed in the POST body.

    The event is flattened with ``dataclasses.asdict`` and then adjusted:

    * ``raised_at`` — when it is a ``datetime`` it is replaced by its
      ``isoformat()`` string (timezone info, if any, is kept by
      ``isoformat``). Any other value is passed through untouched.
    * ``sdk_version`` / ``mode`` — stamped onto the dict, overwriting any
      same-named field the event may already carry. ``mode`` defaults to
      ``"log"``.
    * ``tags`` — if the key is present but its value is not a ``dict``
      (e.g. a caller replaced it with a list) it is reset to ``{}``. A
      missing ``tags`` key is left missing.

    No other field is inspected; in particular ``evidence`` is forwarded
    exactly as ``asdict`` produced it.

    NOTE(review): the original notes say older cloud versions silently
    ignore unknown keys such as ``mode`` and ``tags`` — that is a property
    of the server and cannot be confirmed from this module.

    Args:
        event: A dataclass instance (``asdict`` raises ``TypeError``
            otherwise).
        sdk_version: Version string recorded under ``"sdk_version"``.
        mode: Sentinel mode recorded under ``"mode"``.

    Returns:
        A new dict; the caller is expected to JSON-encode it.
    """
    wire = dataclasses.asdict(event)

    # Only the top-level timestamp is converted here.
    stamp = wire.get("raised_at")
    if isinstance(stamp, datetime):
        wire["raised_at"] = stamp.isoformat()

    wire.update(sdk_version=sdk_version, mode=mode)

    # The ``{}`` default makes an absent key pass the isinstance check, so
    # only a present-but-wrong-typed ``tags`` is overwritten.
    if not isinstance(wire.get("tags", {}), dict):
        wire["tags"] = {}

    return wire
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# ---------------------------------------------------------------------------
|
|
124
|
+
# CloudSink
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class CloudSink:
    """Bounded-queue + daemon-thread shipper for ``LeakEvent``s.

    ``enqueue`` performs a non-blocking ``queue.put_nowait``; a single daemon
    thread (started in ``__init__``) drains the queue, groups events into
    batches and POSTs them to ``<endpoint>/v1/events``.

    Public surface:
      * ``enqueue(event)`` — non-blocking; never raises.
      * ``close(timeout)`` — signals the daemon to stop, lets it drain and
        flush what is still queued, and joins it. Returns ``True`` if the
        thread exited within ``timeout``, ``False`` otherwise.

    Failure isolation:
      * Queue overflow → the oldest queued event is dropped to make room and
        a ``RuntimeWarning`` is emitted once per sink instance
        (see ``_overflow_warning_emitted``).
      * HTTP failure after all attempts → the batch is dropped and a
        ``RuntimeWarning`` is emitted.
      * Serialization failure → the batch is dropped and a
        ``RuntimeWarning`` is emitted.
      * Every warning goes through ``_warn``, which swallows the exception
        raised when the active warnings filter escalates warnings to errors
        (``-W error``), so neither ``enqueue`` nor the daemon thread can be
        taken down by the reporting itself.
    """

    def __init__(
        self,
        endpoint: str,
        api_key: str,
        *,
        project: str,
        sdk_version: str,
        flush_interval_seconds: float = 5.0,
        batch_size: int = 50,
        queue_max: int = 1000,
        extra_headers: dict[str, str] | None = None,
        mode: str = "log",
    ) -> None:
        """Validate configuration and start the daemon thread.

        Args:
            endpoint: Base URL; a trailing ``/`` is stripped and
                ``/v1/events`` is appended for the POST target.
            api_key: Sent as ``Authorization: Bearer <api_key>``.
            project: Sent as the ``"project"`` field of every batch and used
                in the daemon thread's name.
            sdk_version: Stamped on every event and in the ``User-Agent``.
            flush_interval_seconds: Flush a non-empty batch once this many
                seconds have passed since the previous flush.
            batch_size: Flush as soon as this many events are batched.
            queue_max: ``maxsize`` of the internal ``queue.Queue``.
            extra_headers: Additional request headers; keys and values are
                coerced to ``str``.
            mode: Stamped on every event; a falsy value becomes ``"log"``.

        Raises:
            ValueError: If ``endpoint`` or ``api_key`` is empty.
        """
        if not endpoint:
            raise ValueError("CloudSink: endpoint is required")
        if not api_key:
            raise ValueError("CloudSink: api_key is required")

        # Strip a trailing slash so callers can pass either form.
        self.endpoint = endpoint.rstrip("/")
        self.api_key = api_key
        self.project = project
        self.sdk_version = sdk_version
        self.flush_interval_seconds = float(flush_interval_seconds)
        self.batch_size = int(batch_size)
        self.queue_max = int(queue_max)
        # Coerce defensively so a non-str mode still serializes as text.
        self.mode = str(mode) if mode else "log"
        # Coerce header names/values to str up front so a caller passing
        # ints does not fail later, inside the daemon thread.
        if extra_headers:
            self._extra_headers: dict[str, str] = {str(k): str(v) for k, v in extra_headers.items()}
        else:
            self._extra_headers = {}

        # Built once; reused for every POST.
        self._post_url = f"{self.endpoint}/v1/events"

        # Bounded queue: on overflow ``enqueue`` evicts the oldest entry so
        # the newest event wins.
        self._queue: queue.Queue[LeakEvent] = queue.Queue(maxsize=self.queue_max)

        # Set by ``close()``; also used to cut retry backoffs short.
        self._stop = threading.Event()

        # One-time gate for the overflow warning.
        self._overflow_warning_emitted = False

        self._thread = threading.Thread(
            target=self._run,
            name=f"token-sentinel-cloud-sink-{project}",
            daemon=True,
        )
        self._thread.start()

    # -----------------------------------------------------------------
    # Warning helper
    # -----------------------------------------------------------------

    @staticmethod
    def _warn(message: str, *, stacklevel: int = 2) -> None:
        """Emit ``message`` as a ``RuntimeWarning`` without ever raising.

        ``warnings.warn`` raises the warning as an exception when the active
        filter action is ``"error"``. The sink promises not to raise into
        user code or kill its own daemon, so that exception is swallowed.

        ``stacklevel`` is counted from the method calling ``_warn`` (the
        extra frame added by this helper is compensated for).
        """
        try:
            warnings.warn(message, RuntimeWarning, stacklevel=stacklevel + 1)
        except Exception:
            # Reporting must never be the thing that breaks the caller.
            pass

    # -----------------------------------------------------------------
    # Hot path — enqueue
    # -----------------------------------------------------------------

    def enqueue(self, event: LeakEvent) -> None:
        """Hand an event to the sink. Non-blocking; never raises.

        On a full queue the oldest queued event is evicted and the put is
        retried once. If the queue is still full (another producer refilled
        it in between) the new event is dropped. The first overflow per sink
        instance emits a ``RuntimeWarning``.
        """
        try:
            try:
                self._queue.put_nowait(event)
                return
            except queue.Full:
                pass

            # Overflow: make room by discarding the oldest entry.
            try:
                self._queue.get_nowait()
            except queue.Empty:
                # The daemon drained the queue between Full and get.
                pass
            try:
                self._queue.put_nowait(event)
            except queue.Full:
                # Lost the race for the freed slot; drop this event.
                pass

            if not self._overflow_warning_emitted:
                self._overflow_warning_emitted = True
                self._warn(
                    f"TokenSentinel CloudSink: queue full (max={self.queue_max}); "
                    "dropping oldest event. Bump cloud_queue_max if this is "
                    "expected, or check that the cloud endpoint is reachable.",
                    stacklevel=2,
                )
        except Exception:
            # Single outer guard: unlike a sibling ``except`` clause, this
            # also covers anything raised while handling the overflow.
            pass

    # -----------------------------------------------------------------
    # Daemon loop
    # -----------------------------------------------------------------

    def _run(self) -> None:
        """Background loop: drain → batch → POST.

        Runs until ``self._stop`` is set, then drains whatever is still
        queued (flushing in ``batch_size`` chunks) and exits.
        """
        batch: list[LeakEvent] = []
        last_flush = time.monotonic()

        while not self._stop.is_set():
            timeout = max(0.0, _QUEUE_POLL_TIMEOUT_SECONDS)
            try:
                event = self._queue.get(timeout=timeout)
                batch.append(event)
            except queue.Empty:
                pass
            except Exception:
                # Not expected from Queue.get; back off briefly so an
                # unexpected failure cannot turn into a tight spin.
                time.sleep(0.1)
                continue

            now = time.monotonic()
            full_batch = len(batch) >= self.batch_size
            interval_elapsed = (now - last_flush) >= self.flush_interval_seconds

            if batch and (full_batch or interval_elapsed):
                self._flush(batch)
                batch = []
                last_flush = now

        # Stop requested: drain the backlog, still capped at batch_size per
        # POST so a large backlog does not become one giant request.
        try:
            while True:
                event = self._queue.get_nowait()
                batch.append(event)
                if len(batch) >= self.batch_size:
                    self._flush(batch)
                    batch = []
        except queue.Empty:
            pass

        if batch:
            self._flush(batch)

    # -----------------------------------------------------------------
    # Flush — turn a batch into one POST request
    # -----------------------------------------------------------------

    def _flush(self, batch: list[LeakEvent]) -> None:
        """POST ``batch`` with retries. Never raises.

        Makes at most ``len(_RETRY_BACKOFFS_SECONDS)`` attempts. After a
        failed attempt that is not the last one, waits the matching backoff
        on ``self._stop``; if stop is signalled during that wait the
        remaining attempts are abandoned. On giving up, the batch is dropped
        and a ``RuntimeWarning`` reports how many attempts were actually
        made and the last error.
        """
        if not batch:
            return
        try:
            body = self._serialize(batch)
        except Exception as exc:
            # A serialization failure is not transient; retrying is useless.
            self._warn(
                f"TokenSentinel CloudSink: failed to serialize batch of "
                f"{len(batch)} events: {exc!r}; dropping batch.",
                stacklevel=2,
            )
            return

        last_exc: BaseException | None = None
        attempts_made = 0
        final_index = len(_RETRY_BACKOFFS_SECONDS) - 1
        for attempt, delay in enumerate(_RETRY_BACKOFFS_SECONDS):
            attempts_made = attempt + 1
            try:
                self._post(body)
                return  # success
            except Exception as exc:  # noqa: BLE001 — treat all HTTP errors uniformly
                last_exc = exc
            # No sleep after the final attempt. ``_stop.wait`` lets close()
            # interrupt a backoff immediately.
            if attempt < final_index and self._stop.wait(delay):
                break

        noun = "attempt" if attempts_made == 1 else "attempts"
        self._warn(
            f"TokenSentinel CloudSink: dropping batch of {len(batch)} events "
            f"after {attempts_made} failed {noun}; "
            f"last error: {last_exc!r}",
            stacklevel=2,
        )

    def _serialize(self, batch: list[LeakEvent]) -> bytes:
        """Build the UTF-8 JSON request body for ``batch``.

        Values ``json`` cannot encode natively are stringified
        (``default=str``).
        """
        payload = {
            "project": self.project,
            "events": [_event_to_wire(ev, self.sdk_version, mode=self.mode) for ev in batch],
        }
        return json.dumps(payload, default=str).encode("utf-8")

    def _post(self, body: bytes) -> None:
        """Send one HTTP POST. Raises on a network error or non-2xx status.

        Uses ``urllib.request`` with a 10 second timeout. The response body
        is not read.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "User-Agent": f"token-sentinel-py/{self.sdk_version}",
        }
        # Extra headers may add to or replace anything except the two
        # headers owned by the sink (matched case-insensitively).
        for k, v in self._extra_headers.items():
            if k.lower() not in {"authorization", "content-type"}:
                headers[k] = v
        req = urllib.request.Request(
            url=self._post_url,
            data=body,
            method="POST",
            headers=headers,
        )
        # The ``with`` block closes the response as soon as the status has
        # been checked.
        with urllib.request.urlopen(req, timeout=10.0) as resp:
            status = getattr(resp, "status", None)
            if status is None:
                # Fall back to ``getcode()``; assume 200 if neither exists.
                getter = getattr(resp, "getcode", None)
                status = getter() if callable(getter) else 200
            if not (200 <= int(status) < 300):
                raise urllib.error.HTTPError(
                    self._post_url,
                    status,
                    f"unexpected status {status}",
                    resp.headers,
                    None,
                )

    # -----------------------------------------------------------------
    # Shutdown
    # -----------------------------------------------------------------

    def close(self, timeout: float = 5.0) -> bool:
        """Stop the daemon thread and let it flush remaining events.

        Sets the stop flag and joins the daemon for up to ``timeout``
        seconds.

        Returns:
            ``True`` if the thread exited within ``timeout``; ``False`` (after
            emitting a ``RuntimeWarning``) if it is still running.
        """
        self._stop.set()
        self._thread.join(timeout=timeout)
        if self._thread.is_alive():
            self._warn(
                f"TokenSentinel CloudSink: close() timeout after {timeout}s; "
                "daemon thread still running (events may be lost).",
                stacklevel=2,
            )
            return False
        return True
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Enrichers — bridges from third-party callback / observability frameworks
|
|
2
|
+
into the TokenSentinel session buffer.
|
|
3
|
+
|
|
4
|
+
A wrapper instruments a *client* (``sentinel.wrap(anthropic.Anthropic())``);
|
|
5
|
+
an enricher instruments a *framework* by hooking its event bus. Both end up
|
|
6
|
+
producing :class:`token_sentinel.events.CallRecord` instances and routing
|
|
7
|
+
them through :meth:`Sentinel.record_call`, so the rule engine sees a
|
|
8
|
+
uniform view regardless of which surface the customer used to make their
|
|
9
|
+
LLM calls.
|
|
10
|
+
|
|
11
|
+
Each enricher module is gated on its dependency being installed. The
|
|
12
|
+
import below uses a defensive shim: if ``langchain_core`` is missing, the
|
|
13
|
+
module imports successfully but ``TokenSentinelCallbackHandler`` raises
|
|
14
|
+
:class:`ImportError` with an installation hint at construction time. This
|
|
15
|
+
mirrors the wrapper pattern (cold-import cost stays minimal for customers
|
|
16
|
+
who don't use the integration).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from token_sentinel.enrichers.langchain import TokenSentinelCallbackHandler
|
|
20
|
+
from token_sentinel.enrichers.otel import TokenSentinelSpanProcessor
|
|
21
|
+
|
|
22
|
+
__all__ = ["TokenSentinelCallbackHandler", "TokenSentinelSpanProcessor"]
|