tracectrl 0.3.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tracectrl-0.3.0 → tracectrl-0.3.2}/PKG-INFO +1 -1
- {tracectrl-0.3.0 → tracectrl-0.3.2}/pyproject.toml +1 -1
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/__init__.py +1 -1
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/config.py +5 -1
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/guardrails/judge.py +39 -13
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/guardrails/strands_hook.py +121 -25
- {tracectrl-0.3.0 → tracectrl-0.3.2}/.gitignore +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/LICENSE +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/README.md +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/_tui.py +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/agent_tagging.py +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/cli.py +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/context.py +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/exporter.py +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/guardrails/__init__.py +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/guardrails/guardrail.py +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/inference.py +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/processor.py +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/protector.py +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/schema.py +0 -0
- {tracectrl-0.3.0 → tracectrl-0.3.2}/src/tracectrl/session.py +0 -0
|
@@ -82,7 +82,11 @@ def get_tracer_provider() -> TracerProvider:
|
|
|
82
82
|
|
|
83
83
|
headers = {}
|
|
84
84
|
if _config.api_key:
|
|
85
|
-
|
|
85
|
+
# Lowercase key: gRPC metadata keys MUST be lowercase (HTTP/2 spec;
|
|
86
|
+
# the grpc lib rejects "Authorization" with "Illegal header key").
|
|
87
|
+
# Lowercase is also valid for OTLP/HTTP (HTTP/1.1 headers are
|
|
88
|
+
# case-insensitive), so this works for both exporters.
|
|
89
|
+
headers["authorization"] = f"Bearer {_config.api_key}"
|
|
86
90
|
|
|
87
91
|
normalized_endpoint, insecure = _normalize_endpoint(_config.endpoint)
|
|
88
92
|
|
|
@@ -297,19 +297,7 @@ def _invoke_gemini_judge(judge_llm: Any, prompt: str, *, attempt: int) -> JudgeR
|
|
|
297
297
|
attempt we sharpen the system instruction so the model recovers from
|
|
298
298
|
whatever malformed-JSON cause the first attempt hit.
|
|
299
299
|
"""
|
|
300
|
-
client =
|
|
301
|
-
if client is None:
|
|
302
|
-
# Older Strands or unusual init — try to construct one from
|
|
303
|
-
# client_args, mirroring what Strands' GeminiModel does internally.
|
|
304
|
-
client_args = getattr(judge_llm, "client_args", None) or {}
|
|
305
|
-
try:
|
|
306
|
-
from google import genai # type: ignore
|
|
307
|
-
except ImportError as e:
|
|
308
|
-
raise RuntimeError(
|
|
309
|
-
"GeminiModel passed as judge_llm but `google-genai` is not "
|
|
310
|
-
"installed. `pip install google-genai`."
|
|
311
|
-
) from e
|
|
312
|
-
client = genai.Client(**client_args)
|
|
300
|
+
client = _resolve_gemini_client(judge_llm)
|
|
313
301
|
|
|
314
302
|
model_id = _resolve_gemini_model_id(judge_llm)
|
|
315
303
|
|
|
@@ -363,6 +351,44 @@ def _invoke_gemini_judge(judge_llm: Any, prompt: str, *, attempt: int) -> JudgeR
|
|
|
363
351
|
)
|
|
364
352
|
|
|
365
353
|
|
|
354
|
+
def _resolve_gemini_client(judge_llm: Any) -> Any:
|
|
355
|
+
"""Return a cached `google.genai.Client` for this judge_llm, building it
|
|
356
|
+
once and stashing it on the judge_llm instance.
|
|
357
|
+
|
|
358
|
+
Strands' `GeminiModel` does NOT expose a `.client` attribute — it stores
|
|
359
|
+
`_custom_client` + `client_args` and builds a fresh `genai.Client` on
|
|
360
|
+
every request via `_get_client()`. Before this cache, every guardrail
|
|
361
|
+
evaluation was constructing a brand new `genai.Client` (with its own
|
|
362
|
+
httpx pool and credential setup), which under sustained load against
|
|
363
|
+
the Gemini preview models has been observed to stall judge calls and
|
|
364
|
+
starve subsequent agent invocations of FDs. One client per judge_llm
|
|
365
|
+
is enough — `genai.Client` is documented as not safe to share across
|
|
366
|
+
asyncio event loops, but we only call it from the synchronous path on
|
|
367
|
+
a dedicated thread, so a single instance is correct here.
|
|
368
|
+
"""
|
|
369
|
+
cached = getattr(judge_llm, "_tracectrl_genai_client", None)
|
|
370
|
+
if cached is not None:
|
|
371
|
+
return cached
|
|
372
|
+
# If the GeminiModel was constructed with an injected client, honour it.
|
|
373
|
+
injected = getattr(judge_llm, "_custom_client", None)
|
|
374
|
+
if injected is not None:
|
|
375
|
+
return injected
|
|
376
|
+
client_args = getattr(judge_llm, "client_args", None) or {}
|
|
377
|
+
try:
|
|
378
|
+
from google import genai # type: ignore
|
|
379
|
+
except ImportError as e:
|
|
380
|
+
raise RuntimeError(
|
|
381
|
+
"GeminiModel passed as judge_llm but `google-genai` is not "
|
|
382
|
+
"installed. `pip install google-genai`."
|
|
383
|
+
) from e
|
|
384
|
+
client = genai.Client(**client_args)
|
|
385
|
+
try:
|
|
386
|
+
judge_llm._tracectrl_genai_client = client
|
|
387
|
+
except Exception: # noqa: BLE001 — frozen dataclasses etc.
|
|
388
|
+
pass
|
|
389
|
+
return client
|
|
390
|
+
|
|
391
|
+
|
|
366
392
|
def _resolve_gemini_model_id(judge_llm: Any) -> str:
|
|
367
393
|
"""Extract model_id from a Strands GeminiModel. Mirrors the
|
|
368
394
|
Bedrock-side `_resolve_bedrock_model` shape but returns just the id —
|
|
@@ -6,14 +6,35 @@ callbacks. So we wrap the agent's `__call__` method directly: run the agent,
|
|
|
6
6
|
capture its response, then evaluate each guardrail in order. This keeps the
|
|
7
7
|
core `Guardrail` class framework-agnostic and isolates the Strands knowledge
|
|
8
8
|
to this file.
|
|
9
|
+
|
|
10
|
+
Two correctness details that bit us before:
|
|
11
|
+
|
|
12
|
+
- **Post-output evals run on a background thread.** Strands' `__call__`
|
|
13
|
+
is sync-on-the-surface but internally uses `run_async` (a fresh
|
|
14
|
+
ThreadPoolExecutor + asyncio.run per call). If we evaluate the judge
|
|
15
|
+
synchronously after `super().__call__()` returns, the agent caller
|
|
16
|
+
blocks on the judge round-trip (2–8s for Gemini preview models with
|
|
17
|
+
`response_schema`). To the user it looks like the agent "stops" after
|
|
18
|
+
producing output. We fire-and-forget the eval onto a bounded executor,
|
|
19
|
+
re-attaching the captured OTel context in the worker so the span lands
|
|
20
|
+
under the same agent invocation. Pre-input stays sync — semantically
|
|
21
|
+
must run before the agent fires.
|
|
22
|
+
|
|
23
|
+
- **Snapshot the eval text BEFORE submitting.** The eval text builder
|
|
24
|
+
reads `agent.messages`, which Strands mutates on subsequent calls.
|
|
25
|
+
Without a snapshot, a fast follow-up prompt would race the bg thread
|
|
26
|
+
and the judge would see a half-mutated history.
|
|
9
27
|
"""
|
|
10
28
|
|
|
11
29
|
from __future__ import annotations
|
|
12
30
|
|
|
31
|
+
import atexit
|
|
13
32
|
import logging
|
|
33
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
14
34
|
from datetime import datetime, timezone
|
|
15
35
|
from typing import Any, Iterable, List
|
|
16
36
|
|
|
37
|
+
from opentelemetry import context as otel_context
|
|
17
38
|
from opentelemetry import trace
|
|
18
39
|
|
|
19
40
|
from tracectrl.guardrails.guardrail import Guardrail, _model_identifier
|
|
@@ -22,6 +43,36 @@ logger = logging.getLogger(__name__)
|
|
|
22
43
|
|
|
23
44
|
|
|
24
45
|
_REGISTRATION_SPAN_NAME = "tracectrl.guardrail.registered"
|
|
46
|
+
_INVOCATION_SPAN_NAME = "tracectrl.agent.invocation"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Bounded executor for post-output evals. max_workers=2 keeps memory + FD
|
|
50
|
+
# usage tight; the queue is unbounded but in practice a single agent caller
|
|
51
|
+
# can't outpace 2 workers by much (judge calls are 1–8s each). Workers are
|
|
52
|
+
# joined at interpreter exit, so a hung judge WILL delay process exit. atexit
|
|
53
|
+
# shuts the pool down with wait=True (no timeout) so short scripts still flush their spans.
|
|
54
|
+
_eval_executor: ThreadPoolExecutor | None = None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _get_eval_executor() -> ThreadPoolExecutor:
|
|
58
|
+
global _eval_executor
|
|
59
|
+
if _eval_executor is None:
|
|
60
|
+
_eval_executor = ThreadPoolExecutor(
|
|
61
|
+
max_workers=2,
|
|
62
|
+
thread_name_prefix="tracectrl-guardrail-eval",
|
|
63
|
+
)
|
|
64
|
+
atexit.register(_shutdown_eval_executor)
|
|
65
|
+
return _eval_executor
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _shutdown_eval_executor() -> None:
|
|
69
|
+
global _eval_executor
|
|
70
|
+
if _eval_executor is not None:
|
|
71
|
+
# wait=True so a script that runs `agent(...)` then exits still
|
|
72
|
+
# flushes the eval span. Workers are bounded, so worst case we
|
|
73
|
+
# wait one judge round-trip per pending eval.
|
|
74
|
+
_eval_executor.shutdown(wait=True)
|
|
75
|
+
_eval_executor = None
|
|
25
76
|
|
|
26
77
|
|
|
27
78
|
def _emit_registration_span(agent_id: str, agent_name: str, guardrail: Guardrail) -> None:
|
|
@@ -132,32 +183,54 @@ def wrap_agent_with_guardrails(agent: Any, guardrails: Iterable[Guardrail]) -> A
|
|
|
132
183
|
a_id = getattr(self, "_tracectrl_agent_id", None)
|
|
133
184
|
a_name = getattr(self, "_tracectrl_agent_name", None)
|
|
134
185
|
|
|
135
|
-
|
|
136
|
-
user_input = _extract_input(args, kwargs)
|
|
137
|
-
if user_input is not None:
|
|
138
|
-
for g in pre:
|
|
139
|
-
try:
|
|
140
|
-
g.evaluate(user_input, agent_id=a_id, agent_name=a_name)
|
|
141
|
-
except Exception: # noqa: BLE001
|
|
142
|
-
logger.exception("guardrail %s raised during pre_input eval", g.name)
|
|
143
|
-
|
|
144
|
-
response = super(GuardedAgent, self).__call__(*args, **kwargs)
|
|
145
|
-
|
|
146
|
-
if post:
|
|
147
|
-
# The agent's final response is often a terse status summary
|
|
148
|
-
# ("Payment workflow complete.") that hides the actual content
|
|
149
|
-
# we need to screen — tool inputs/outputs, OCR'd text from
|
|
150
|
-
# session context, etc. Pull the full message history off the
|
|
151
|
-
# Strands agent so the judge sees the COMPLETE picture, not just
|
|
152
|
-
# the synthesized summary.
|
|
153
|
-
output_text = _build_eval_text(self, response)
|
|
154
|
-
for g in post:
|
|
155
|
-
try:
|
|
156
|
-
g.evaluate(output_text, agent_id=a_id, agent_name=a_name)
|
|
157
|
-
except Exception: # noqa: BLE001 — never break the agent
|
|
158
|
-
logger.exception("guardrail %s raised during post_output eval", g.name)
|
|
186
|
+
tracer = trace.get_tracer("tracectrl.guardrails")
|
|
159
187
|
|
|
160
|
-
|
|
188
|
+
# Outer span wraps the entire invocation. Strands' run_async copies
|
|
189
|
+
# the OTel context into its worker thread, so the invoke_agent /
|
|
190
|
+
# chat / tool spans Strands creates become children of this span.
|
|
191
|
+
# The bg-thread post-eval re-attaches this same context, so its
|
|
192
|
+
# eval span also lands here. Net result: one tidy tree per call.
|
|
193
|
+
with tracer.start_as_current_span(_INVOCATION_SPAN_NAME) as invocation_span:
|
|
194
|
+
if a_id:
|
|
195
|
+
invocation_span.set_attribute("tracectrl.agent.id", a_id)
|
|
196
|
+
if a_name:
|
|
197
|
+
invocation_span.set_attribute("tracectrl.agent.name", a_name)
|
|
198
|
+
|
|
199
|
+
if pre:
|
|
200
|
+
user_input = _extract_input(args, kwargs)
|
|
201
|
+
if user_input is not None:
|
|
202
|
+
for g in pre:
|
|
203
|
+
try:
|
|
204
|
+
g.evaluate(user_input, agent_id=a_id, agent_name=a_name)
|
|
205
|
+
except Exception: # noqa: BLE001
|
|
206
|
+
logger.exception("guardrail %s raised during pre_input eval", g.name)
|
|
207
|
+
|
|
208
|
+
response = super(GuardedAgent, self).__call__(*args, **kwargs)
|
|
209
|
+
|
|
210
|
+
if post:
|
|
211
|
+
# Snapshot the eval text NOW, while we still hold the lock
|
|
212
|
+
# of the current invocation — a follow-up agent call would
|
|
213
|
+
# mutate `agent.messages` and racing the bg worker against
|
|
214
|
+
# that mutation is what produces the "memory leak between
|
|
215
|
+
# agents" symptom users have reported.
|
|
216
|
+
output_text = _build_eval_text(self, response)
|
|
217
|
+
captured_ctx = otel_context.get_current()
|
|
218
|
+
for g in post:
|
|
219
|
+
try:
|
|
220
|
+
_get_eval_executor().submit(
|
|
221
|
+
_run_post_eval_bg,
|
|
222
|
+
g,
|
|
223
|
+
output_text,
|
|
224
|
+
a_id,
|
|
225
|
+
a_name,
|
|
226
|
+
captured_ctx,
|
|
227
|
+
)
|
|
228
|
+
except Exception: # noqa: BLE001 — never break the agent
|
|
229
|
+
logger.exception(
|
|
230
|
+
"guardrail %s failed to submit post_output eval", g.name
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
return response
|
|
161
234
|
|
|
162
235
|
GuardedAgent = type(
|
|
163
236
|
f"_TraceCtrlGuarded_{cls.__name__}",
|
|
@@ -172,6 +245,29 @@ def wrap_agent_with_guardrails(agent: Any, guardrails: Iterable[Guardrail]) -> A
|
|
|
172
245
|
return agent
|
|
173
246
|
|
|
174
247
|
|
|
248
|
+
def _run_post_eval_bg(
|
|
249
|
+
guardrail: Guardrail,
|
|
250
|
+
output_text: str,
|
|
251
|
+
agent_id: str | None,
|
|
252
|
+
agent_name: str | None,
|
|
253
|
+
captured_ctx: otel_context.Context,
|
|
254
|
+
) -> None:
|
|
255
|
+
"""Run a single post-output guardrail evaluation on a background thread.
|
|
256
|
+
|
|
257
|
+
Re-attaches the OTel context captured at submit time so the eval span
|
|
258
|
+
parents under the same agent invocation, not under whatever happened to
|
|
259
|
+
be active in this worker. Errors are logged, never raised — this thread
|
|
260
|
+
has no caller to surface them to.
|
|
261
|
+
"""
|
|
262
|
+
token = otel_context.attach(captured_ctx)
|
|
263
|
+
try:
|
|
264
|
+
guardrail.evaluate(output_text, agent_id=agent_id, agent_name=agent_name)
|
|
265
|
+
except Exception: # noqa: BLE001
|
|
266
|
+
logger.exception("guardrail %s raised during post_output eval", guardrail.name)
|
|
267
|
+
finally:
|
|
268
|
+
otel_context.detach(token)
|
|
269
|
+
|
|
270
|
+
|
|
175
271
|
def register_guardrails(agent: Any, guardrails: Iterable[Guardrail]) -> None:
|
|
176
272
|
"""Emit registration spans without wrapping the agent.
|
|
177
273
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|