tracectrl 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tracectrl
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: TraceCtrl SDK — agentic AI security observability
5
5
  Author: CloudsineAI
6
6
  License-Expression: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "tracectrl"
7
- version = "0.3.0"
7
+ version = "0.3.2"
8
8
  description = "TraceCtrl SDK — agentic AI security observability"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -5,7 +5,7 @@
5
5
  from pkgutil import extend_path
6
6
  __path__ = extend_path(__path__, __name__)
7
7
 
8
- __version__ = "0.3.0"
8
+ __version__ = "0.3.2"
9
9
 
10
10
  from tracectrl.config import configure # noqa: F401
11
11
  from tracectrl.context import ingress # noqa: F401
@@ -82,7 +82,11 @@ def get_tracer_provider() -> TracerProvider:
82
82
 
83
83
  headers = {}
84
84
  if _config.api_key:
85
- headers["Authorization"] = f"Bearer {_config.api_key}"
85
+ # Lowercase key: gRPC metadata keys MUST be lowercase (HTTP/2 spec;
86
+ # the grpc lib rejects "Authorization" with "Illegal header key").
87
+ # Lowercase is also valid for OTLP/HTTP (HTTP/1.1 headers are
88
+ # case-insensitive), so this works for both exporters.
89
+ headers["authorization"] = f"Bearer {_config.api_key}"
86
90
 
87
91
  normalized_endpoint, insecure = _normalize_endpoint(_config.endpoint)
88
92
 
@@ -297,19 +297,7 @@ def _invoke_gemini_judge(judge_llm: Any, prompt: str, *, attempt: int) -> JudgeR
297
297
  attempt we sharpen the system instruction so the model recovers from
298
298
  whatever malformed-JSON cause the first attempt hit.
299
299
  """
300
- client = getattr(judge_llm, "client", None)
301
- if client is None:
302
- # Older Strands or unusual init — try to construct one from
303
- # client_args, mirroring what Strands' GeminiModel does internally.
304
- client_args = getattr(judge_llm, "client_args", None) or {}
305
- try:
306
- from google import genai # type: ignore
307
- except ImportError as e:
308
- raise RuntimeError(
309
- "GeminiModel passed as judge_llm but `google-genai` is not "
310
- "installed. `pip install google-genai`."
311
- ) from e
312
- client = genai.Client(**client_args)
300
+ client = _resolve_gemini_client(judge_llm)
313
301
 
314
302
  model_id = _resolve_gemini_model_id(judge_llm)
315
303
 
@@ -363,6 +351,44 @@ def _invoke_gemini_judge(judge_llm: Any, prompt: str, *, attempt: int) -> JudgeR
363
351
  )
364
352
 
365
353
 
354
+ def _resolve_gemini_client(judge_llm: Any) -> Any:
355
+ """Return a cached `google.genai.Client` for this judge_llm, building it
356
+ once and stashing it on the judge_llm instance.
357
+
358
+ Strands' `GeminiModel` does NOT expose a `.client` attribute — it stores
359
+ `_custom_client` + `client_args` and builds a fresh `genai.Client` on
360
+ every request via `_get_client()`. Before this cache, every guardrail
361
+ evaluation was constructing a brand new `genai.Client` (with its own
362
+ httpx pool and credential setup), which under sustained load against
363
+ the Gemini preview models has been observed to stall judge calls and
364
+ starve subsequent agent invocations of FDs. One client per judge_llm
365
+ is enough — `genai.Client` is documented as not safe to share across
366
+ asyncio event loops, but we only call it from the synchronous path on
367
+ a dedicated thread, so a single instance is correct here.
368
+ """
369
+ cached = getattr(judge_llm, "_tracectrl_genai_client", None)
370
+ if cached is not None:
371
+ return cached
372
+ # If the GeminiModel was constructed with an injected client, honour it.
373
+ injected = getattr(judge_llm, "_custom_client", None)
374
+ if injected is not None:
375
+ return injected
376
+ client_args = getattr(judge_llm, "client_args", None) or {}
377
+ try:
378
+ from google import genai # type: ignore
379
+ except ImportError as e:
380
+ raise RuntimeError(
381
+ "GeminiModel passed as judge_llm but `google-genai` is not "
382
+ "installed. `pip install google-genai`."
383
+ ) from e
384
+ client = genai.Client(**client_args)
385
+ try:
386
+ judge_llm._tracectrl_genai_client = client
387
+ except Exception: # noqa: BLE001 — frozen dataclasses etc.
388
+ pass
389
+ return client
390
+
391
+
366
392
  def _resolve_gemini_model_id(judge_llm: Any) -> str:
367
393
  """Extract model_id from a Strands GeminiModel. Mirrors the
368
394
  Bedrock-side `_resolve_bedrock_model` shape but returns just the id —
@@ -6,14 +6,35 @@ callbacks. So we wrap the agent's `__call__` method directly: run the agent,
6
6
  capture its response, then evaluate each guardrail in order. This keeps the
7
7
  core `Guardrail` class framework-agnostic and isolates the Strands knowledge
8
8
  to this file.
9
+
10
+ Two correctness details that bit us before:
11
+
12
+ - **Post-output evals run on a background thread.** Strands' `__call__`
13
+ is sync-on-the-surface but internally uses `run_async` (a fresh
14
+ ThreadPoolExecutor + asyncio.run per call). If we evaluate the judge
15
+ synchronously after `super().__call__()` returns, the agent caller
16
+ blocks on the judge round-trip (2–8s for Gemini preview models with
17
+ `response_schema`). To the user it looks like the agent "stops" after
18
+ producing output. We fire-and-forget the eval onto a bounded executor,
19
+ re-attaching the captured OTel context in the worker so the span lands
20
+ under the same agent invocation. Pre-input stays sync — semantically
21
+ must run before the agent fires.
22
+
23
+ - **Snapshot the eval text BEFORE submitting.** The eval text builder
24
+ reads `agent.messages`, which Strands mutates on subsequent calls.
25
+ Without a snapshot, a fast follow-up prompt would race the bg thread
26
+ and the judge would see a half-mutated history.
9
27
  """
10
28
 
11
29
  from __future__ import annotations
12
30
 
31
+ import atexit
13
32
  import logging
33
+ from concurrent.futures import ThreadPoolExecutor
14
34
  from datetime import datetime, timezone
15
35
  from typing import Any, Iterable, List
16
36
 
37
+ from opentelemetry import context as otel_context
17
38
  from opentelemetry import trace
18
39
 
19
40
  from tracectrl.guardrails.guardrail import Guardrail, _model_identifier
@@ -22,6 +43,36 @@ logger = logging.getLogger(__name__)
22
43
 
23
44
 
24
45
  _REGISTRATION_SPAN_NAME = "tracectrl.guardrail.registered"
46
+ _INVOCATION_SPAN_NAME = "tracectrl.agent.invocation"
47
+
48
+
49
+ # Bounded executor for post-output evals. max_workers=2 keeps memory + FD
50
+ # usage tight; the queue is unbounded but in practice a single agent caller
51
+ # can't outpace 2 workers by much (judge calls are 1–8s each). Workers are
52
+ # not daemon threads on Python 3.9+, so a hung judge can delay process exit.
53
+ # atexit calls shutdown(wait=True) so short scripts still flush their spans.
54
+ _eval_executor: ThreadPoolExecutor | None = None
55
+
56
+
57
+ def _get_eval_executor() -> ThreadPoolExecutor:
58
+ global _eval_executor
59
+ if _eval_executor is None:
60
+ _eval_executor = ThreadPoolExecutor(
61
+ max_workers=2,
62
+ thread_name_prefix="tracectrl-guardrail-eval",
63
+ )
64
+ atexit.register(_shutdown_eval_executor)
65
+ return _eval_executor
66
+
67
+
68
+ def _shutdown_eval_executor() -> None:
69
+ global _eval_executor
70
+ if _eval_executor is not None:
71
+ # wait=True so a script that runs `agent(...)` then exits still
72
+ # flushes the eval span. Workers are bounded, so worst case we
73
+ # wait one judge round-trip per pending eval.
74
+ _eval_executor.shutdown(wait=True)
75
+ _eval_executor = None
25
76
 
26
77
 
27
78
  def _emit_registration_span(agent_id: str, agent_name: str, guardrail: Guardrail) -> None:
@@ -132,32 +183,54 @@ def wrap_agent_with_guardrails(agent: Any, guardrails: Iterable[Guardrail]) -> A
132
183
  a_id = getattr(self, "_tracectrl_agent_id", None)
133
184
  a_name = getattr(self, "_tracectrl_agent_name", None)
134
185
 
135
- if pre:
136
- user_input = _extract_input(args, kwargs)
137
- if user_input is not None:
138
- for g in pre:
139
- try:
140
- g.evaluate(user_input, agent_id=a_id, agent_name=a_name)
141
- except Exception: # noqa: BLE001
142
- logger.exception("guardrail %s raised during pre_input eval", g.name)
143
-
144
- response = super(GuardedAgent, self).__call__(*args, **kwargs)
145
-
146
- if post:
147
- # The agent's final response is often a terse status summary
148
- # ("Payment workflow complete.") that hides the actual content
149
- # we need to screen — tool inputs/outputs, OCR'd text from
150
- # session context, etc. Pull the full message history off the
151
- # Strands agent so the judge sees the COMPLETE picture, not just
152
- # the synthesized summary.
153
- output_text = _build_eval_text(self, response)
154
- for g in post:
155
- try:
156
- g.evaluate(output_text, agent_id=a_id, agent_name=a_name)
157
- except Exception: # noqa: BLE001 — never break the agent
158
- logger.exception("guardrail %s raised during post_output eval", g.name)
186
+ tracer = trace.get_tracer("tracectrl.guardrails")
159
187
 
160
- return response
188
+ # Outer span wraps the entire invocation. Strands' run_async copies
189
+ # the OTel context into its worker thread, so the invoke_agent /
190
+ # chat / tool spans Strands creates become children of this span.
191
+ # The bg-thread post-eval re-attaches this same context, so its
192
+ # eval span also lands here. Net result: one tidy tree per call.
193
+ with tracer.start_as_current_span(_INVOCATION_SPAN_NAME) as invocation_span:
194
+ if a_id:
195
+ invocation_span.set_attribute("tracectrl.agent.id", a_id)
196
+ if a_name:
197
+ invocation_span.set_attribute("tracectrl.agent.name", a_name)
198
+
199
+ if pre:
200
+ user_input = _extract_input(args, kwargs)
201
+ if user_input is not None:
202
+ for g in pre:
203
+ try:
204
+ g.evaluate(user_input, agent_id=a_id, agent_name=a_name)
205
+ except Exception: # noqa: BLE001
206
+ logger.exception("guardrail %s raised during pre_input eval", g.name)
207
+
208
+ response = super(GuardedAgent, self).__call__(*args, **kwargs)
209
+
210
+ if post:
211
+ # Snapshot the eval text NOW, synchronously and before returning
212
+ # to the caller (no lock is held) — a follow-up agent call would
213
+ # mutate `agent.messages` and racing the bg worker against
214
+ # that mutation is what produces the "memory leak between
215
+ # agents" symptom users have reported.
216
+ output_text = _build_eval_text(self, response)
217
+ captured_ctx = otel_context.get_current()
218
+ for g in post:
219
+ try:
220
+ _get_eval_executor().submit(
221
+ _run_post_eval_bg,
222
+ g,
223
+ output_text,
224
+ a_id,
225
+ a_name,
226
+ captured_ctx,
227
+ )
228
+ except Exception: # noqa: BLE001 — never break the agent
229
+ logger.exception(
230
+ "guardrail %s failed to submit post_output eval", g.name
231
+ )
232
+
233
+ return response
161
234
 
162
235
  GuardedAgent = type(
163
236
  f"_TraceCtrlGuarded_{cls.__name__}",
@@ -172,6 +245,29 @@ def wrap_agent_with_guardrails(agent: Any, guardrails: Iterable[Guardrail]) -> A
172
245
  return agent
173
246
 
174
247
 
248
+ def _run_post_eval_bg(
249
+ guardrail: Guardrail,
250
+ output_text: str,
251
+ agent_id: str | None,
252
+ agent_name: str | None,
253
+ captured_ctx: otel_context.Context,
254
+ ) -> None:
255
+ """Run a single post-output guardrail evaluation on a background thread.
256
+
257
+ Re-attaches the OTel context captured at submit time so the eval span
258
+ parents under the same agent invocation, not under whatever happened to
259
+ be active in this worker. Errors are logged, never raised — this thread
260
+ has no caller to surface them to.
261
+ """
262
+ token = otel_context.attach(captured_ctx)
263
+ try:
264
+ guardrail.evaluate(output_text, agent_id=agent_id, agent_name=agent_name)
265
+ except Exception: # noqa: BLE001
266
+ logger.exception("guardrail %s raised during post_output eval", guardrail.name)
267
+ finally:
268
+ otel_context.detach(token)
269
+
270
+
175
271
  def register_guardrails(agent: Any, guardrails: Iterable[Guardrail]) -> None:
176
272
  """Emit registration spans without wrapping the agent.
177
273
 
File without changes
File without changes
File without changes