tracectrl 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tracectrl-0.2.0 → tracectrl-0.3.0}/PKG-INFO +2 -2
- {tracectrl-0.2.0 → tracectrl-0.3.0}/README.md +1 -1
- {tracectrl-0.2.0 → tracectrl-0.3.0}/pyproject.toml +1 -1
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/__init__.py +1 -1
- tracectrl-0.3.0/src/tracectrl/guardrails/judge.py +391 -0
- tracectrl-0.2.0/src/tracectrl/guardrails/judge.py +0 -205
- {tracectrl-0.2.0 → tracectrl-0.3.0}/.gitignore +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/LICENSE +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/_tui.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/agent_tagging.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/cli.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/config.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/context.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/exporter.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/guardrails/__init__.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/guardrails/guardrail.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/guardrails/strands_hook.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/inference.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/processor.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/protector.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/schema.py +0 -0
- {tracectrl-0.2.0 → tracectrl-0.3.0}/src/tracectrl/session.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tracectrl
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: TraceCtrl SDK — agentic AI security observability
|
|
5
5
|
Author: CloudsineAI
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -52,7 +52,7 @@ StrandsInstrumentor().instrument()
|
|
|
52
52
|
|
|
53
53
|
Two guardrail providers, designed to coexist on the same agent:
|
|
54
54
|
|
|
55
|
-
**1. Built-in LLM judge** — declarative guardrails evaluated by a Bedrock model:
|
|
55
|
+
**1. Built-in LLM judge** — declarative guardrails evaluated by a Bedrock OR Gemini model (auto-detected from the `judge_llm` you pass in):
|
|
56
56
|
|
|
57
57
|
```python
|
|
58
58
|
from tracectrl.guardrails import Guardrail, wrap_agent_with_guardrails
|
|
@@ -37,7 +37,7 @@ StrandsInstrumentor().instrument()
|
|
|
37
37
|
|
|
38
38
|
Two guardrail providers, designed to coexist on the same agent:
|
|
39
39
|
|
|
40
|
-
**1. Built-in LLM judge** — declarative guardrails evaluated by a Bedrock model:
|
|
40
|
+
**1. Built-in LLM judge** — declarative guardrails evaluated by a Bedrock OR Gemini model (auto-detected from the `judge_llm` you pass in):
|
|
41
41
|
|
|
42
42
|
```python
|
|
43
43
|
from tracectrl.guardrails import Guardrail, wrap_agent_with_guardrails
|
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
"""Judge LLM invocation with structured output parsing.
|
|
2
|
+
|
|
3
|
+
Supports two backends, picked by inspecting the `judge_llm` argument:
|
|
4
|
+
|
|
5
|
+
- **Strands BedrockModel** (default for everything we don't recognise) —
|
|
6
|
+
Calls boto3's `bedrock-runtime.converse` directly with a single-tool
|
|
7
|
+
schema. We bind to boto3 instead of going through Strands'
|
|
8
|
+
`BedrockModel.structured_output` because that public surface is async
|
|
9
|
+
and its method names have shifted across versions.
|
|
10
|
+
|
|
11
|
+
- **Strands GeminiModel** — Calls Google's genai SDK via the client
|
|
12
|
+
object embedded in the GeminiModel. We force structured output by
|
|
13
|
+
setting `response_mime_type="application/json"` plus an OpenAPI-style
|
|
14
|
+
`response_schema`. No AWS credentials required — the same
|
|
15
|
+
`GOOGLE_API_KEY` the workshop's agents are using is enough.
|
|
16
|
+
|
|
17
|
+
Both backends produce a `JudgeResult` directly so the retry loop in
|
|
18
|
+
`invoke_judge` is provider-agnostic.
|
|
19
|
+
|
|
20
|
+
On invocation/parse failure we re-prompt once; a second failure defaults to
|
|
21
|
+
`pass=true` (a broken judge must not spam violation alerts).
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import json
|
|
27
|
+
import logging
|
|
28
|
+
from dataclasses import dataclass
|
|
29
|
+
from typing import Any, Optional
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
# Single tool the judge is forced to call. Schema matches the PRD exactly.
# The name and schema are sent to Bedrock as the only entry in the `tools`
# list built by `_call_model`.
_JUDGE_TOOL_NAME = "record_decision"
_JUDGE_TOOL_SCHEMA = {
    "type": "object",
    "properties": {
        "pass": {
            "type": "boolean",
            "description": "true if the output satisfies the guardrail; false if it violates.",
        },
        "reason": {
            "type": "string",
            "description": "One-sentence explanation of the decision.",
        },
        # Nullable: the judge reports null when there is nothing to quote.
        "evidence": {
            "type": ["string", "null"],
            "description": "Verbatim snippet that triggered a fail; null if pass.",
        },
    },
    # `evidence` is optional; only the verdict and its reason are mandatory.
    "required": ["pass", "reason"],
}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Gemini's response_schema lives under the Vertex / OpenAPI dialect — it
# doesn't accept union types like ["string", "null"], so `evidence` is declared
# as a plain string here. It stays out of `required` so the model can omit it
# on a pass; if the model sets it to an empty string instead, the Gemini parser
# normalises that to None for symmetry with the Bedrock JudgeResult shape.
_GEMINI_JUDGE_SCHEMA = {
    "type": "object",
    "properties": {
        "pass": {
            "type": "boolean",
            "description": "true if the output satisfies the guardrail; false if it violates.",
        },
        "reason": {
            "type": "string",
            "description": "One-sentence explanation of the decision.",
        },
        "evidence": {
            "type": "string",
            "description": "Verbatim snippet that triggered a fail; empty string if pass.",
        },
    },
    # Only the verdict and its reason are mandatory; `evidence` is optional.
    "required": ["pass", "reason"],
}
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass
class JudgeResult:
    """Structured verdict returned by a guardrail judge invocation."""

    # True when the judged output satisfies the guardrail; False on a violation.
    passed: bool
    # Short explanation of the decision as reported by the judge.
    reason: str
    # Snippet that triggered a fail; None when the judge supplied no evidence.
    evidence: Optional[str]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def invoke_judge(judge_llm: Any, prompt: str) -> JudgeResult:
    """Run the guardrail judge with one retry and a default-pass fallback.

    The backend callable is chosen once by `_resolve_invoker` and then called
    at most twice. Any exception raised by the backend counts as a failed
    attempt and is logged at warning level. If both attempts fail, a passing
    `JudgeResult` is returned so that a broken judge does not generate
    violation alerts.

    Args:
        judge_llm: Model object handed to the backend unchanged.
        prompt: Judge prompt describing the guardrail and the text to assess.

    Returns:
        The backend's `JudgeResult`, or a default pass after two failures.
    """
    backend = _resolve_invoker(judge_llm)
    failure: Optional[Exception] = None

    attempt = 0
    while attempt < 2:
        attempt += 1
        try:
            verdict = backend(judge_llm, prompt, attempt=attempt)
        except Exception as err:  # noqa: BLE001 — broad on purpose; retry once
            failure = err
            logger.warning(
                "judge attempt %d failed via %s: %s",
                attempt,
                getattr(backend, "__name__", "unknown"),
                err,
            )
        else:
            return verdict

    # Both attempts raised: fall back to a pass rather than a false alarm.
    logger.warning(
        "guardrail judge failed to produce valid JSON twice; defaulting to pass (last error: %s)",
        failure,
    )
    return JudgeResult(
        passed=True,
        reason="judge parse failed; defaulted to pass",
        evidence=None,
    )
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _resolve_invoker(judge_llm: Any):
    """Return the backend callable to use for `judge_llm`.

    Objects recognised by `_is_gemini_model` get the Gemini invoker; every
    other object falls through to the Bedrock invoker, which keeps existing
    callers on their original path.
    """
    return _invoke_gemini_judge if _is_gemini_model(judge_llm) else _invoke_bedrock_judge
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _is_gemini_model(judge_llm: Any) -> bool:
|
|
127
|
+
"""True only for a real Strands GeminiModel instance. Lazy-imports so
|
|
128
|
+
SDK callers without `strands.models.gemini` (older Strands, custom
|
|
129
|
+
builds) keep working — they just always go to the Bedrock path."""
|
|
130
|
+
try:
|
|
131
|
+
from strands.models.gemini import GeminiModel # type: ignore
|
|
132
|
+
except ImportError:
|
|
133
|
+
return False
|
|
134
|
+
return isinstance(judge_llm, GeminiModel)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _invoke_bedrock_judge(judge_llm: Any, prompt: str, *, attempt: int) -> JudgeResult:
    """Bedrock backend: call the model, then parse its response.

    A thin composition of `_call_model` and `_parse_judge_response`, so the
    Bedrock path has the same call shape as the Gemini backend used by
    `invoke_judge`. Exceptions from either step propagate to the caller.

    Args:
        judge_llm: Model object from which the Bedrock model id is resolved.
        prompt: Judge prompt sent as the user message.
        attempt: 1-based attempt number, forwarded to `_call_model`.
    """
    return _parse_judge_response(_call_model(judge_llm, prompt, attempt=attempt))
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _resolve_bedrock_model(judge_llm: Any) -> tuple[str, str]:
|
|
150
|
+
"""Pull (model_id, region) from a Strands BedrockModel or from explicit config."""
|
|
151
|
+
# Strands BedrockModel stores config in `_config` / `get_config()`.
|
|
152
|
+
config: dict = {}
|
|
153
|
+
if hasattr(judge_llm, "get_config"):
|
|
154
|
+
try:
|
|
155
|
+
cfg = judge_llm.get_config()
|
|
156
|
+
if isinstance(cfg, dict):
|
|
157
|
+
config = cfg
|
|
158
|
+
except Exception: # noqa: BLE001
|
|
159
|
+
pass
|
|
160
|
+
if not config and hasattr(judge_llm, "config"):
|
|
161
|
+
c = judge_llm.config
|
|
162
|
+
if isinstance(c, dict):
|
|
163
|
+
config = c
|
|
164
|
+
model_id = (
|
|
165
|
+
config.get("model_id")
|
|
166
|
+
or getattr(judge_llm, "model_id", None)
|
|
167
|
+
or getattr(judge_llm, "model", None)
|
|
168
|
+
)
|
|
169
|
+
region = (
|
|
170
|
+
config.get("region_name")
|
|
171
|
+
or getattr(judge_llm, "region_name", None)
|
|
172
|
+
or "us-east-1"
|
|
173
|
+
)
|
|
174
|
+
if not model_id:
|
|
175
|
+
raise RuntimeError(f"could not extract model_id from judge_llm: {type(judge_llm).__name__}")
|
|
176
|
+
return model_id, region
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _call_model(judge_llm: Any, prompt: str, *, attempt: int) -> Any:
    """Send the judge prompt to Bedrock `converse`, forcing a tool call.

    boto3 is imported lazily so that importing this module does not require
    it. The model id and region come from `_resolve_bedrock_model`.

    Args:
        judge_llm: Model object describing which Bedrock model to call.
        prompt: Judge prompt, sent as the single user message.
        attempt: 1-based attempt number; on attempt 2 the system prompt gains
            a reminder that the previous response was not valid JSON.

    Returns:
        The raw response returned by `client.converse`.
    """
    import boto3

    model_id, region = _resolve_bedrock_model(judge_llm)

    instructions = [
        "You are an automated guardrail judge. You MUST call the "
        f"`{_JUDGE_TOOL_NAME}` tool with your decision. Do not answer in plain text."
    ]
    if attempt == 2:
        instructions.append(
            " Your previous response was not valid JSON; respond by calling the tool exactly."
        )
    system_prompt = "".join(instructions)

    decision_tool = {
        "toolSpec": {
            "name": _JUDGE_TOOL_NAME,
            "description": "Record the guardrail pass/fail decision.",
            "inputSchema": {"json": _JUDGE_TOOL_SCHEMA},
        }
    }
    tool_config = {
        "tools": [decision_tool],
        # `any` forces the model to call SOME tool; with a single tool in the
        # list, that tool is the only one it can call.
        "toolChoice": {"any": {}},
    }

    runtime = boto3.client("bedrock-runtime", region_name=region)
    return runtime.converse(
        modelId=model_id,
        messages=[{"role": "user", "content": [{"text": prompt}]}],
        system=[{"text": system_prompt}],
        toolConfig=tool_config,
    )
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _coerce_pass_flag(value: Any) -> bool:
    """Interpret the judge's `pass` field without trusting string truthiness.

    `bool("false")` is True, so a verdict that arrives as a string (possible
    on the plain-text fallback path) must be read by content. Only "true" and
    "false" (any case, surrounding whitespace ignored) are accepted as strings;
    non-string values keep their ordinary truthiness.

    Raises:
        ValueError: if `value` is a string other than "true" or "false".
    """
    if isinstance(value, str):
        lowered = value.strip().lower()
        if lowered == "true":
            return True
        if lowered == "false":
            return False
        raise ValueError(f"judge JSON has non-boolean 'pass' value: {value!r}")
    return bool(value)


def _parse_judge_response(raw: Any) -> JudgeResult:
    """Extract the structured decision from a Bedrock converse response.

    Lookup order:
      1. the first `toolUse` block under `output.message.content`;
      2. flattened `input` / `toolUse.input` keys on the response dict;
      3. the first JSON object found in the stringified response.

    Args:
        raw: The response from `converse` (normally a dict), or any object
            whose string form may contain the JSON decision.

    Returns:
        A `JudgeResult`; empty or missing `evidence` is normalised to None.

    Raises:
        ValueError: if no JSON object can be found, if `pass` or `reason` is
            missing, or if `pass` is a string other than "true"/"false".
    """
    payload: Any = None

    # Bedrock converse response shape: {output: {message: {content: [{toolUse: {input: {...}}}]}}}
    if isinstance(raw, dict):
        output = raw.get("output") or {}
        message = output.get("message") if isinstance(output, dict) else None
        if isinstance(message, dict):
            for block in message.get("content", []) or []:
                if isinstance(block, dict) and "toolUse" in block:
                    tool_use = block["toolUse"]
                    # A malformed (non-dict) toolUse falls through to the
                    # fallbacks below instead of raising AttributeError.
                    if isinstance(tool_use, dict):
                        payload = tool_use.get("input")
                    break
        if payload is None:
            # Some intermediaries flatten this — try direct keys.
            flat_tool_use = raw.get("toolUse")
            payload = raw.get("input") or (
                flat_tool_use.get("input") if isinstance(flat_tool_use, dict) else None
            )

    # Plain text fallback — try to find a JSON object in the string.
    if payload is None:
        payload = _extract_json_object(_stringify(raw))

    if not isinstance(payload, dict):
        raise ValueError(f"could not extract JSON object from judge response: {raw!r}")

    if "pass" not in payload or "reason" not in payload:
        raise ValueError(f"judge JSON missing required keys: {payload!r}")

    evidence = payload.get("evidence")
    return JudgeResult(
        passed=_coerce_pass_flag(payload["pass"]),
        reason=str(payload.get("reason", "")),
        evidence=str(evidence) if evidence else None,
    )
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _stringify(raw: Any) -> str:
|
|
253
|
+
if isinstance(raw, str):
|
|
254
|
+
return raw
|
|
255
|
+
if isinstance(raw, dict):
|
|
256
|
+
return json.dumps(raw)
|
|
257
|
+
text = getattr(raw, "text", None)
|
|
258
|
+
if isinstance(text, str):
|
|
259
|
+
return text
|
|
260
|
+
return str(raw)
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _extract_json_object(text: str) -> Optional[dict]:
|
|
264
|
+
"""Find the first balanced top-level JSON object in `text`."""
|
|
265
|
+
start = text.find("{")
|
|
266
|
+
while start != -1:
|
|
267
|
+
depth = 0
|
|
268
|
+
for i in range(start, len(text)):
|
|
269
|
+
ch = text[i]
|
|
270
|
+
if ch == "{":
|
|
271
|
+
depth += 1
|
|
272
|
+
elif ch == "}":
|
|
273
|
+
depth -= 1
|
|
274
|
+
if depth == 0:
|
|
275
|
+
candidate = text[start : i + 1]
|
|
276
|
+
try:
|
|
277
|
+
obj = json.loads(candidate)
|
|
278
|
+
if isinstance(obj, dict):
|
|
279
|
+
return obj
|
|
280
|
+
except json.JSONDecodeError:
|
|
281
|
+
break
|
|
282
|
+
start = text.find("{", start + 1)
|
|
283
|
+
return None
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
# ---------------------------------------------------------------------------
|
|
287
|
+
# Gemini backend
|
|
288
|
+
# ---------------------------------------------------------------------------
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _invoke_gemini_judge(judge_llm: Any, prompt: str, *, attempt: int) -> JudgeResult:
    """Gemini backend: request a JSON verdict through the `google.genai` client.

    The client is taken from `judge_llm.client`; when that attribute is absent
    or None, one is built from `judge_llm.client_args`. Structured output is
    requested with `response_mime_type='application/json'` plus
    `_GEMINI_JUDGE_SCHEMA`. On the second attempt the system instruction is
    sharpened to ask for strict JSON.

    Args:
        judge_llm: Model object providing the client and the model id.
        prompt: Judge prompt, sent as the request contents.
        attempt: 1-based attempt number supplied by `invoke_judge`.

    Returns:
        A `JudgeResult`; empty or missing `evidence` is normalised to None.

    Raises:
        RuntimeError: if a client must be built but `google-genai` is missing,
            or if no model id can be resolved.
        ValueError: if the response body is empty, is not a JSON object, lacks
            `pass`/`reason`, or carries a non-boolean string for `pass`.
    """
    client = getattr(judge_llm, "client", None)
    if client is None:
        # No embedded client — construct one from the model's client_args.
        client_args = getattr(judge_llm, "client_args", None) or {}
        try:
            from google import genai  # type: ignore
        except ImportError as e:
            raise RuntimeError(
                "GeminiModel passed as judge_llm but `google-genai` is not "
                "installed. `pip install google-genai`."
            ) from e
        client = genai.Client(**client_args)

    model_id = _resolve_gemini_model_id(judge_llm)

    system_text = (
        "You are an automated guardrail judge. Respond with ONLY a JSON "
        "object matching the schema {pass: bool, reason: string, evidence: "
        "string}. On pass, evidence may be an empty string. On fail, "
        "evidence must be the verbatim snippet that triggered the fail "
        "(max ~200 chars). Do not include any text outside the JSON."
    )
    if attempt == 2:
        system_text += (
            " Your previous response was not parseable. Return strict JSON "
            "with no preamble, no markdown fences, no commentary."
        )

    # Lazy import — keeps SDK import-time clean for callers that never use Gemini.
    from google.genai import types as genai_types  # type: ignore

    response = client.models.generate_content(
        model=model_id,
        contents=prompt,
        config=genai_types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=_GEMINI_JUDGE_SCHEMA,
            system_instruction=system_text,
            # Low temperature — judges should be near-deterministic.
            temperature=0.0,
        ),
    )

    text = (response.text or "").strip()
    if not text:
        raise ValueError("gemini judge returned empty body")

    payload = json.loads(text)

    # A bare JSON string, number, bool, or list would otherwise fail below
    # with an incidental AttributeError/TypeError; reject it explicitly.
    if not isinstance(payload, dict):
        raise ValueError(f"gemini judge returned non-object JSON: {payload!r}")

    if "pass" not in payload or "reason" not in payload:
        raise ValueError(f"gemini judge JSON missing required keys: {payload!r}")

    # `bool("false")` is True, so a stringified verdict must be read by
    # content; anything other than "true"/"false" is treated as unparseable.
    verdict = payload["pass"]
    if isinstance(verdict, str):
        normalised = verdict.strip().lower()
        if normalised not in ("true", "false"):
            raise ValueError(f"gemini judge JSON has non-boolean 'pass' value: {verdict!r}")
        passed = normalised == "true"
    else:
        passed = bool(verdict)

    # Normalise empty-string evidence to None so downstream consumers can
    # treat 'no evidence' uniformly regardless of backend.
    raw_evidence = payload.get("evidence")
    evidence = str(raw_evidence) if raw_evidence else None

    return JudgeResult(
        passed=passed,
        reason=str(payload.get("reason", "")),
        evidence=evidence,
    )
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def _resolve_gemini_model_id(judge_llm: Any) -> str:
|
|
367
|
+
"""Extract model_id from a Strands GeminiModel. Mirrors the
|
|
368
|
+
Bedrock-side `_resolve_bedrock_model` shape but returns just the id —
|
|
369
|
+
Gemini doesn't need a region."""
|
|
370
|
+
config: dict = {}
|
|
371
|
+
if hasattr(judge_llm, "get_config"):
|
|
372
|
+
try:
|
|
373
|
+
cfg = judge_llm.get_config()
|
|
374
|
+
if isinstance(cfg, dict):
|
|
375
|
+
config = cfg
|
|
376
|
+
except Exception: # noqa: BLE001
|
|
377
|
+
pass
|
|
378
|
+
if not config and hasattr(judge_llm, "config"):
|
|
379
|
+
c = judge_llm.config
|
|
380
|
+
if isinstance(c, dict):
|
|
381
|
+
config = c
|
|
382
|
+
model_id = (
|
|
383
|
+
config.get("model_id")
|
|
384
|
+
or getattr(judge_llm, "model_id", None)
|
|
385
|
+
or getattr(judge_llm, "model", None)
|
|
386
|
+
)
|
|
387
|
+
if not model_id:
|
|
388
|
+
raise RuntimeError(
|
|
389
|
+
f"could not extract model_id from GeminiModel: {type(judge_llm).__name__}"
|
|
390
|
+
)
|
|
391
|
+
return model_id
|
|
@@ -1,205 +0,0 @@
|
|
|
1
|
-
"""Judge LLM invocation with structured output parsing.
|
|
2
|
-
|
|
3
|
-
Uses Bedrock's `converse` API directly via boto3. Strands' BedrockModel
|
|
4
|
-
wraps the same API, but its public surface is async (`structured_output`)
|
|
5
|
-
and the public method names have shifted between versions, so binding to
|
|
6
|
-
boto3 directly is far more stable. We extract `model_id` + `region` from
|
|
7
|
-
the BedrockModel object and call `bedrock-runtime.converse` ourselves.
|
|
8
|
-
|
|
9
|
-
On parse failure we re-prompt once; a second failure is treated as
|
|
10
|
-
`pass=true` (a broken judge must not spam violation alerts).
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
from __future__ import annotations
|
|
14
|
-
|
|
15
|
-
import json
|
|
16
|
-
import logging
|
|
17
|
-
from dataclasses import dataclass
|
|
18
|
-
from typing import Any, Optional
|
|
19
|
-
|
|
20
|
-
logger = logging.getLogger(__name__)
|
|
21
|
-
|
|
22
|
-
# Single tool the judge is forced to call. Schema matches the PRD exactly.
|
|
23
|
-
_JUDGE_TOOL_NAME = "record_decision"
|
|
24
|
-
_JUDGE_TOOL_SCHEMA = {
|
|
25
|
-
"type": "object",
|
|
26
|
-
"properties": {
|
|
27
|
-
"pass": {
|
|
28
|
-
"type": "boolean",
|
|
29
|
-
"description": "true if the output satisfies the guardrail; false if it violates.",
|
|
30
|
-
},
|
|
31
|
-
"reason": {
|
|
32
|
-
"type": "string",
|
|
33
|
-
"description": "One-sentence explanation of the decision.",
|
|
34
|
-
},
|
|
35
|
-
"evidence": {
|
|
36
|
-
"type": ["string", "null"],
|
|
37
|
-
"description": "Verbatim snippet that triggered a fail; null if pass.",
|
|
38
|
-
},
|
|
39
|
-
},
|
|
40
|
-
"required": ["pass", "reason"],
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
@dataclass
|
|
45
|
-
class JudgeResult:
|
|
46
|
-
passed: bool
|
|
47
|
-
reason: str
|
|
48
|
-
evidence: Optional[str]
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def invoke_judge(judge_llm: Any, prompt: str) -> JudgeResult:
|
|
52
|
-
"""Invoke the judge twice at most; second parse failure → conservative pass."""
|
|
53
|
-
last_err: Optional[Exception] = None
|
|
54
|
-
for attempt in (1, 2):
|
|
55
|
-
try:
|
|
56
|
-
raw = _call_model(judge_llm, prompt, attempt=attempt)
|
|
57
|
-
parsed = _parse_judge_response(raw)
|
|
58
|
-
return parsed
|
|
59
|
-
except Exception as exc: # noqa: BLE001 — broad on purpose; retry once
|
|
60
|
-
last_err = exc
|
|
61
|
-
logger.warning("judge attempt %d failed: %s", attempt, exc)
|
|
62
|
-
continue
|
|
63
|
-
|
|
64
|
-
logger.warning(
|
|
65
|
-
"guardrail judge failed to produce valid JSON twice; defaulting to pass (last error: %s)",
|
|
66
|
-
last_err,
|
|
67
|
-
)
|
|
68
|
-
return JudgeResult(passed=True, reason="judge parse failed; defaulted to pass", evidence=None)
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def _resolve_bedrock_model(judge_llm: Any) -> tuple[str, str]:
|
|
72
|
-
"""Pull (model_id, region) from a Strands BedrockModel or from explicit config."""
|
|
73
|
-
# Strands BedrockModel stores config in `_config` / `get_config()`.
|
|
74
|
-
config: dict = {}
|
|
75
|
-
if hasattr(judge_llm, "get_config"):
|
|
76
|
-
try:
|
|
77
|
-
cfg = judge_llm.get_config()
|
|
78
|
-
if isinstance(cfg, dict):
|
|
79
|
-
config = cfg
|
|
80
|
-
except Exception: # noqa: BLE001
|
|
81
|
-
pass
|
|
82
|
-
if not config and hasattr(judge_llm, "config"):
|
|
83
|
-
c = judge_llm.config
|
|
84
|
-
if isinstance(c, dict):
|
|
85
|
-
config = c
|
|
86
|
-
model_id = (
|
|
87
|
-
config.get("model_id")
|
|
88
|
-
or getattr(judge_llm, "model_id", None)
|
|
89
|
-
or getattr(judge_llm, "model", None)
|
|
90
|
-
)
|
|
91
|
-
region = (
|
|
92
|
-
config.get("region_name")
|
|
93
|
-
or getattr(judge_llm, "region_name", None)
|
|
94
|
-
or "us-east-1"
|
|
95
|
-
)
|
|
96
|
-
if not model_id:
|
|
97
|
-
raise RuntimeError(f"could not extract model_id from judge_llm: {type(judge_llm).__name__}")
|
|
98
|
-
return model_id, region
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def _call_model(judge_llm: Any, prompt: str, *, attempt: int) -> Any:
|
|
102
|
-
"""Call Bedrock converse with tool-use forcing the JSON schema.
|
|
103
|
-
|
|
104
|
-
boto3 is bundled with every AWS Lambda / Strands deploy; importing it lazily
|
|
105
|
-
here keeps the SDK's import-time footprint clean.
|
|
106
|
-
"""
|
|
107
|
-
import boto3
|
|
108
|
-
|
|
109
|
-
model_id, region = _resolve_bedrock_model(judge_llm)
|
|
110
|
-
|
|
111
|
-
system = (
|
|
112
|
-
"You are an automated guardrail judge. You MUST call the "
|
|
113
|
-
f"`{_JUDGE_TOOL_NAME}` tool with your decision. Do not answer in plain text."
|
|
114
|
-
)
|
|
115
|
-
if attempt == 2:
|
|
116
|
-
system += " Your previous response was not valid JSON; respond by calling the tool exactly."
|
|
117
|
-
|
|
118
|
-
client = boto3.client("bedrock-runtime", region_name=region)
|
|
119
|
-
response = client.converse(
|
|
120
|
-
modelId=model_id,
|
|
121
|
-
messages=[{"role": "user", "content": [{"text": prompt}]}],
|
|
122
|
-
system=[{"text": system}],
|
|
123
|
-
toolConfig={
|
|
124
|
-
"tools": [{
|
|
125
|
-
"toolSpec": {
|
|
126
|
-
"name": _JUDGE_TOOL_NAME,
|
|
127
|
-
"description": "Record the guardrail pass/fail decision.",
|
|
128
|
-
"inputSchema": {"json": _JUDGE_TOOL_SCHEMA},
|
|
129
|
-
}
|
|
130
|
-
}],
|
|
131
|
-
# `any` forces the model to call SOME tool; combined with a single
|
|
132
|
-
# tool in the list this guarantees we get our schema back.
|
|
133
|
-
"toolChoice": {"any": {}},
|
|
134
|
-
},
|
|
135
|
-
)
|
|
136
|
-
return response
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
def _parse_judge_response(raw: Any) -> JudgeResult:
|
|
140
|
-
"""Extract the structured decision from a Bedrock converse response."""
|
|
141
|
-
payload: Optional[dict] = None
|
|
142
|
-
|
|
143
|
-
# Bedrock converse response shape: {output: {message: {content: [{toolUse: {input: {...}}}]}}}
|
|
144
|
-
if isinstance(raw, dict):
|
|
145
|
-
output = raw.get("output") or {}
|
|
146
|
-
message = output.get("message") if isinstance(output, dict) else None
|
|
147
|
-
if isinstance(message, dict):
|
|
148
|
-
for block in message.get("content", []) or []:
|
|
149
|
-
if isinstance(block, dict) and "toolUse" in block:
|
|
150
|
-
payload = block["toolUse"].get("input")
|
|
151
|
-
break
|
|
152
|
-
if payload is None:
|
|
153
|
-
# Some intermediaries flatten this — try direct keys.
|
|
154
|
-
payload = raw.get("input") or raw.get("toolUse", {}).get("input")
|
|
155
|
-
|
|
156
|
-
# Plain text fallback — try to find a JSON object in the string.
|
|
157
|
-
if payload is None:
|
|
158
|
-
text = _stringify(raw)
|
|
159
|
-
payload = _extract_json_object(text)
|
|
160
|
-
|
|
161
|
-
if not isinstance(payload, dict):
|
|
162
|
-
raise ValueError(f"could not extract JSON object from judge response: {raw!r}")
|
|
163
|
-
|
|
164
|
-
if "pass" not in payload or "reason" not in payload:
|
|
165
|
-
raise ValueError(f"judge JSON missing required keys: {payload!r}")
|
|
166
|
-
|
|
167
|
-
return JudgeResult(
|
|
168
|
-
passed=bool(payload["pass"]),
|
|
169
|
-
reason=str(payload.get("reason", "")),
|
|
170
|
-
evidence=(str(payload["evidence"]) if payload.get("evidence") else None),
|
|
171
|
-
)
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
def _stringify(raw: Any) -> str:
|
|
175
|
-
if isinstance(raw, str):
|
|
176
|
-
return raw
|
|
177
|
-
if isinstance(raw, dict):
|
|
178
|
-
return json.dumps(raw)
|
|
179
|
-
text = getattr(raw, "text", None)
|
|
180
|
-
if isinstance(text, str):
|
|
181
|
-
return text
|
|
182
|
-
return str(raw)
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
def _extract_json_object(text: str) -> Optional[dict]:
|
|
186
|
-
"""Find the first balanced top-level JSON object in `text`."""
|
|
187
|
-
start = text.find("{")
|
|
188
|
-
while start != -1:
|
|
189
|
-
depth = 0
|
|
190
|
-
for i in range(start, len(text)):
|
|
191
|
-
ch = text[i]
|
|
192
|
-
if ch == "{":
|
|
193
|
-
depth += 1
|
|
194
|
-
elif ch == "}":
|
|
195
|
-
depth -= 1
|
|
196
|
-
if depth == 0:
|
|
197
|
-
candidate = text[start : i + 1]
|
|
198
|
-
try:
|
|
199
|
-
obj = json.loads(candidate)
|
|
200
|
-
if isinstance(obj, dict):
|
|
201
|
-
return obj
|
|
202
|
-
except json.JSONDecodeError:
|
|
203
|
-
break
|
|
204
|
-
start = text.find("{", start + 1)
|
|
205
|
-
return None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|