zu-checks 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
zu_checks/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ """Zu built-in checks — the two stdlib plugin kinds that ship with the base.
2
+
3
+ * ``zu_checks.detectors`` — observation-time detectors whose Verdict severities
4
+ drive the loop (ESCALATE climbs the tier ladder; TERMINAL ends the run).
5
+ * ``zu_checks.validators`` — on-final result checks (schema shape + grounding,
6
+ the anti-hallucination provenance check).
7
+
8
+ They live in one package because both are pure-stdlib (the schema validator adds
9
+ only ``jsonschema``) and always present in the base runtime — unlike the adapter
10
+ packages (providers/tools/backends) whose separation carries distinct heavy
11
+ optional dependencies. They register through the same ``zu.detectors`` /
12
+ ``zu.validators`` entry-point groups any third-party check would.
13
+ """
@@ -0,0 +1,37 @@
1
+ """Zu built-in detectors.
2
+
3
+ A detector inspects an observation and may return a Verdict. Verdict
4
+ severities (WARN, RETRY, ESCALATE, TERMINAL) map onto the loop's control flow:
5
+ ESCALATE is the deterministic signal that climbs the tier ladder. Detectors
6
+ are where escalation is decided — never improvised by the model.
7
+ """
8
+
9
+
10
+ # What counts as page content in an observation, in preference order. The loop
11
+ # stores a fetched/rendered page under one of these keys (mirrors zu_core.loop's
12
+ # own ``_CONTENT_KEYS``); a detector must consult all of them or it goes blind to
13
+ # a tool that returns ``{"text": ...}`` / ``{"content": ...}`` instead of html.
14
+ # One source of truth, reused by ``empty`` too.
15
+ _CONTENT_KEYS = ("html", "text", "content")
16
+
17
+
18
def _html_of(ctx) -> str:
    """Return the page content carried by ``ctx.observation`` as one string.

    Every key listed in ``_CONTENT_KEYS`` whose value is a non-empty ``str``
    contributes, in ``_CONTENT_KEYS`` order, and the pieces are joined with
    newlines — so marker detectors see all content keys at once instead of
    only the first one present.

    Returns ``""`` when ``ctx`` has no ``observation`` attribute, when the
    observation is not a dict, or when no content key holds a non-empty string.
    """
    observation = getattr(ctx, "observation", None)
    if not isinstance(observation, dict):
        return ""
    pieces = []
    for key in _CONTENT_KEYS:
        candidate = observation.get(key)
        # Non-string and empty values carry no page content; skip them.
        if isinstance(candidate, str) and candidate:
            pieces.append(candidate)
    # Joining an empty list yields "", which is the "no content" result.
    return "\n".join(pieces)
31
+
32
+
33
def _contains_any(html: str, markers) -> bool:
    """Return True if any marker occurs in ``html``, ignoring case on both sides.

    This is the shared substring scan behind the marker-list detectors
    (bot-wall, js-shell).

    Args:
        html: Page content to scan.
        markers: Iterable of marker strings. Each marker is lowercased before
            it is compared, so a mixed-case entry such as ``"__NEXT_DATA__"``
            matches. Lowercasing only ``html`` (the previous behaviour) made
            every marker containing an uppercase letter impossible to match.

    Returns:
        True when at least one marker is a case-insensitive substring of
        ``html``; False otherwise (including when ``markers`` is empty).
    """
    lowered = html.lower()
    # Normalise both operands: the haystack once, each needle as it is tested.
    return any(marker.lower() in lowered for marker in markers)
@@ -0,0 +1,54 @@
1
+ """bot-wall — fires on an anti-bot interstitial (Cloudflare, captcha, etc.)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from zu_core.ports import RunContext, Scope, Severity, Verdict
6
+
7
+ from . import _contains_any, _html_of
8
+
9
+ # Strong markers: phrasing characteristic of an anti-bot interstitial, specific
10
+ # enough that their presence is treated as the signal on its own. This is a
11
+ # deterministic heuristic, not a proof: a page that *discusses* CAPTCHAs (a news
12
+ # story, this very comment) can contain "captcha" and would escalate — the cost
13
+ # is a wasted tier-2 render, not a wrong answer, and escalating a borderline page
14
+ # is the safer failure. ``cf-browser-verification`` is unambiguous; the natural-
15
+ # language phrases are the ones with residual false-positive surface.
16
+ _STRONG_MARKERS = (
17
+ "captcha",
18
+ "are you a robot",
19
+ "verify you are human",
20
+ "cf-browser-verification",
21
+ )
22
+
23
+ # Weak markers: real Cloudflare wall phrasing, but common-enough English that a
24
+ # substring match alone false-positives (an article titled "Just a Moment in
25
+ # History", a banner reading "Attention required"). They fire ONLY when a
26
+ # Cloudflare fingerprint is also present, so a normal page is never escalated.
27
+ _WEAK_MARKERS = (
28
+ "attention required",
29
+ "just a moment",
30
+ )
31
+ _CLOUDFLARE_FINGERPRINTS = (
32
+ "cloudflare",
33
+ "cf-ray",
34
+ "cf-browser-verification",
35
+ "__cf",
36
+ "/cdn-cgi/",
37
+ )
38
+
39
+
40
class BotWallDetector:
    """Per-observation detector that escalates on an anti-bot interstitial.

    A page is treated as a wall when it contains any strong marker, or when it
    contains a weak marker *and* a Cloudflare fingerprint. Both checks are
    case-insensitive substring scans over the observation's page content.
    """

    name = "bot-wall"
    scope = Scope.PER_OBSERVATION

    def inspect(self, ctx: RunContext) -> Verdict | None:
        """Return an ESCALATE verdict if the page looks like a bot wall, else None."""
        page = _html_of(ctx)
        if not _contains_any(page, _STRONG_MARKERS):
            # Without a strong marker, a weak phrase only counts when a
            # Cloudflare fingerprint backs it up.
            weak_phrase = _contains_any(page, _WEAK_MARKERS)
            if not (weak_phrase and _contains_any(page, _CLOUDFLARE_FINGERPRINTS)):
                return None
        return Verdict(
            severity=Severity.ESCALATE,
            detector=self.name,
            detail="anti-bot wall detected",
        )
@@ -0,0 +1,97 @@
1
+ """embedded-widget — fires when the page's real content is inside a JS widget.
2
+
3
+ The complement to ``js-shell``. ``js-shell`` catches an *empty* SPA shell (a
4
+ ``<div id="root">`` with no visible text). But a page can be full of human-visible
5
+ chrome — nav, footer, copy — while the data the task actually needs (appointment
6
+ slots, a price table, a seat map) is rendered by an **embedded third-party widget
7
+ or iframe** that loads via JavaScript. A tier-1 ``http_fetch`` sees the chrome and
8
+ the empty mount point, never the data, so it would loop forever or give up. This
9
+ detector is the deterministic signal to *offer* the browser (tier 2) in that case.
10
+
11
+ It is conservative about what counts as a content widget, to avoid escalating on
12
+ ubiquitous analytics/ad scripts:
13
+
14
+ * an ``<iframe>`` with an external ``http(s)`` ``src`` — an embedded application
15
+ whose content is not in this DOM; or
16
+ * a **widget mount point** — an element whose *attributes* (id/class/data-*/domain)
17
+ name a content widget (``widget``, ``embed``, ``scheduler``, or a known booking
18
+ vendor) — together with an external ``<script>`` that fills it.
19
+
20
+ ESCALATE only *unlocks* the browser; the model renders only if it still lacks the
21
+ data, so being a touch generous here is cheap and fail-safe.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import re
27
+
28
+ from zu_core.ports import RunContext, Scope, Severity, Verdict
29
+
30
+ from . import _html_of
31
+
32
+ # Tokens that, when they appear in an element's ATTRIBUTES (not visible text),
33
+ # mark a JS content-widget mount. Generic structural words plus a few common
34
+ # booking/scheduling vendors — kept to attribute context so a nav link like
35
+ # href="/book-an-appointment" or body copy never trips it.
36
+ _WIDGET_TOKENS = (
37
+ "widget", "embed", "scheduler", "data-widget",
38
+ "vetstoria", "oabp", "calendly", "acuityscheduling", "simplybook", "petsapp",
39
+ )
40
+
41
+ # An <iframe ...> carrying an external http(s) src — an embedded app.
42
+ _IFRAME_SRC = re.compile(r"<iframe\b[^>]*\bsrc\s*=\s*[\"']https?://", re.IGNORECASE)
43
+ # Any element's attribute span, to scan for a widget token in attribute context.
44
+ _TAG_ATTRS = re.compile(r"<[a-zA-Z][a-zA-Z0-9]*\b([^>]*)>")
45
+ # An external <script src="http(s)://..."> — the loader that fills a mount point.
46
+ _EXTERNAL_SCRIPT = re.compile(r"<script\b[^>]*\bsrc\s*=\s*[\"']https?://", re.IGNORECASE)
47
+
48
+
49
def _has_widget_mount(html: str) -> bool:
    """Report whether any tag's attribute text contains a widget token.

    Each opening tag matched by ``_TAG_ATTRS`` contributes its attribute span
    (lowercased); the result is True as soon as one span contains any entry of
    ``_WIDGET_TOKENS`` as a substring.
    """
    attr_spans = (match.group(1).lower() for match in _TAG_ATTRS.finditer(html))
    return any(token in span for span in attr_spans for token in _WIDGET_TOKENS)
56
+
57
+
58
def _already_escalated(ctx: RunContext) -> bool:
    """Report whether this run's event log already shows an escalation.

    The embedded-widget detector is a one-shot trigger: once the browser tier
    has been unlocked it must stay silent, because later pages (including the
    rendered DOM) still carry the widget markers and a repeat escalation at the
    top tier would end the run instead of letting the model use the browser.

    An event counts when its ``type`` is ``"harness.task.escalated"``, or when
    it is a ``"data.source.fetched"`` event whose ``source`` is
    ``"render_dom"``. A missing or empty ``events`` attribute means "not yet".
    """

    def _unlocked_browser(event) -> bool:
        kind = getattr(event, "type", "")
        if kind == "harness.task.escalated":
            return True
        # A fetch that came from the DOM renderer means the browser already ran.
        return kind == "data.source.fetched" and getattr(event, "source", "") == "render_dom"

    log = getattr(ctx, "events", []) or []
    return any(_unlocked_browser(event) for event in log)
74
+
75
+
76
class EmbeddedWidgetDetector:
    """Per-observation detector for pages whose data lives in an embedded widget.

    Fires (once per run) when the page carries an ``<iframe>`` with an external
    http(s) ``src``, or a widget-named mount point together with an external
    ``<script>`` that could fill it.
    """

    name = "embedded-widget"
    scope = Scope.PER_OBSERVATION

    def inspect(self, ctx: RunContext) -> Verdict | None:
        """Return an ESCALATE verdict when content is deferred to a widget, else None."""
        page = _html_of(ctx)
        # No content to judge, or the browser was already unlocked earlier in
        # the run: stay quiet either way.
        if not page or _already_escalated(ctx):
            return None
        if _IFRAME_SRC.search(page) is None:
            # No embedded app; fall back to the mount-point rule. A widget-named
            # element only counts when an external script is present to fill it.
            has_loader = _has_widget_mount(page) and _EXTERNAL_SCRIPT.search(page) is not None
            if not has_loader:
                return None
        return Verdict(
            severity=Severity.ESCALATE,
            detector=self.name,
            detail="page defers content to an embedded widget/iframe; "
            "escalate to a browser to render it",
        )
@@ -0,0 +1,32 @@
1
+ """empty — fires when a *fetched page* carried no usable content.
2
+
3
+ Scoped to page-content observations on purpose: it judges a fetch (a tool that
4
+ returned ``html``/``text``/``content``) and escalates when that content is empty
5
+ — the signal to climb to a browser. It must NOT fire on observations that are not
6
+ page fetches — e.g. ``html_parse`` returning ``{"matches": [...]}`` (a successful
7
+ extraction) or an error observation — or it would spuriously escalate after real
8
+ work. So: a content key present but blank -> escalate; no content key -> not our
9
+ concern (return None).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from zu_core.ports import RunContext, Scope, Severity, Verdict
15
+
16
+ from . import _CONTENT_KEYS # one source of truth for "what counts as page content"
17
+
18
+
19
class EmptyDetector:
    """Per-observation detector that escalates when a fetched page is blank.

    Only observations that carry at least one key from ``_CONTENT_KEYS`` are
    judged; any other observation (or a non-dict one) is ignored.
    """

    name = "empty"
    scope = Scope.PER_OBSERVATION

    def inspect(self, ctx: RunContext) -> Verdict | None:
        """Return an ESCALATE verdict when every present content key is blank."""
        observation = getattr(ctx, "observation", None)
        if not isinstance(observation, dict):
            return None
        content_values = [observation.get(key) for key in _CONTENT_KEYS if key in observation]
        if not content_values:
            # Not a page-content observation, so "empty" does not apply.
            return None
        for raw in content_values:
            # Falsy values count as blank; anything else is stringified and
            # must contain a non-whitespace character to count as content.
            if str(raw or "").strip():
                return None
        return Verdict(severity=Severity.ESCALATE, detector=self.name, detail="empty observation")
@@ -0,0 +1,25 @@
1
+ """error — fires on an HTTP error status in the observation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from zu_core.ports import RunContext, Scope, Severity, Verdict
6
+
7
+
8
class ErrorDetector:
    """Per-observation detector that reports an HTTP error status as RETRY."""

    name = "error"
    scope = Scope.PER_OBSERVATION

    def inspect(self, ctx: RunContext) -> Verdict | None:
        """Return a RETRY verdict when the observation's ``status`` is an int >= 400.

        The severity is RETRY rather than TERMINAL on purpose: one failing url
        (a 403, a 404, a 5xx) does not show that the run cannot succeed, so the
        error is recorded and fed back and the model may try another candidate.
        A run that truly cannot proceed still ends through the step/token budget.
        """
        observation = getattr(ctx, "observation", None)
        if not isinstance(observation, dict):
            return None
        status = observation.get("status")
        if not isinstance(status, int) or status < 400:
            return None
        return Verdict(severity=Severity.RETRY, detector=self.name, detail=f"http {status}")
@@ -0,0 +1,75 @@
1
+ """js-shell — fires when a page is an empty JavaScript shell.
2
+
3
+ The canonical escalation trigger: tier-1 http_fetch returns HTML that is
4
+ essentially a <div id="root"></div> plus scripts, with no real text content.
5
+ That is the signal to give up on the cheap tier and climb to a browser.
6
+
7
+ The test is structural, not size-based: a page is a shell when it has a known
8
+ SPA mount point *and* almost no human-visible text once scripts and styles are
9
+ removed. Measuring visible text (rather than raw HTML length) is what step 5
10
+ finalizes — a shell padded with a large inline bundle is still a shell, and a
11
+ small page that happens to be real content is not escalated.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import re
17
+
18
+ from zu_core.ports import RunContext, Scope, Severity, Verdict
19
+
20
+ from . import _contains_any, _html_of
21
+
22
+ # Common SPA mount points / framework markers.
23
+ _SHELL_MARKERS = ('id="root"', "id='root'", 'id="app"', "id='app'", "__NEXT_DATA__")
24
+
25
+ # Strip the elements whose contents are never visible text before measuring.
26
+ # ``\s*`` in the close tag tolerates ``</script >``; the second pattern handles
27
+ # an *unterminated* script/style — a browser treats everything after an unclosed
28
+ # <script> as script text, so the heuristic does too (consume to end of input).
29
+ # HTML comments are removed FIRST so a commented-out ``<!-- <script> -->`` (or
30
+ # any literal ``<script`` inside a comment) can't trip the greedy _UNCLOSED rule
31
+ # and erase the real article body after it — a deterministic false-positive the
32
+ # unbalanced-tag heuristic would otherwise produce.
33
+ _COMMENT = re.compile(r"<!--.*?-->", re.DOTALL)
34
+ _NONVISIBLE = re.compile(r"<(script|style|template|noscript)\b.*?</\1\s*>", re.IGNORECASE | re.DOTALL)
35
+ _UNCLOSED = re.compile(r"<(script|style|template|noscript)\b.*\Z", re.IGNORECASE | re.DOTALL)
36
+ _TAGS = re.compile(r"<[^>]+>")
37
+ _WS = re.compile(r"\s+")
38
+
39
+ # Below this many characters of visible text, a page with a mount point is
40
+ # treated as an unrendered shell. Tuned against the graded fixture set.
41
+ _MIN_VISIBLE_TEXT = 64
42
+
43
+
44
def _visible_text(html: str) -> str:
    """Approximate the human-visible text of ``html``.

    Applies the module's patterns in a fixed order, replacing each match with a
    single space: comments first (so a ``<script`` inside a comment cannot
    trigger the unclosed-element rule), then closed non-visible elements, then
    an unclosed non-visible element through end of input, then any remaining
    tags. Whitespace runs are finally collapsed and the result is stripped.
    """
    remaining = html
    # Order matters: each pattern assumes the earlier ones have already run.
    for pattern in (_COMMENT, _NONVISIBLE, _UNCLOSED, _TAGS):
        remaining = pattern.sub(" ", remaining)
    return _WS.sub(" ", remaining).strip()
52
+
53
+
54
class JsShellDetector:
    """Per-observation detector for an unrendered JavaScript shell page.

    All three conditions must hold: a shell marker is found, the page pulls in
    script (a ``<script`` tag or a ``modulepreload`` hint), and the visible
    text is shorter than ``_MIN_VISIBLE_TEXT`` characters.
    """

    name = "js-shell"
    scope = Scope.PER_OBSERVATION

    def inspect(self, ctx: RunContext) -> Verdict | None:
        """Return an ESCALATE verdict when the page looks like a JS shell, else None."""
        page = _html_of(ctx)
        if not page:
            return None
        if not _contains_any(page, _SHELL_MARKERS):
            return None
        page_lower = page.lower()
        # Content deferred to JS shows up either as a literal <script> or as a
        # module graph preloaded through <link rel="modulepreload">.
        if "<script" not in page_lower and "modulepreload" not in page_lower:
            return None
        # Measure visible text rather than raw length: a shell padded with a
        # large inline bundle is still a shell.
        if len(_visible_text(page)) >= _MIN_VISIBLE_TEXT:
            return None
        return Verdict(
            severity=Severity.ESCALATE,
            detector=self.name,
            detail="page appears to be a JS shell; escalate to a browser",
        )
@@ -0,0 +1,7 @@
1
+ """Zu built-in validators — the on-final checks of the result.
2
+
3
+ The two cheapest rungs of the validation ladder: schema (does the result fit
4
+ the requested shape?) and grounding (does every extracted value actually
5
+ appear in retrieved content?). Grounding is the anti-hallucination check — the
6
+ core of the "agents that actually work" claim.
7
+ """
@@ -0,0 +1,162 @@
1
+ """grounding — every extracted value must appear in retrieved content.
2
+
3
+ The anti-making-things-up check: a value the agent reports that is nowhere in
4
+ the content the run actually fetched fails grounding. It reads the run's
5
+ content from the event log via RunContext, so it proves provenance, not just
6
+ plausibility.
7
+
8
+ Matching is token-boundary-aware (build step 6): a value must appear in the
9
+ retrieved content as a standalone token, not merely as a substring, so a short
10
+ value such as ``"5"`` is not spuriously grounded by ``"1985"``.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from collections.abc import Iterator
16
+
17
+ from zu_core.contracts import Result
18
+ from zu_core.ports import RunContext, Severity, Verdict
19
+
20
+
21
def _normalize(s: str) -> str:
    """Return ``s`` lowercased, with whitespace runs collapsed to single spaces.

    Leading and trailing whitespace is dropped as well, so formatting-only
    differences between an extracted value and the page text do not cause a
    grounding failure.
    """
    words = s.split()
    collapsed = " ".join(words)
    return collapsed.lower()
25
+
26
+
27
def _grounded(leaf_norm: str, corpus: str) -> bool:
    """Report whether ``leaf_norm`` occurs in ``corpus`` as a standalone token.

    Plain substring containment is too lenient — ``"5"`` would be "found"
    inside ``"1985"`` — so every occurrence (overlapping ones included) is
    offered to ``_standalone``, and the value is grounded as soon as one
    occurrence sits on token boundaries.

    An empty ``leaf_norm`` is never grounded: there is nothing whose provenance
    could be proven, so it fails safe instead of passing for free.
    """
    if not leaf_norm:
        return False
    width = len(leaf_norm)
    hit = corpus.find(leaf_norm)
    while hit != -1:
        if _standalone(corpus, hit, hit + width):
            return True
        # Resume one character later so overlapping occurrences are not missed.
        hit = corpus.find(leaf_norm, hit + 1)
    return False
59
+
60
+
61
+ # Separators that join a number to more digits to form a single larger value or
62
+ # a compound numeric token: decimal/thousands (``.`` ``,``) AND the connectors in
63
+ # dates, versions, times, ranges, SKUs and phone numbers (``-`` ``/`` ``:``). A
64
+ # match flanked by one of these with a digit on its *outer* side is a fragment of
65
+ # a longer token, not a standalone value — so "12" is not grounded by "12-2024",
66
+ # nor "30" by "12:30", just as "14" is not grounded by "3.14".
67
+ _NUM_SEPARATORS = frozenset(".,-/:")
68
+
69
+
70
def _standalone(corpus: str, lo: int, hi: int) -> bool:
    """Report whether ``corpus[lo:hi]`` sits on token boundaries.

    The slice is rejected when either neighbouring character is alphanumeric
    (it is part of a longer word or number), or when a neighbour is one of
    ``_NUM_SEPARATORS`` with a digit on that separator's far side (it is a
    fragment of a larger or compound number, e.g. ``"14"`` inside ``"3.14"``).
    The start and end of ``corpus`` count as boundaries.
    """
    left = corpus[lo - 1] if lo > 0 else ""
    right = corpus[hi] if hi < len(corpus) else ""
    if left.isalnum() or right.isalnum():
        return False
    # One-character slices give "" (not an IndexError) when they run off either
    # end, and "".isdigit() is False.
    joined_on_left = left in _NUM_SEPARATORS and corpus[lo - 2 : lo - 1].isdigit()
    joined_on_right = right in _NUM_SEPARATORS and corpus[hi + 1 : hi + 2].isdigit()
    return not (joined_on_left or joined_on_right)
85
+
86
+
87
def _leaf_strings(value: object) -> Iterator[str]:
    """Yield each groundable scalar leaf of ``value`` as a stripped string.

    Dicts are walked through their values and lists/tuples through their items,
    recursively. Strings, ints and floats are stringified and stripped, and are
    yielded only when the result is non-empty. Booleans are skipped explicitly
    (``bool`` is a subclass of ``int``, and a boolean is not page text); any
    other type, ``None`` included, yields nothing.
    """
    if isinstance(value, dict):
        children = value.values()
    elif isinstance(value, (list, tuple)):
        children = value
    else:
        # Scalar (or unsupported) leaf: emit at most one string, then stop.
        if not isinstance(value, bool) and isinstance(value, (str, int, float)):
            stripped = str(value).strip()
            if stripped:
                yield stripped
        return
    for child in children:
        yield from _leaf_strings(child)
107
+
108
+
109
def _retrieved_corpus(ctx: RunContext) -> str:
    """Return all retrieved page content for the run, joined with newlines.

    Content comes from the ``html`` / ``text`` / ``content`` string values in
    the payloads of ``data.source.fetched`` events only. Other event types are
    ignored on purpose: a model turn carries the model's own output, which must
    never count as evidence about the page.

    The current observation is consulted only when the event log produced no
    fetched content at all. Mixing it in alongside fetched events would let an
    observation that is not retrieved page content ground a value.
    """

    def _strings_in(mapping) -> list[str]:
        # String values under the three content keys, in key order.
        return [
            mapping[key]
            for key in ("html", "text", "content")
            if isinstance(mapping.get(key), str)
        ]

    fetched: list[str] = []
    for event in getattr(ctx, "events", []) or []:
        if getattr(event, "type", "") == "data.source.fetched":
            fetched.extend(_strings_in(getattr(event, "payload", {}) or {}))
    if fetched:
        return "\n".join(fetched)

    # Fallback: no fetched content in the event log.
    observation = getattr(ctx, "observation", None)
    if isinstance(observation, dict):
        return "\n".join(_strings_in(observation))
    return ""
140
+
141
+
142
class GroundingValidator:
    """On-final validator: every extracted value must appear in retrieved content."""

    name = "grounding"

    def check(self, result: Result, ctx: RunContext) -> Verdict | None:
        """Return a RETRY verdict naming the first field with an ungrounded leaf.

        Returns None when ``result.value`` is falsy or when every leaf of every
        field is found in the normalized retrieved corpus.
        """
        value = result.value
        if not value:
            return None
        corpus = _normalize(_retrieved_corpus(ctx))
        # The value is usually a JSON object, but a schema may allow a list or
        # scalar root; wrap those as one pseudo-field instead of calling
        # ``.items()`` on something that has none.
        named_fields = value.items() if isinstance(value, dict) else [("value", value)]
        for field, field_value in named_fields:
            has_ungrounded_leaf = any(
                not _grounded(_normalize(leaf), corpus)
                for leaf in _leaf_strings(field_value)
            )
            if has_ungrounded_leaf:
                return Verdict(
                    severity=Severity.RETRY,
                    detector=self.name,
                    detail=f"value for {field!r} not found in retrieved content",
                )
        return None
@@ -0,0 +1,41 @@
1
+ """schema — the result must satisfy the task's output JSON schema."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import jsonschema
6
+
7
+ from zu_core.contracts import Result
8
+ from zu_core.ports import RunContext, Severity, Verdict
9
+
10
+
11
class SchemaValidator:
    """On-final validator: the result must satisfy the task's output JSON schema."""

    name = "schema"

    def check(self, result: Result, ctx: RunContext) -> Verdict | None:
        """Validate ``result.value`` against ``ctx.spec.output_schema``.

        Returns None when no schema is set or the value validates; a RETRY
        verdict when the value does not match a usable schema; a TERMINAL
        verdict when the schema itself cannot be used.
        """
        schema = getattr(ctx.spec, "output_schema", None)
        if not schema:
            return None  # nothing to check against
        try:
            jsonschema.validate(instance=result.value, schema=schema)
        except jsonschema.ValidationError as mismatch:
            # The data did not match a valid schema — a retry might fix it.
            # jsonschema errors expose ``.message``; fall back to ``str``.
            reason = getattr(mismatch, "message", str(mismatch))
            return Verdict(severity=Severity.RETRY, detector=self.name, detail=reason)
        except Exception as broken:  # noqa: BLE001 - a broken schema is terminal
            # The schema comes from the TaskSpec unvalidated. It may be
            # malformed, or contain an unresolvable ``$ref`` whose error is not
            # a SchemaError subclass. Retrying cannot repair a schema, so every
            # such failure is caught here and reported as terminal rather than
            # escaping into the validation ladder.
            reason = getattr(broken, "message", str(broken))
            return Verdict(
                severity=Severity.TERMINAL,
                detector=self.name,
                detail=f"invalid output_schema: {reason}",
            )
        else:
            return None
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: zu-checks
3
+ Version: 0.1.0
4
+ Summary: Zu built-in checks: detectors (empty, error, js-shell, embedded-widget, bot-wall) + validators (schema, grounding)
5
+ Project-URL: Homepage, https://github.com/k3-mt/zu
6
+ Project-URL: Repository, https://github.com/k3-mt/zu
7
+ License-Expression: Apache-2.0
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
15
+ Classifier: Typing :: Typed
16
+ Requires-Python: >=3.11
17
+ Requires-Dist: jsonschema>=4
18
+ Requires-Dist: zu-core==0.1.0
@@ -0,0 +1,14 @@
1
+ zu_checks/__init__.py,sha256=DFWxEdZBy4vqncg4IJUg9GxA3Q9tewAF8EyHsDXxGls,746
2
+ zu_checks/detectors/__init__.py,sha256=yBPvvUvk5yoHWHCsTu6NWrYFExO3PcMmkW61bhT7IVI,1667
3
+ zu_checks/detectors/bot_wall.py,sha256=KYH5WrNp4B-Auwn06PK7nG4SYXND6vbGJBc1Kx1KS20,1935
4
+ zu_checks/detectors/embedded_widget.py,sha256=NIMEQEX11gXxHGA0crMUBibCLIzaEQAYKcD6s3xTP4I,4458
5
+ zu_checks/detectors/empty.py,sha256=0eNF2ZZrX4UCVQ_Mp-bNd--q_ejhRFlZn9E5-avWM2s,1375
6
+ zu_checks/detectors/error.py,sha256=63ds67Vz84WLgl3ouqCpmATGPIYHkFnoOSMPuGpGT_E,1232
7
+ zu_checks/detectors/js_shell.py,sha256=N16KhXBB8Pl6QMLqiXhYUDn9pP0w_RowJuphBXbKmW8,3378
8
+ zu_checks/validators/__init__.py,sha256=UIB0gBGtPzR4Zc8hGRQ45R_RU7YrHqItE1b6nR7qg4g,348
9
+ zu_checks/validators/grounding.py,sha256=TaEE8oiE9f5mqEYt_qadxQUudg8v52IxZioODGXmuUs,7281
10
+ zu_checks/validators/schema.py,sha256=VVxM28vzLoA-Vd6Ytf4dT7xVN1aiQdILF-n7qcBm5PQ,1888
11
+ zu_checks-0.1.0.dist-info/METADATA,sha256=MS3oPArfyzBmma7Esb2DvXEJVrvI7AlsSxCs6VLsHQU,804
12
+ zu_checks-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
13
+ zu_checks-0.1.0.dist-info/entry_points.txt,sha256=nzsKOh8HkgUgURbDEmXcYnkVDOjCyWOEkxSnUTv0Ebc,432
14
+ zu_checks-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,10 @@
1
+ [zu.detectors]
2
+ bot-wall = zu_checks.detectors.bot_wall:BotWallDetector
3
+ embedded-widget = zu_checks.detectors.embedded_widget:EmbeddedWidgetDetector
4
+ empty = zu_checks.detectors.empty:EmptyDetector
5
+ error = zu_checks.detectors.error:ErrorDetector
6
+ js-shell = zu_checks.detectors.js_shell:JsShellDetector
7
+
8
+ [zu.validators]
9
+ grounding = zu_checks.validators.grounding:GroundingValidator
10
+ schema = zu_checks.validators.schema:SchemaValidator