zu-huggingface 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
1
+ """zu-huggingface — HuggingFace models behind Zu's typed ports (§8.3–8.5).
2
+
3
+ HuggingFace is not a model — it is the largest hub of open models across every
4
+ modality. This package reaches it three ways, all behind configuration:
5
+
6
+ * **Chat / vision-language models as the policy** need *no code here* — they
7
+ speak the OpenAI chat API on all three serving surfaces (the router's ``/v1``,
8
+ an Endpoint's ``/v1``, or a local vLLM), so a HuggingFace model as the brain
9
+ is the existing ``openai-compatible`` provider pointed at a HuggingFace base
10
+ URL (see this package's README). It is the OpenRouter story exactly.
11
+
12
+ * **Task models** (ASR, OCR, detection, embeddings, classification,
13
+ summarisation, translation) are *not* chat models — each has its own typed
14
+ I/O — so they enter through the non-policy ports by their role: as **Tools**
15
+ (``tools.py``) and as **detectors / validators** (``roles.py``), over the one
16
+ :class:`HfClient` seam (``client.py``) that works hosted or local.
17
+
18
+ * **The supply chain** (``supply_chain.py``) makes pulling any of them safe by
19
+ default: pin + hash, safetensors not pickle, never trust remote code.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from .client import HF_ROUTER, HfClient, InferenceClientBackend, PipelineBackend
25
+ from .roles import HfClassifierDetector, HfClassifierValidator
26
+ from .supply_chain import (
27
+ ModelPin,
28
+ SupplyChainError,
29
+ SupplyChainPolicy,
30
+ assert_no_remote_code,
31
+ file_sha256,
32
+ safe_pipeline_kwargs,
33
+ verify_file_hash,
34
+ verify_model_source,
35
+ )
36
+ from .tools import (
37
+ Classify,
38
+ DetectObjects,
39
+ Embed,
40
+ ImageToText,
41
+ Summarize,
42
+ Transcribe,
43
+ Translate,
44
+ ZeroShotClassify,
45
+ )
46
+
47
+ __all__ = [
48
+ # client seam
49
+ "HfClient",
50
+ "HF_ROUTER",
51
+ "InferenceClientBackend",
52
+ "PipelineBackend",
53
+ # tools
54
+ "Transcribe",
55
+ "ImageToText",
56
+ "DetectObjects",
57
+ "Embed",
58
+ "Classify",
59
+ "ZeroShotClassify",
60
+ "Summarize",
61
+ "Translate",
62
+ # role wrappers
63
+ "HfClassifierDetector",
64
+ "HfClassifierValidator",
65
+ # supply chain
66
+ "ModelPin",
67
+ "SupplyChainPolicy",
68
+ "SupplyChainError",
69
+ "verify_model_source",
70
+ "assert_no_remote_code",
71
+ "safe_pipeline_kwargs",
72
+ "file_sha256",
73
+ "verify_file_hash",
74
+ ]
@@ -0,0 +1,187 @@
1
+ """The HuggingFace client seam — one task-method interface, three serving surfaces.
2
+
3
+ Most HuggingFace models are *not* chat models: OCR, speech recognition, object
4
+ detection, and embedding models each have their own typed input/output, so they
5
+ enter Zu through the non-policy ports by their role (§8.5). This module is the
6
+ thin seam the HuggingFace *tools* call, so the same tool works whether the model
7
+ is served hosted (the Inference Providers router) or local (a transformers
8
+ pipeline) — the integration is done once, here.
9
+
10
+ ``HfClient`` is the protocol the tools depend on. Two adapters implement it:
11
+
12
+ * :class:`InferenceClientBackend` — wraps ``huggingface_hub.InferenceClient``
13
+ (hosted; the router or a dedicated Endpoint), egressing to the HF router.
14
+ * :class:`PipelineBackend` — wraps ``transformers.pipeline`` (local; the
15
+ air-gapped / on-prem case), constructed only through the supply-chain guards
16
+ (§8.3): a pinned revision and ``trust_remote_code=False``.
17
+
18
+ Both heavy SDKs are imported lazily, so installing ``zu-huggingface`` without
19
+ the extras costs nothing and the tools are testable offline against a fake
20
+ client. Credentials (``HF_TOKEN``) are resolved from the environment *inside*
21
+ the backend, never placed in the model's context.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import os
27
+ from typing import Any, Protocol, runtime_checkable
28
+
29
+ from .supply_chain import ModelPin, SupplyChainPolicy, safe_pipeline_kwargs
30
+
31
+ # The Inference Providers router — the hosted default, OpenAI-compatible for
32
+ # chat at /v1 but task-native through the InferenceClient methods.
33
+ HF_ROUTER = "router.huggingface.co"
34
+
35
+
36
@runtime_checkable
class HfClient(Protocol):
    """Structural interface the HuggingFace tools program against.

    Every method takes and returns plain Python values — ``bytes`` for media,
    ``str`` for text, ``list[dict]`` for structured results — so translating
    to/from typed :class:`zu_core.content` Content stays the tools' job.
    Each method receives the model id to run as ``model``.
    """

    def transcribe(self, audio: bytes, model: str) -> str:
        """Audio bytes in, text out."""
        ...

    def image_to_text(self, image: bytes, model: str) -> str:
        """Image bytes in, text out."""
        ...

    def object_detection(self, image: bytes, model: str) -> list[dict]:
        """Image bytes in, one dict per detected object out."""
        ...

    def text_classification(self, text: str, model: str) -> list[dict]:
        """Text in, a list of classification result dicts out."""
        ...

    def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
        """Text plus candidate ``labels`` in, a list of result dicts out."""
        ...

    def embed(self, text: str, model: str) -> list[float]:
        """Text in, a flat list of floats out."""
        ...

    def summarize(self, text: str, model: str) -> str:
        """Text in, text out."""
        ...

    def translate(self, text: str, model: str) -> str:
        """Text in, text out."""
        ...
50
+
51
+
52
+ def _scores(raw: Any) -> list[dict]:
53
+ """Normalise a classifier response to ``[{"label","score"}, …]`` sorted by
54
+ score desc — the shape every classification tool/detector reads."""
55
+ out: list[dict] = []
56
+ if isinstance(raw, dict) and "labels" in raw and "scores" in raw: # zero-shot shape
57
+ out = [{"label": str(lbl), "score": float(sc)}
58
+ for lbl, sc in zip(raw["labels"], raw["scores"], strict=False)]
59
+ elif isinstance(raw, list):
60
+ for item in raw:
61
+ if isinstance(item, dict) and "label" in item:
62
+ out.append({"label": str(item["label"]), "score": float(item.get("score", 0.0))})
63
+ return sorted(out, key=lambda d: d["score"], reverse=True)
64
+
65
+
66
class InferenceClientBackend:
    """Hosted HuggingFace via ``huggingface_hub.InferenceClient`` (lazy import).

    The same model id works through the serverless router or a dedicated
    Endpoint. The token is read from the environment variable named by
    ``token_env`` when the client is first built, not at construction time.

    Args:
        provider: passed to ``InferenceClient(provider=...)``.
        token_env: name of the environment variable holding the API token.
        client: a pre-built client to use instead of constructing one
            (injectable for tests).
    """

    egress_host = HF_ROUTER

    def __init__(
        self,
        *,
        provider: str = "hf-inference",
        token_env: str = "HF_TOKEN",
        client: Any = None,
    ) -> None:
        self._provider = provider
        self._token_env = token_env
        self._client = client  # injectable for tests

    def _c(self) -> Any:
        """Return the underlying client, constructing it on first use.

        Raises:
            RuntimeError: if ``huggingface_hub`` is not installed.
        """
        if self._client is None:
            try:
                from huggingface_hub import InferenceClient
            except ImportError as e:  # pragma: no cover - exercised only without the extra
                raise RuntimeError(
                    "the hosted HuggingFace backend needs `huggingface_hub` "
                    "(install zu-huggingface[hosted])"
                ) from e
            # NOTE(review): the `provider=` keyword looks newer than the
            # declared `huggingface-hub>=0.24` floor — confirm the minimum version.
            self._client = InferenceClient(
                provider=self._provider,
                api_key=os.environ.get(self._token_env),
            )
        return self._client

    @staticmethod
    def _pooled(raw: Any) -> list[float]:
        """Reduce a feature-extraction result to one flat vector.

        A flat vector is returned as floats unchanged. A (tokens, dim) matrix is
        mean-pooled over tokens; a (batch, tokens, dim) array keeps its first
        batch entry and is then pooled — the same reduction the local
        ``PipelineBackend.embed`` applies.
        """
        values = list(raw.tolist() if hasattr(raw, "tolist") else raw)
        is_3d = (
            values
            and isinstance(values[0], (list, tuple))
            and values[0]
            and isinstance(values[0][0], (list, tuple))
        )
        if is_3d:
            values = list(values[0])
        if values and isinstance(values[0], (list, tuple)):
            return [float(sum(col)) / len(col) for col in zip(*values, strict=False)]
        return [float(x) for x in values]

    def transcribe(self, audio: bytes, model: str) -> str:
        """Run ASR; return the response itself if it is a ``str``, else its ``text``."""
        r = self._c().automatic_speech_recognition(audio, model=model)
        return r if isinstance(r, str) else str(getattr(r, "text", r))

    def image_to_text(self, image: bytes, model: str) -> str:
        """Run image-to-text; return a ``str`` response or its ``generated_text``."""
        r = self._c().image_to_text(image, model=model)
        return r if isinstance(r, str) else str(getattr(r, "generated_text", r))

    def object_detection(self, image: bytes, model: str) -> list[dict]:
        """Run object detection; each result item is copied into a plain ``dict``."""
        r = self._c().object_detection(image, model=model)
        return [dict(item) for item in r]

    def text_classification(self, text: str, model: str) -> list[dict]:
        """Classify ``text``; results are normalised by :func:`_scores`."""
        return _scores(self._c().text_classification(text, model=model))

    def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
        """Zero-shot classify ``text`` against ``labels``; normalised by :func:`_scores`."""
        return _scores(self._c().zero_shot_classification(text, candidate_labels=labels, model=model))

    def embed(self, text: str, model: str) -> list[float]:
        """Embed ``text`` as one flat vector.

        Token-level (nested) outputs are mean-pooled rather than passed to
        ``float()`` element-wise, which would raise ``TypeError`` on a nested list.
        """
        return self._pooled(self._c().feature_extraction(text, model=model))

    def summarize(self, text: str, model: str) -> str:
        """Summarise ``text``; return a ``str`` response or its ``summary_text``."""
        r = self._c().summarization(text, model=model)
        return r if isinstance(r, str) else str(getattr(r, "summary_text", r))

    def translate(self, text: str, model: str) -> str:
        """Translate ``text``; return a ``str`` response or its ``translation_text``."""
        r = self._c().translation(text, model=model)
        return r if isinstance(r, str) else str(getattr(r, "translation_text", r))
127
+
128
+
129
class PipelineBackend:
    """Local HuggingFace via ``transformers.pipeline`` (lazy import).

    Every pipeline is built through :func:`safe_pipeline_kwargs`, so the
    supply-chain policy is checked before anything is loaded. Pipelines are
    cached per (task, model).

    Args:
        policy: the supply-chain policy; defaults to ``SupplyChainPolicy()``.
        pins: optional mapping from the model id the tools pass in to the
            :class:`ModelPin` (repo id + revision) to load for it. Without an
            entry the model is loaded as an unpinned ``ModelPin(repo_id=model)``,
            which the default policy (``require_pinned_revision=True``) rejects —
            so under the default policy ``pins`` is how a revision is supplied.
    """

    egress_host = ""  # local — no egress

    def __init__(
        self,
        policy: SupplyChainPolicy | None = None,
        *,
        pins: dict[str, ModelPin] | None = None,
    ) -> None:
        self._policy = policy or SupplyChainPolicy()
        self._pins: dict[str, ModelPin] = dict(pins or {})
        self._cache: dict[tuple[str, str], Any] = {}

    def _pipe(self, task: str, model: str) -> Any:
        """Return the cached pipeline for ``(task, model)``, building it on first use.

        Raises:
            RuntimeError: if ``transformers`` is not installed.
            SupplyChainError: propagated from :func:`safe_pipeline_kwargs` when
                the pin violates the policy.
        """
        key = (task, model)
        if key not in self._cache:
            try:
                from transformers import pipeline
            except ImportError as e:  # pragma: no cover - exercised only without the extra
                raise RuntimeError(
                    "the local HuggingFace backend needs `transformers` "
                    "(install zu-huggingface[local])"
                ) from e
            pin = self._pins.get(model)
            if pin is None:
                pin = ModelPin(repo_id=model)
            kwargs = safe_pipeline_kwargs(pin, self._policy)
            self._cache[key] = pipeline(task, **kwargs)
        return self._cache[key]

    def transcribe(self, audio: bytes, model: str) -> str:
        """Run the ASR pipeline and return its ``"text"`` field."""
        return str(self._pipe("automatic-speech-recognition", model)(audio)["text"])

    def image_to_text(self, image: bytes, model: str) -> str:
        """Run image-to-text; read ``generated_text`` from the first item or the dict."""
        r = self._pipe("image-to-text", model)(image)
        return str(r[0]["generated_text"] if isinstance(r, list) else r["generated_text"])

    def object_detection(self, image: bytes, model: str) -> list[dict]:
        """Run object detection; each result item is copied into a plain ``dict``."""
        return [dict(item) for item in self._pipe("object-detection", model)(image)]

    def text_classification(self, text: str, model: str) -> list[dict]:
        """Classify ``text``; results are normalised by :func:`_scores`."""
        return _scores(self._pipe("text-classification", model)(text))

    def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
        """Zero-shot classify ``text`` against ``labels``; normalised by :func:`_scores`."""
        return _scores(self._pipe("zero-shot-classification", model)(text, candidate_labels=labels))

    def embed(self, text: str, model: str) -> list[float]:
        """Embed ``text`` as one flat vector, mean-pooling token vectors."""
        r = self._pipe("feature-extraction", model)(text)
        # pipelines return [[token-vectors]]; mean-pool to one vector
        vecs = r[0] if isinstance(r, list) else r
        if vecs and isinstance(vecs[0], list):
            cols = list(zip(*vecs, strict=False))
            return [sum(c) / len(c) for c in cols]
        return [float(x) for x in vecs]

    def summarize(self, text: str, model: str) -> str:
        """Summarise ``text``; return the first result's ``summary_text``."""
        return str(self._pipe("summarization", model)(text)[0]["summary_text"])

    def translate(self, text: str, model: str) -> str:
        """Translate ``text``; return the first result's ``translation_text``."""
        return str(self._pipe("translation", model)(text)[0]["translation_text"])
@@ -0,0 +1,139 @@
1
+ """HuggingFace models in the detector and validator roles (§8.5, §9.1).
2
+
3
+ The port is the role, assigned per agent. A zero-shot or text-classification
4
+ model that *gates control flow* is a **detector**; one that *checks the final
5
+ result* is a **validator**. A trained classifier as a detector is cheaper,
6
+ faster, and more reliable than asking an LLM the same yes/no question — the
7
+ right-sized-model discipline the economics rest on (§9.1).
8
+
9
+ These are configured per agent (a model + the labels that matter + a threshold),
10
+ so they enter the registry *by reference in config* rather than as a zero-config
11
+ entry point. Both reuse the same :class:`HfClient` seam as the tools.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from zu_core.contracts import Result
17
+ from zu_core.ports import RunContext, Scope, Severity, Verdict
18
+
19
+ from .client import HfClient
20
+
21
+ _CONTENT_KEYS = ("html", "text", "content")
22
+
23
+
24
def _text_of(obs: object) -> str:
    """Collect an observation's text.

    For a dict, the non-empty string values found under ``_CONTENT_KEYS`` are
    joined with newlines, in key order. Any non-dict observation yields ``""``.
    """
    if not isinstance(obs, dict):
        return ""
    chunks: list[str] = []
    for key in _CONTENT_KEYS:
        value = obs.get(key)
        if isinstance(value, str) and value:
            chunks.append(value)
    return "\n".join(chunks)
31
+
32
+
33
class HfClassifierDetector:
    """Per-observation detector driven by a HuggingFace classifier.

    The observation's text is classified; when the top-ranked label is one of
    ``escalate_on`` (compared case-insensitively) and its score reaches
    ``threshold``, a :class:`Verdict` of the configured ``severity`` is returned.
    With ``candidate_labels`` set the client's zero-shot method is used;
    otherwise its text-classification method.
    """

    scope = Scope.PER_OBSERVATION

    def __init__(
        self,
        client: HfClient,
        model: str,
        *,
        escalate_on: list[str],
        candidate_labels: list[str] | None = None,
        threshold: float = 0.5,
        severity: Severity = Severity.ESCALATE,
        name: str = "hf-classifier",
    ) -> None:
        self.name = name
        self._client = client
        self._model = model
        self._candidate_labels = candidate_labels
        self._threshold = threshold
        self._severity = severity
        # lower-cased once so the label comparison in inspect() ignores case
        self._escalate_on = {label.lower() for label in escalate_on}

    def inspect(self, ctx: RunContext) -> Verdict | None:
        """Return a verdict when the classifier flags ``ctx.observation``, else ``None``.

        ``None`` is also returned when the observation has no text or the
        classifier returns nothing.
        """
        observation = getattr(ctx, "observation", None)
        text = _text_of(observation)
        if not text.strip():
            return None

        if self._candidate_labels is None:
            ranked = self._client.text_classification(text, self._model)
        else:
            ranked = self._client.zero_shot(text, self._candidate_labels, self._model)
        if not ranked:
            return None

        # only the first (top-ranked) entry is consulted
        best = ranked[0]
        if best["label"].lower() not in self._escalate_on:
            return None
        if not best["score"] >= self._threshold:
            return None
        return Verdict(
            severity=self._severity,
            detector=self.name,
            detail=f"{best['label']} ({best['score']:.2f})",
        )
82
+
83
+
84
class HfClassifierValidator:
    """Result validator driven by a HuggingFace classifier.

    The result's text is classified; when the top-ranked label is one of
    ``fail_on`` (compared case-insensitively) and its score reaches
    ``threshold``, a :class:`Verdict` of the configured ``severity`` (RETRY by
    default) is returned. ``value_key`` selects a single entry of the result's
    value to classify; without it all string entries are joined.
    """

    def __init__(
        self,
        client: HfClient,
        model: str,
        *,
        fail_on: list[str],
        candidate_labels: list[str] | None = None,
        threshold: float = 0.5,
        severity: Severity = Severity.RETRY,
        value_key: str | None = None,
        name: str = "hf-classifier-check",
    ) -> None:
        self.name = name
        self._client = client
        self._model = model
        self._candidate_labels = candidate_labels
        self._threshold = threshold
        self._severity = severity
        self._value_key = value_key
        # lower-cased once so the label comparison in check() ignores case
        self._fail_on = {label.lower() for label in fail_on}

    def _result_text(self, result: Result) -> str:
        """Extract the text to classify from ``result.value``.

        A non-dict value yields ``""``. With ``value_key`` set, that entry is
        returned if it is a string (else ``""``); otherwise the top-level string
        values are joined with newlines.
        """
        value = result.value
        if not isinstance(value, dict):
            return ""
        if self._value_key is None:
            return "\n".join(str(leaf) for leaf in value.values() if isinstance(leaf, str))
        picked = value.get(self._value_key)
        return picked if isinstance(picked, str) else ""

    def check(self, result: Result, ctx: RunContext) -> Verdict | None:
        """Return a verdict when the classifier flags the result's text, else ``None``.

        ``None`` is also returned when there is no text or the classifier
        returns nothing. ``ctx`` is accepted but not read here.
        """
        text = self._result_text(result)
        if not text.strip():
            return None

        if self._candidate_labels is None:
            ranked = self._client.text_classification(text, self._model)
        else:
            ranked = self._client.zero_shot(text, self._candidate_labels, self._model)
        if not ranked:
            return None

        # only the first (top-ranked) entry is consulted
        best = ranked[0]
        if best["label"].lower() not in self._fail_on:
            return None
        if not best["score"] >= self._threshold:
            return None
        return Verdict(
            severity=self._severity,
            detector=self.name,
            detail=f"{best['label']} ({best['score']:.2f})",
        )
@@ -0,0 +1,135 @@
1
+ """Model supply-chain guards (Engineering Design §8.3).
2
+
3
+ Pulling a model from the Hub is a supply-chain surface under the same rules as
4
+ any downloaded artifact. Two hazards matter:
5
+
6
+ * **model code that runs on load** — the transformers "trust remote code" path
7
+ executes arbitrary code from the repo; and
8
+ * **pickle-based checkpoints** — which execute on deserialisation.
9
+
10
+ Both are the fetch-then-execute anti-pattern the project bans. So, by default:
11
+ pin and hash-verify weights and configs; prefer safetensors and disallow pickle;
12
+ never enable remote model code. (Serving inside the capability envelope is the
13
+ SandboxBackend's job; this module is the *declaration and verification* half.)
14
+
15
+ Everything here is pure and deterministic — it makes a decision about a model
16
+ reference and a file list, with no network — so it is fully testable at $0 and
17
+ is the gate the HuggingFace tools call before a local pipeline is constructed.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import hashlib
23
+ import re
24
+ from pathlib import Path
25
+
26
+ from pydantic import BaseModel, Field
27
+
28
+ # A pinned revision is a full 40-hex git commit sha — a moving ref (a branch
29
+ # name, or "main") is exactly what pinning forbids.
30
+ _COMMIT_RE = re.compile(r"^[0-9a-f]{40}$")
31
+
32
+ # Checkpoint extensions that deserialise via pickle (arbitrary code on load).
33
+ _PICKLE_SUFFIXES = (".bin", ".pt", ".pth", ".ckpt", ".pkl", ".pickle")
34
+ # Formats treated as safe — no code path on load (safetensors weights, JSON/text configs, tokenizer models, ONNX).
35
+ _SAFE_SUFFIXES = (".safetensors", ".json", ".txt", ".model", ".onnx")
36
+
37
+
38
class SupplyChainError(ValueError):
    """Raised when a model reference, file list or file hash breaks the supply-chain policy.

    Subclasses :class:`ValueError`, so callers catching ``ValueError`` also
    catch it.
    """
40
+
41
+
42
class ModelPin(BaseModel):
    """Reference to a model on the Hub, optionally pinned and hashed.

    A full commit sha in ``revision`` is what keeps the artifact fixed under
    the reference. ``expected_hashes`` associates a filename with the sha256 its
    downloaded copy is expected to have.
    """

    # Hub repository id; handed to the pipeline as its ``model`` argument.
    repo_id: str
    # Commit to load; ``None`` means unpinned.
    revision: str | None = None
    # filename -> expected sha256 hex digest; empty by default.
    expected_hashes: dict[str, str] = Field(default_factory=dict)
53
+
54
+
55
class SupplyChainPolicy(BaseModel):
    """Default-deny supply-chain policy.

    The defaults are the safe configuration: nothing has to be switched on to
    be safe, and each flag exists only to relax one rule for a reviewed case.
    """

    # When False, pickle-format checkpoint files in a file list are rejected.
    allow_pickle: bool = False
    # When True, assert_no_remote_code() raises.
    allow_remote_code: bool = False
    # When True, a pin's revision must be a full 40-hex commit sha.
    require_pinned_revision: bool = True
62
+
63
+
64
def verify_model_source(
    pin: ModelPin,
    policy: SupplyChainPolicy | None = None,
    *,
    files: list[str] | None = None,
) -> None:
    """Check ``pin`` (and an optional ``files`` list) against ``policy``.

    Checks, cheapest first:

    1. unless ``require_pinned_revision`` is relaxed, ``pin.revision`` must be
       a full 40-hex commit sha;
    2. unless ``allow_pickle`` is relaxed, no name in ``files`` may end with a
       pickle-format suffix (compared case-insensitively).

    Args:
        pin: the model reference to check.
        policy: the policy to apply; defaults to ``SupplyChainPolicy()``.
        files: optional file names from the model repo. When omitted or empty,
            the pickle check is skipped.

    Raises:
        SupplyChainError: on the first violated rule.
    """
    policy = policy or SupplyChainPolicy()

    if policy.require_pinned_revision:
        # fullmatch, not match: with `$`, match() would also accept a sha
        # followed by a trailing newline.
        if not pin.revision or not _COMMIT_RE.fullmatch(pin.revision):
            raise SupplyChainError(
                f"{pin.repo_id}: revision must be a pinned 40-hex commit sha "
                f"(got {pin.revision!r}); a moving ref like 'main' is forbidden"
            )

    if files and not policy.allow_pickle:
        offending = sorted(f for f in files if f.lower().endswith(_PICKLE_SUFFIXES))
        if offending:
            raise SupplyChainError(
                f"{pin.repo_id}: pickle-format checkpoints are disallowed "
                f"(prefer safetensors): {offending}"
            )
92
+
93
+
94
def assert_no_remote_code(policy: SupplyChainPolicy | None = None) -> None:
    """Refuse to proceed when the policy enables remote model code.

    Returns ``None`` when ``allow_remote_code`` is off (the default policy) and
    raises :class:`SupplyChainError` when it is on. ``policy`` defaults to
    ``SupplyChainPolicy()``.
    """
    active = policy if policy else SupplyChainPolicy()
    if not active.allow_remote_code:
        return
    raise SupplyChainError(
        "remote model code is enabled (allow_remote_code=True) — this executes "
        "arbitrary code from the model repo on load; it must be reviewed, not default"
    )
103
+
104
+
105
def safe_pipeline_kwargs(pin: ModelPin, policy: SupplyChainPolicy | None = None) -> dict:
    """Build policy-checked keyword arguments for ``transformers.pipeline``.

    Runs :func:`assert_no_remote_code` and :func:`verify_model_source` first,
    so a :class:`SupplyChainError` from either propagates. The result always
    carries ``model`` and ``trust_remote_code=False``; ``revision`` is added
    only when the pin has one.
    """
    active = policy if policy else SupplyChainPolicy()
    assert_no_remote_code(active)
    verify_model_source(pin, active)
    pinned = {"revision": pin.revision} if pin.revision else {}
    return {"model": pin.repo_id, "trust_remote_code": False, **pinned}
116
+
117
+
118
def file_sha256(path: str | Path) -> str:
    """Return the hex sha256 digest of the file at ``path``.

    The file is read in 1 MiB blocks, so its full contents are never held in
    memory at once.
    """
    digest = hashlib.sha256()
    block_size = 1 << 20
    with open(path, "rb") as stream:
        while block := stream.read(block_size):
            digest.update(block)
    return digest.hexdigest()
126
+
127
+
128
def verify_file_hash(path: str | Path, expected_sha256: str) -> None:
    """Verify that the file at ``path`` has the expected sha256.

    ``expected_sha256`` is compared as a hex digest: surrounding whitespace is
    ignored and letter case does not matter, so an uppercase digest copied from
    another tool still verifies.

    Raises:
        SupplyChainError: if the digests differ. The message shows the expected
            value as given and the actual digest.
    """
    # file_sha256() yields lowercase hex; normalise the caller's value to match.
    wanted = expected_sha256.strip().lower()
    actual = file_sha256(path)
    if actual != wanted:
        raise SupplyChainError(
            f"{path}: sha256 mismatch — expected {expected_sha256}, got {actual}"
        )
@@ -0,0 +1,231 @@
1
+ """HuggingFace task models as typed Zu Tools (Engineering Design §8.5).
2
+
3
+ Each tool wraps one HuggingFace task behind the standard Tool contract, with the
4
+ typed multimodal :class:`~zu_core.content` Content (Text/Image/Audio) as the
5
+ currency in and out — which is what lets a non-chat model slot into the loop as
6
+ cleanly as a chat one. The same tool works hosted or local because it depends
7
+ only on the :class:`HfClient` seam (``client.py``).
8
+
9
+ The port is the *role*, assigned per agent (§4.5): these are Tools — verbs the
10
+ policy performs (transcribe, read an image, detect, embed, summarise,
11
+ translate). A classifier wanting to *gate control flow* or *check a result*
12
+ becomes a detector/validator instead — see ``roles.py``.
13
+
14
+ The envelope is derived from the backend: a hosted client egresses to the HF
15
+ router (CAP_NET + that host); a local pipeline reaches nothing. Media is passed
16
+ as base64 (``data_b64``) or a local ``path`` — the realistic shape when the
17
+ policy carries bytes from a prior observation.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import base64
23
+ from pathlib import Path
24
+ from typing import Any
25
+
26
+ from zu_core.content import Audio, Image, Text
27
+ from zu_core.ports import CAP_NET
28
+
29
+ from .client import HfClient, InferenceClientBackend
30
+
31
+
32
+ def _decode_media(data_b64: str | None, path: str | None) -> bytes:
33
+ if data_b64:
34
+ return base64.b64decode(data_b64)
35
+ if path:
36
+ return Path(path).read_bytes()
37
+ raise ValueError("provide media as 'data_b64' (base64) or a local 'path'")
38
+
39
+
40
+ class _HfTool:
41
+ """Shared base: hold a model id + client, and derive the capability envelope
42
+ from the backend (hosted ⇒ net+router; local ⇒ nothing)."""
43
+
44
+ tier = 1 # a specialised model the policy calls — cheap, not an escalation
45
+
46
+ def __init__(self, model: str, client: HfClient | None = None) -> None:
47
+ self.model = model
48
+ self._client = client
49
+ backend = client if client is not None else InferenceClientBackend()
50
+ host = getattr(backend, "egress_host", "")
51
+ self.capabilities = frozenset({CAP_NET}) if host else frozenset()
52
+ self.egress = frozenset({host}) if host else frozenset()
53
+ self._backend = backend
54
+
55
+ def _c(self) -> HfClient:
56
+ return self._client if self._client is not None else self._backend
57
+
58
+
59
class Transcribe(_HfTool):
    """Speech recognition tool: audio in, text out. Role: Tool (§8.5 Audio)."""

    name = "hf_transcribe"
    prompt_fragment = "hf_transcribe(data_b64|path): transcribe speech audio to text."
    schema = {
        "name": name,
        "description": "Transcribe speech audio to text via a HuggingFace ASR model.",
        "parameters": {
            "type": "object",
            "properties": {
                "data_b64": {"type": "string", "description": "base64-encoded audio"},
                "path": {"type": "string", "description": "local audio file path"},
            },
        },
    }

    async def __call__(self, ctx: Any, data_b64: str | None = None, path: str | None = None) -> dict:
        """Decode the audio, transcribe it, and return ``{"text", "model"}``.

        ``ctx`` is accepted but not read here.
        """
        raw = _decode_media(data_b64, path)
        # Build the typed Audio content; the instance itself is discarded.
        Audio(data=raw)
        transcript = self._c().transcribe(raw, self.model)
        return {"text": transcript, "model": self.model}
81
+
82
+
83
class ImageToText(_HfTool):
    """Image-to-text / OCR tool: image in, text out. Role: Tool (§8.5 CV/Multimodal)."""

    name = "hf_image_to_text"
    prompt_fragment = "hf_image_to_text(data_b64|path): read/describe an image as text (OCR or caption)."
    schema = {
        "name": name,
        "description": "Extract or describe the text/content of an image via a HuggingFace model.",
        "parameters": {
            "type": "object",
            "properties": {
                "data_b64": {"type": "string", "description": "base64-encoded image"},
                "path": {"type": "string", "description": "local image file path"},
            },
        },
    }

    async def __call__(self, ctx: Any, data_b64: str | None = None, path: str | None = None) -> dict:
        """Decode the image, run image-to-text, and return ``{"text", "model"}``.

        ``ctx`` is accepted but not read here.
        """
        raw = _decode_media(data_b64, path)
        # Build the typed Image content; the instance itself is discarded.
        Image(data=raw)
        extracted = self._c().image_to_text(raw, self.model)
        return {"text": extracted, "model": self.model}
105
+
106
+
107
class DetectObjects(_HfTool):
    """Object detection tool: image in, boxes out. Role: Tool (or detector). (§8.5 CV)."""

    name = "hf_detect"
    prompt_fragment = "hf_detect(data_b64|path): find objects in an image (labelled boxes)."
    schema = {
        "name": name,
        "description": "Detect objects in an image via a HuggingFace model; returns labelled boxes.",
        "parameters": {
            "type": "object",
            "properties": {
                "data_b64": {"type": "string", "description": "base64-encoded image"},
                "path": {"type": "string", "description": "local image file path"},
            },
        },
    }

    async def __call__(self, ctx: Any, data_b64: str | None = None, path: str | None = None) -> dict:
        """Decode the image, detect objects, and return ``{"objects", "count", "model"}``.

        ``count`` is the number of returned objects. ``ctx`` is accepted but
        not read here.
        """
        raw = _decode_media(data_b64, path)
        found = self._c().object_detection(raw, self.model)
        return {"objects": found, "count": len(found), "model": self.model}
128
+
129
+
130
class Embed(_HfTool):
    """Feature extraction tool: text in, vector out. Role: retrieval Tool / grounding (§8.5 NLP)."""

    name = "hf_embed"
    prompt_fragment = "hf_embed(text): embed text into a vector for search/similarity."
    schema = {
        "name": name,
        "description": "Embed text into a dense vector via a HuggingFace embedding model.",
        "parameters": {
            "type": "object",
            "properties": {"text": {"type": "string"}},
            "required": ["text"],
        },
    }

    async def __call__(self, ctx: Any, text: str) -> dict:
        """Embed ``text`` and return ``{"embedding", "dim", "model"}``.

        ``dim`` is the length of the returned vector. ``ctx`` is accepted but
        not read here.
        """
        vector = self._c().embed(text, self.model)
        return {"embedding": vector, "dim": len(vector), "model": self.model}
148
+
149
+
150
class Classify(_HfTool):
    """Text classification tool: text in, labels out. Role: Tool (or detector/router) (§8.5 NLP)."""

    name = "hf_classify"
    prompt_fragment = "hf_classify(text): classify/score text into the model's labels."
    schema = {
        "name": name,
        "description": "Classify text via a HuggingFace text-classification model.",
        "parameters": {
            "type": "object",
            "properties": {"text": {"type": "string"}},
            "required": ["text"],
        },
    }

    async def __call__(self, ctx: Any, text: str) -> dict:
        """Classify ``text`` and return ``{"labels", "top", "model"}``.

        ``top`` is the label of the first returned entry, or ``None`` when the
        client returns nothing. ``ctx`` is accepted but not read here.
        """
        ranked = self._c().text_classification(text, self.model)
        best = ranked[0]["label"] if ranked else None
        return {"labels": ranked, "top": best, "model": self.model}
168
+
169
+
170
class ZeroShotClassify(_HfTool):
    """Zero-shot classification tool: text plus candidate labels in, scores out (§8.5 NLP)."""

    name = "hf_zero_shot"
    prompt_fragment = "hf_zero_shot(text, labels): score text against candidate labels you supply."
    schema = {
        "name": name,
        "description": "Zero-shot classify text against candidate labels via a HuggingFace model.",
        "parameters": {
            "type": "object",
            "properties": {
                "text": {"type": "string"},
                "labels": {"type": "array", "items": {"type": "string"}},
            },
            "required": ["text", "labels"],
        },
    }

    async def __call__(self, ctx: Any, text: str, labels: list[str]) -> dict:
        """Score ``text`` against ``labels`` and return ``{"labels", "top", "model"}``.

        ``top`` is the label of the first returned entry, or ``None`` when the
        client returns nothing. ``ctx`` is accepted but not read here.
        """
        ranked = self._c().zero_shot(text, labels, self.model)
        best = ranked[0]["label"] if ranked else None
        return {"labels": ranked, "top": best, "model": self.model}
191
+
192
+
193
class Summarize(_HfTool):
    """Summarization tool: text in, text out (§8.5 NLP)."""

    name = "hf_summarize"
    prompt_fragment = "hf_summarize(text): summarise a long text."
    schema = {
        "name": name,
        "description": "Summarise text via a HuggingFace summarization model.",
        "parameters": {
            "type": "object",
            "properties": {"text": {"type": "string"}},
            "required": ["text"],
        },
    }

    async def __call__(self, ctx: Any, text: str) -> dict:
        """Summarise ``text`` and return ``{"text", "model"}``.

        ``ctx`` is accepted but not read here.
        """
        summary = self._c().summarize(text, self.model)
        # Build the typed Text content; the instance itself is discarded.
        Text(text=summary)
        return {"text": summary, "model": self.model}
212
+
213
+
214
class Translate(_HfTool):
    """Translation tool: text in, text out (§8.5 NLP)."""

    name = "hf_translate"
    prompt_fragment = "hf_translate(text): translate text (model is pinned to a language pair)."
    schema = {
        "name": name,
        "description": "Translate text via a HuggingFace translation model.",
        "parameters": {
            "type": "object",
            "properties": {"text": {"type": "string"}},
            "required": ["text"],
        },
    }

    async def __call__(self, ctx: Any, text: str) -> dict:
        """Translate ``text`` and return ``{"text", "model"}``.

        ``ctx`` is accepted but not read here.
        """
        translated = self._c().translate(text, self.model)
        return {"text": translated, "model": self.model}
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: zu-huggingface
3
+ Version: 0.2.2
4
+ Summary: Zu HuggingFace adapter: task models as typed tools/detectors/validators, behind the supply-chain guards
5
+ Project-URL: Homepage, https://github.com/k3-mt/zu
6
+ Project-URL: Repository, https://github.com/k3-mt/zu
7
+ License-Expression: Apache-2.0
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
15
+ Classifier: Typing :: Typed
16
+ Requires-Python: >=3.11
17
+ Requires-Dist: zu-core==0.2.1
18
+ Provides-Extra: hosted
19
+ Requires-Dist: huggingface-hub>=0.24; extra == 'hosted'
20
+ Provides-Extra: local
21
+ Requires-Dist: pillow; extra == 'local'
22
+ Requires-Dist: transformers>=4.40; extra == 'local'
23
+ Description-Content-Type: text/markdown
24
+
25
+ # zu-huggingface
26
+
27
+ HuggingFace models behind Zu's typed ports. HuggingFace is not a model — it is
28
+ the largest hub of open models across every modality — so "supporting it" means
29
+ three different things, and this package draws the line cleanly
30
+ (Engineering Design §8.3–8.5).
31
+
32
+ ## Chat / vision-language models as the policy — *no code here*
33
+
34
+ A chat or vision-language model that is the **brain** speaks the OpenAI chat API
35
+ on all three HuggingFace serving surfaces (the router's `/v1`, a dedicated
36
+ Endpoint's `/v1`, or a local vLLM server). So a HuggingFace model as the policy
37
+ is the existing `openai-compatible` provider pointed at a HuggingFace base URL —
38
+ the OpenRouter story exactly, no new adapter:
39
+
40
+ ```yaml
41
+ # agent.yaml — a HuggingFace multimodal model as the policy
42
+ model: meta-llama/Llama-Vision-... # any chat / VLM id on the Hub
43
+ provider: openai-compatible
44
+ options:
45
+ base_url: https://router.huggingface.co/v1 # or an Endpoint, or local vLLM
46
+ api_key_env: HF_TOKEN
47
+ ```
48
+
49
+ ## Task models as tools, detectors, validators — this package
50
+
51
+ Most HuggingFace models are **not** chat models (OCR, ASR, detection,
52
+ embeddings, classification, …), so they enter through the non-policy ports by
53
+ their **role** (the port is the role, assigned per agent — §4.5):
54
+
55
+ | Role | Class | Task |
56
+ |------|-------|------|
57
+ | Tool | `Transcribe` (`hf_transcribe`) | speech → text (ASR) |
58
+ | Tool | `ImageToText` (`hf_image_to_text`) | image → text (OCR / caption) |
59
+ | Tool | `DetectObjects` (`hf_detect`) | image → labelled boxes |
60
+ | Tool | `Embed` (`hf_embed`) | text → vector (retrieval / grounding) |
61
+ | Tool | `Classify` (`hf_classify`) | text → labels |
62
+ | Tool | `ZeroShotClassify` (`hf_zero_shot`) | text + labels → scores |
63
+ | Tool | `Summarize` (`hf_summarize`) | text → text |
64
+ | Tool | `Translate` (`hf_translate`) | text → text |
65
+ | Detector | `HfClassifierDetector` | classify an observation → ESCALATE/stop |
66
+ | Validator | `HfClassifierValidator` | classify the result → fail/RETRY |
67
+
68
+ Each is **parameterised by a model id** (and the role wrappers by the labels
69
+ that matter), so they are wired *by reference in config* per agent rather than
70
+ as zero-config entry points:
71
+
72
+ ```yaml
73
+ tools:
74
+ - ref: zu_huggingface.tools:Transcribe
75
+ args: { model: openai/whisper-large-v3 }
76
+ - ref: zu_huggingface.tools:Embed
77
+ args: { model: BAAI/bge-large-en-v1.5 }
78
+ detectors:
79
+ - ref: zu_huggingface.roles:HfClassifierDetector
80
+ args: { model: facebook/bart-large-mnli, candidate_labels: ["safe","unsafe"], escalate_on: ["unsafe"] }
81
+ ```
82
+
83
+ The typed multimodal `Content` (`Text`/`Image`/`Audio`) from `zu_core.content`
84
+ is the currency in and out — which is what lets a non-chat model slot into the
85
+ loop as cleanly as a chat one.
86
+
87
+ ### Hosted vs local — one seam
88
+
89
+ Every tool depends only on the `HfClient` seam, so the same tool works:
90
+
91
+ - **Hosted** — `InferenceClientBackend` wraps `huggingface_hub.InferenceClient`
92
+ (the serverless router or a dedicated Endpoint). Egresses to
93
+ `router.huggingface.co`; `HF_TOKEN` is read from the environment inside the
94
+ backend. `pip install 'zu-huggingface[hosted]'`.
95
+ - **Local** — `PipelineBackend` wraps `transformers.pipeline` for the
96
+ air-gapped / on-prem case. Reaches no network. Every pipeline is built through
97
+ the supply-chain guards. `pip install 'zu-huggingface[local]'` (plus a
98
+ backend such as `torch`).
99
+
100
+ ## The supply chain — safe by default (§8.3)
101
+
102
+ Pulling a model from the Hub is a supply-chain surface. `supply_chain.py`
103
+ enforces, by default:
104
+
105
+ - **Pin + hash.** A `ModelPin` should carry a full commit-sha `revision`;
106
+ `verify_file_hash` checks a downloaded file's sha256.
107
+ - **safetensors, not pickle.** `verify_model_source` rejects `.bin`/`.pt`/`.ckpt`
108
+ checkpoints (pickle-based formats that can execute arbitrary code when deserialised) unless explicitly allowed.
109
+ - **No remote code.** `safe_pipeline_kwargs` forces `trust_remote_code=False`;
110
+ `assert_no_remote_code` raises if it is relaxed.
111
+
112
+ The safe configuration is the default — there is nothing to turn *on* to be
113
+ safe; there are only flags that a reviewed case may relax.
114
+
115
+ ## Tests
116
+
117
+ Offline, no network, no model download: the tools and role wrappers are
118
+ exercised against a fake `HfClient`, and the supply-chain guards are pure.
119
+ Run them with `uv run pytest packages/zu-huggingface`.
@@ -0,0 +1,8 @@
1
+ zu_huggingface/__init__.py,sha256=YJDo9GnPPnyZIirzuuP3fBKUppBTJMXfp27um7IQCak,2261
2
+ zu_huggingface/client.py,sha256=JakMapgYgzKntkQUjb1dEhQIMD3j21bIlK2VvcNOZ_A,8573
3
+ zu_huggingface/roles.py,sha256=yAqM7ItZibCLka9ojLbmdGbnHn9wVGbY7AwZd_hCIgM,5160
4
+ zu_huggingface/supply_chain.py,sha256=yTRxBDaCLlWYfUpz5JKcqUSuUUGyNq6sriv3SoLiMRI,5358
5
+ zu_huggingface/tools.py,sha256=vN1LWcsj8Atmfic-SKh84DwE0KURjK7jVJVMJ2-RSIM,8889
6
+ zu_huggingface-0.2.2.dist-info/METADATA,sha256=jIUj7y_udLid09vW5Gmq7Q5ao154tWy2iqzZL1G26P0,5284
7
+ zu_huggingface-0.2.2.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
8
+ zu_huggingface-0.2.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any