zu-huggingface 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_huggingface/__init__.py +74 -0
- zu_huggingface/client.py +187 -0
- zu_huggingface/roles.py +139 -0
- zu_huggingface/supply_chain.py +135 -0
- zu_huggingface/tools.py +231 -0
- zu_huggingface-0.2.2.dist-info/METADATA +119 -0
- zu_huggingface-0.2.2.dist-info/RECORD +8 -0
- zu_huggingface-0.2.2.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""zu-huggingface — HuggingFace models behind Zu's typed ports (§8.3–8.5).
|
|
2
|
+
|
|
3
|
+
HuggingFace is not a model — it is the largest hub of open models across every
|
|
4
|
+
modality. This package reaches it three ways, all behind configuration:
|
|
5
|
+
|
|
6
|
+
* **Chat / vision-language models as the policy** need *no code here* — they
|
|
7
|
+
speak the OpenAI chat API on all three serving surfaces (the router's ``/v1``,
|
|
8
|
+
an Endpoint's ``/v1``, or a local vLLM), so a HuggingFace model as the brain
|
|
9
|
+
is the existing ``openai-compatible`` provider pointed at a HuggingFace base
|
|
10
|
+
URL (see this package's README). It is the OpenRouter story exactly.
|
|
11
|
+
|
|
12
|
+
* **Task models** (ASR, OCR, detection, embeddings, classification,
|
|
13
|
+
summarisation, translation) are *not* chat models — each has its own typed
|
|
14
|
+
I/O — so they enter through the non-policy ports by their role: as **Tools**
|
|
15
|
+
(``tools.py``) and as **detectors / validators** (``roles.py``), over the one
|
|
16
|
+
:class:`HfClient` seam (``client.py``) that works hosted or local.
|
|
17
|
+
|
|
18
|
+
* **The supply chain** (``supply_chain.py``) makes pulling any of them safe by
|
|
19
|
+
default: pin + hash, safetensors not pickle, never trust remote code.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
from .client import HF_ROUTER, HfClient, InferenceClientBackend, PipelineBackend
|
|
25
|
+
from .roles import HfClassifierDetector, HfClassifierValidator
|
|
26
|
+
from .supply_chain import (
|
|
27
|
+
ModelPin,
|
|
28
|
+
SupplyChainError,
|
|
29
|
+
SupplyChainPolicy,
|
|
30
|
+
assert_no_remote_code,
|
|
31
|
+
file_sha256,
|
|
32
|
+
safe_pipeline_kwargs,
|
|
33
|
+
verify_file_hash,
|
|
34
|
+
verify_model_source,
|
|
35
|
+
)
|
|
36
|
+
from .tools import (
|
|
37
|
+
Classify,
|
|
38
|
+
DetectObjects,
|
|
39
|
+
Embed,
|
|
40
|
+
ImageToText,
|
|
41
|
+
Summarize,
|
|
42
|
+
Transcribe,
|
|
43
|
+
Translate,
|
|
44
|
+
ZeroShotClassify,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
__all__ = [
|
|
48
|
+
# client seam
|
|
49
|
+
"HfClient",
|
|
50
|
+
"HF_ROUTER",
|
|
51
|
+
"InferenceClientBackend",
|
|
52
|
+
"PipelineBackend",
|
|
53
|
+
# tools
|
|
54
|
+
"Transcribe",
|
|
55
|
+
"ImageToText",
|
|
56
|
+
"DetectObjects",
|
|
57
|
+
"Embed",
|
|
58
|
+
"Classify",
|
|
59
|
+
"ZeroShotClassify",
|
|
60
|
+
"Summarize",
|
|
61
|
+
"Translate",
|
|
62
|
+
# role wrappers
|
|
63
|
+
"HfClassifierDetector",
|
|
64
|
+
"HfClassifierValidator",
|
|
65
|
+
# supply chain
|
|
66
|
+
"ModelPin",
|
|
67
|
+
"SupplyChainPolicy",
|
|
68
|
+
"SupplyChainError",
|
|
69
|
+
"verify_model_source",
|
|
70
|
+
"assert_no_remote_code",
|
|
71
|
+
"safe_pipeline_kwargs",
|
|
72
|
+
"file_sha256",
|
|
73
|
+
"verify_file_hash",
|
|
74
|
+
]
|
zu_huggingface/client.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""The HuggingFace client seam — one task-method interface, three serving surfaces.
|
|
2
|
+
|
|
3
|
+
Most HuggingFace models are *not* chat models: OCR, speech recognition, object
|
|
4
|
+
detection, and embedding models each have their own typed input/output, so they
|
|
5
|
+
enter Zu through the non-policy ports by their role (§8.5). This module is the
|
|
6
|
+
thin seam the HuggingFace *tools* call, so the same tool works whether the model
|
|
7
|
+
is served hosted (the Inference Providers router) or local (a transformers
|
|
8
|
+
pipeline) — the integration is done once, here.
|
|
9
|
+
|
|
10
|
+
``HfClient`` is the protocol the tools depend on. Two adapters implement it:
|
|
11
|
+
|
|
12
|
+
* :class:`InferenceClientBackend` — wraps ``huggingface_hub.InferenceClient``
|
|
13
|
+
(hosted; the router or a dedicated Endpoint), egressing to the HF router.
|
|
14
|
+
* :class:`PipelineBackend` — wraps ``transformers.pipeline`` (local; the
|
|
15
|
+
air-gapped / on-prem case), constructed only through the supply-chain guards
|
|
16
|
+
(§8.3): a pinned revision and ``trust_remote_code=False``.
|
|
17
|
+
|
|
18
|
+
Both heavy SDKs are imported lazily, so installing ``zu-huggingface`` without
|
|
19
|
+
the extras costs nothing and the tools are testable offline against a fake
|
|
20
|
+
client. Credentials (``HF_TOKEN``) are resolved from the environment *inside*
|
|
21
|
+
the backend, never placed in the model's context.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import os
|
|
27
|
+
from typing import Any, Protocol, runtime_checkable
|
|
28
|
+
|
|
29
|
+
from .supply_chain import ModelPin, SupplyChainPolicy, safe_pipeline_kwargs
|
|
30
|
+
|
|
31
|
+
# The Inference Providers router — the hosted default, OpenAI-compatible for
|
|
32
|
+
# chat at /v1 but task-native through the InferenceClient methods.
|
|
33
|
+
HF_ROUTER = "router.huggingface.co"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@runtime_checkable
class HfClient(Protocol):
    """Structural interface the HuggingFace tools, detectors and validators call.

    Every method takes and returns plain Python values — ``bytes`` for media,
    ``str`` for text, ``list[dict]`` / ``list[float]`` for structured results —
    so translation to and from typed :class:`zu_core.content` Content stays in
    the tools. ``model`` is always the model identifier to run the task with.
    """

    def transcribe(self, audio: bytes, model: str) -> str:
        """Speech audio in, transcript text out."""
        ...

    def image_to_text(self, image: bytes, model: str) -> str:
        """Image bytes in, generated text out."""
        ...

    def object_detection(self, image: bytes, model: str) -> list[dict]:
        """Image bytes in, one dict per detected object out."""
        ...

    def text_classification(self, text: str, model: str) -> list[dict]:
        """Text in, scored labels out."""
        ...

    def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
        """Text plus caller-supplied candidate ``labels`` in, scored labels out."""
        ...

    def embed(self, text: str, model: str) -> list[float]:
        """Text in, a single embedding vector out."""
        ...

    def summarize(self, text: str, model: str) -> str:
        """Text in, summary text out."""
        ...

    def translate(self, text: str, model: str) -> str:
        """Text in, translated text out."""
        ...
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _scores(raw: Any) -> list[dict]:
|
|
53
|
+
"""Normalise a classifier response to ``[{"label","score"}, …]`` sorted by
|
|
54
|
+
score desc — the shape every classification tool/detector reads."""
|
|
55
|
+
out: list[dict] = []
|
|
56
|
+
if isinstance(raw, dict) and "labels" in raw and "scores" in raw: # zero-shot shape
|
|
57
|
+
out = [{"label": str(lbl), "score": float(sc)}
|
|
58
|
+
for lbl, sc in zip(raw["labels"], raw["scores"], strict=False)]
|
|
59
|
+
elif isinstance(raw, list):
|
|
60
|
+
for item in raw:
|
|
61
|
+
if isinstance(item, dict) and "label" in item:
|
|
62
|
+
out.append({"label": str(item["label"]), "score": float(item.get("score", 0.0))})
|
|
63
|
+
return sorted(out, key=lambda d: d["score"], reverse=True)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class InferenceClientBackend:
    """Hosted HuggingFace via ``huggingface_hub.InferenceClient`` (lazy import).

    The same model id works through the serverless router or a dedicated
    Endpoint. The API token is read from the environment variable named by
    ``token_env`` when the SDK client is first built, so it is never passed in
    by callers.

    Args:
        provider: Provider name handed to ``InferenceClient``.
        token_env: Name of the environment variable holding the API token.
        client: A ready-made client object to use instead of building one
            (injectable for tests); it must expose the ``InferenceClient``
            task methods called below.
    """

    egress_host = HF_ROUTER

    def __init__(
        self,
        *,
        provider: str = "hf-inference",
        token_env: str = "HF_TOKEN",
        client: Any = None,
    ) -> None:
        self._provider = provider
        self._token_env = token_env
        self._client = client  # injectable for tests

    def _c(self) -> Any:
        """Return the SDK client, importing and constructing it on first use.

        Raises:
            RuntimeError: if ``huggingface_hub`` is not installed.
        """
        if self._client is None:
            try:
                from huggingface_hub import InferenceClient
            except ImportError as e:  # pragma: no cover - exercised only without the extra
                raise RuntimeError(
                    "the hosted HuggingFace backend needs `huggingface_hub` "
                    "(install zu-huggingface[hosted])"
                ) from e
            self._client = InferenceClient(provider=self._provider, api_key=os.environ.get(self._token_env))
        return self._client

    @staticmethod
    def _pooled(values: Any) -> list[float]:
        """Reduce a feature-extraction result to one flat vector.

        Singleton outer dimensions are peeled off, and a remaining list of
        vectors is mean-pooled column-wise — the same reduction the local
        pipeline backend applies — so both backends return one ``list[float]``.
        """
        while (
            isinstance(values, (list, tuple))
            and len(values) == 1
            and isinstance(values[0], (list, tuple))
        ):
            values = values[0]
        if values and isinstance(values[0], (list, tuple)):
            return [sum(col) / len(col) for col in zip(*values, strict=False)]
        return [float(x) for x in values]

    def transcribe(self, audio: bytes, model: str) -> str:
        """Run automatic speech recognition and return the transcript text."""
        r = self._c().automatic_speech_recognition(audio, model=model)
        # The SDK may hand back a bare string or an object carrying ``.text``.
        return r if isinstance(r, str) else str(getattr(r, "text", r))

    def image_to_text(self, image: bytes, model: str) -> str:
        """Run image-to-text and return the generated text."""
        r = self._c().image_to_text(image, model=model)
        return r if isinstance(r, str) else str(getattr(r, "generated_text", r))

    def object_detection(self, image: bytes, model: str) -> list[dict]:
        """Run object detection and return each detection as a plain dict."""
        r = self._c().object_detection(image, model=model)
        return [dict(item) for item in r]

    def text_classification(self, text: str, model: str) -> list[dict]:
        """Classify ``text``; returns normalised, score-sorted label dicts."""
        return _scores(self._c().text_classification(text, model=model))

    def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
        """Score ``text`` against ``labels``; returns normalised, sorted label dicts."""
        return _scores(self._c().zero_shot_classification(text, candidate_labels=labels, model=model))

    def embed(self, text: str, model: str) -> list[float]:
        """Embed ``text`` and return a single flat vector.

        Array-like results are converted with ``tolist()``; nested (batched or
        per-token) results are reduced by :meth:`_pooled` instead of failing
        on ``float(list)``.
        """
        r = self._c().feature_extraction(text, model=model)
        return self._pooled(r.tolist() if hasattr(r, "tolist") else r)

    def summarize(self, text: str, model: str) -> str:
        """Summarise ``text`` and return the summary."""
        r = self._c().summarization(text, model=model)
        return r if isinstance(r, str) else str(getattr(r, "summary_text", r))

    def translate(self, text: str, model: str) -> str:
        """Translate ``text`` and return the translation."""
        r = self._c().translation(text, model=model)
        return r if isinstance(r, str) else str(getattr(r, "translation_text", r))
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class PipelineBackend:
    """Local HuggingFace via ``transformers.pipeline`` (lazy import).

    The only option for air-gapped / on-prem. Every pipeline is built through
    :func:`safe_pipeline_kwargs` — a pinned revision and ``trust_remote_code``
    forced off — so the §8.3 supply-chain rules hold by construction. Pipelines
    are cached per (task, model).

    Because the task methods only receive a model *string*, the revision has to
    come from somewhere else. It is resolved in this order:

    1. an explicit entry in ``pins`` keyed by the model string;
    2. a ``"<repo_id>@<40-hex commit sha>"`` suffix on the model string;
    3. otherwise no revision — which the default policy
       (``require_pinned_revision=True``) rejects with ``SupplyChainError``.

    Args:
        policy: Supply-chain policy; the default-deny policy when omitted.
        pins: Optional mapping from the model string the tools pass to the
            :class:`ModelPin` that should be loaded for it.
    """

    egress_host = ""  # local — no egress

    _HEX_DIGITS = frozenset("0123456789abcdef")

    def __init__(
        self,
        policy: SupplyChainPolicy | None = None,
        *,
        pins: dict[str, ModelPin] | None = None,
    ) -> None:
        self._policy = policy or SupplyChainPolicy()
        self._pins: dict[str, ModelPin] = dict(pins or {})
        self._cache: dict[tuple[str, str], Any] = {}

    def _pin_for(self, model: str) -> ModelPin:
        """Resolve the :class:`ModelPin` for a model string (see class docstring)."""
        explicit = self._pins.get(model)
        if explicit is not None:
            return explicit
        repo, sep, rev = model.rpartition("@")
        # Only a full lowercase 40-hex suffix is treated as a revision, so any
        # other string containing "@" is passed through untouched.
        if sep and repo and len(rev) == 40 and not set(rev) - self._HEX_DIGITS:
            return ModelPin(repo_id=repo, revision=rev)
        return ModelPin(repo_id=model)

    @staticmethod
    def _image_input(image: Any) -> Any:
        """Convert raw image bytes to a base64 string for the image pipelines.

        NOTE(review): transformers' image loader accepts a URL, a local path, a
        base64 string or a PIL image, but not raw ``bytes`` — confirm against
        the installed transformers version.
        """
        if isinstance(image, (bytes, bytearray)):
            import base64

            return base64.b64encode(bytes(image)).decode("ascii")
        return image

    def _pipe(self, task: str, model: str) -> Any:
        """Return the cached pipeline for ``(task, model)``, building it if needed.

        Raises:
            RuntimeError: if ``transformers`` is not installed.
            SupplyChainError: (from ``safe_pipeline_kwargs``) if the resolved
                pin violates the policy.
        """
        key = (task, model)
        if key not in self._cache:
            try:
                from transformers import pipeline
            except ImportError as e:  # pragma: no cover - exercised only without the extra
                raise RuntimeError(
                    "the local HuggingFace backend needs `transformers` "
                    "(install zu-huggingface[local])"
                ) from e
            kwargs = safe_pipeline_kwargs(self._pin_for(model), self._policy)
            self._cache[key] = pipeline(task, **kwargs)
        return self._cache[key]

    def transcribe(self, audio: bytes, model: str) -> str:
        """Run the ASR pipeline and return its ``"text"`` field."""
        return str(self._pipe("automatic-speech-recognition", model)(audio)["text"])

    def image_to_text(self, image: bytes, model: str) -> str:
        """Run the image-to-text pipeline and return the generated text."""
        r = self._pipe("image-to-text", model)(self._image_input(image))
        return str(r[0]["generated_text"] if isinstance(r, list) else r["generated_text"])

    def object_detection(self, image: bytes, model: str) -> list[dict]:
        """Run the object-detection pipeline; each detection becomes a plain dict."""
        return [dict(item) for item in self._pipe("object-detection", model)(self._image_input(image))]

    def text_classification(self, text: str, model: str) -> list[dict]:
        """Classify ``text``; returns normalised, score-sorted label dicts."""
        return _scores(self._pipe("text-classification", model)(text))

    def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
        """Score ``text`` against ``labels``; returns normalised, sorted label dicts."""
        return _scores(self._pipe("zero-shot-classification", model)(text, candidate_labels=labels))

    def embed(self, text: str, model: str) -> list[float]:
        """Embed ``text`` and return one vector (token vectors are mean-pooled)."""
        r = self._pipe("feature-extraction", model)(text)
        # pipelines return [[token-vectors]]; mean-pool to one vector
        vecs = r[0] if isinstance(r, list) else r
        if vecs and isinstance(vecs[0], list):
            cols = list(zip(*vecs, strict=False))
            return [sum(c) / len(c) for c in cols]
        return [float(x) for x in vecs]

    def summarize(self, text: str, model: str) -> str:
        """Run the summarization pipeline and return the first summary."""
        return str(self._pipe("summarization", model)(text)[0]["summary_text"])

    def translate(self, text: str, model: str) -> str:
        """Run the translation pipeline and return the first translation."""
        return str(self._pipe("translation", model)(text)[0]["translation_text"])
|
zu_huggingface/roles.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""HuggingFace models in the detector and validator roles (§8.5, §9.1).
|
|
2
|
+
|
|
3
|
+
The port is the role, assigned per agent. A zero-shot or text-classification
|
|
4
|
+
model that *gates control flow* is a **detector**; one that *checks the final
|
|
5
|
+
result* is a **validator**. A trained classifier as a detector is cheaper,
|
|
6
|
+
faster, and more reliable than asking an LLM the same yes/no question — the
|
|
7
|
+
right-sized-model discipline the economics rest on (§9.1).
|
|
8
|
+
|
|
9
|
+
These are configured per agent (a model + the labels that matter + a threshold),
|
|
10
|
+
so they enter the registry *by reference in config* rather than as a zero-config
|
|
11
|
+
entry point. Both reuse the same :class:`HfClient` seam as the tools.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from zu_core.contracts import Result
|
|
17
|
+
from zu_core.ports import RunContext, Scope, Severity, Verdict
|
|
18
|
+
|
|
19
|
+
from .client import HfClient
|
|
20
|
+
|
|
21
|
+
_CONTENT_KEYS = ("html", "text", "content")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _text_of(obs: object) -> str:
|
|
25
|
+
"""The text of an observation, concatenating the content keys (mirrors the
|
|
26
|
+
built-in detectors so they agree on "the content")."""
|
|
27
|
+
if isinstance(obs, dict):
|
|
28
|
+
parts = [v for k in _CONTENT_KEYS if isinstance(v := obs.get(k), str) and v]
|
|
29
|
+
return "\n".join(parts)
|
|
30
|
+
return ""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class HfClassifierDetector:
    """Per-observation detector driven by a HuggingFace classifier.

    The observation's text is classified — zero-shot when ``candidate_labels``
    is given, otherwise with the model's own text-classification head — and a
    verdict is produced only when the *top-ranked* label is one of
    ``escalate_on`` (compared case-insensitively) with a score at or above
    ``threshold``. The verdict's severity defaults to ``Severity.ESCALATE``.
    """

    scope = Scope.PER_OBSERVATION

    def __init__(
        self,
        client: HfClient,
        model: str,
        *,
        escalate_on: list[str],
        candidate_labels: list[str] | None = None,
        threshold: float = 0.5,
        severity: Severity = Severity.ESCALATE,
        name: str = "hf-classifier",
    ) -> None:
        self.name = name
        self._client = client
        self._model = model
        self._candidate_labels = candidate_labels
        self._threshold = threshold
        self._severity = severity
        # Labels are stored lower-cased so matching ignores case.
        self._escalate_on = {label.lower() for label in escalate_on}

    def _ranked(self, text: str) -> list[dict]:
        """Classify ``text`` with the configured mode; best label first."""
        if self._candidate_labels is None:
            return self._client.text_classification(text, self._model)
        return self._client.zero_shot(text, self._candidate_labels, self._model)

    def inspect(self, ctx: RunContext) -> Verdict | None:
        """Return a verdict when the classifier flags ``ctx.observation``.

        Returns ``None`` when the observation has no text, the classifier
        returns nothing, or the top label does not trip the gate.
        """
        text = _text_of(getattr(ctx, "observation", None))
        if not text.strip():
            return None

        ranked = self._ranked(text)
        if not ranked:
            return None

        best = ranked[0]
        tripped = best["label"].lower() in self._escalate_on and best["score"] >= self._threshold
        if not tripped:
            return None
        return Verdict(
            severity=self._severity,
            detector=self.name,
            detail=f"{best['label']} ({best['score']:.2f})",
        )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class HfClassifierValidator:
    """Result validator driven by a HuggingFace classifier.

    The result's text is classified — zero-shot when ``candidate_labels`` is
    given, otherwise with the model's own text-classification head — and a
    verdict (default severity ``Severity.RETRY``) is produced only when the
    *top-ranked* label is one of ``fail_on`` (compared case-insensitively) with
    a score at or above ``threshold``.

    ``value_key`` selects a single key of the result's dict value to classify;
    when it is ``None`` all top-level string values are joined.
    """

    def __init__(
        self,
        client: HfClient,
        model: str,
        *,
        fail_on: list[str],
        candidate_labels: list[str] | None = None,
        threshold: float = 0.5,
        severity: Severity = Severity.RETRY,
        value_key: str | None = None,
        name: str = "hf-classifier-check",
    ) -> None:
        self.name = name
        self._client = client
        self._model = model
        self._candidate_labels = candidate_labels
        self._threshold = threshold
        self._severity = severity
        self._value_key = value_key
        # Labels are stored lower-cased so matching ignores case.
        self._fail_on = {label.lower() for label in fail_on}

    def _result_text(self, result: Result) -> str:
        """Extract the text to classify from ``result.value``.

        Only dict values are supported; anything else yields ``""``.
        """
        payload = result.value
        if not isinstance(payload, dict):
            return ""
        if self._value_key is None:
            # No key configured: join the top-level string values.
            strings = [str(item) for item in payload.values() if isinstance(item, str)]
            return "\n".join(strings)
        selected = payload.get(self._value_key)
        if isinstance(selected, str):
            return selected
        return ""

    def _ranked(self, text: str) -> list[dict]:
        """Classify ``text`` with the configured mode; best label first."""
        if self._candidate_labels is None:
            return self._client.text_classification(text, self._model)
        return self._client.zero_shot(text, self._candidate_labels, self._model)

    def check(self, result: Result, ctx: RunContext) -> Verdict | None:
        """Return a verdict when the classifier flags the result's text.

        Returns ``None`` when there is no text to classify, the classifier
        returns nothing, or the top label does not trip the check. ``ctx`` is
        accepted for the validator signature and is not read here.
        """
        text = self._result_text(result)
        if not text.strip():
            return None

        ranked = self._ranked(text)
        if not ranked:
            return None

        best = ranked[0]
        tripped = best["label"].lower() in self._fail_on and best["score"] >= self._threshold
        if not tripped:
            return None
        return Verdict(
            severity=self._severity,
            detector=self.name,
            detail=f"{best['label']} ({best['score']:.2f})",
        )
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Model supply-chain guards (Engineering Design §8.3).
|
|
2
|
+
|
|
3
|
+
Pulling a model from the Hub is a supply-chain surface under the same rules as
|
|
4
|
+
any downloaded artifact. Two hazards matter:
|
|
5
|
+
|
|
6
|
+
* **model code that runs on load** — the transformers "trust remote code" path
|
|
7
|
+
executes arbitrary code from the repo; and
|
|
8
|
+
* **pickle-based checkpoints** — which execute on deserialisation.
|
|
9
|
+
|
|
10
|
+
Both are the fetch-then-execute anti-pattern the project bans. So, by default:
|
|
11
|
+
pin and hash-verify weights and configs; prefer safetensors and disallow pickle;
|
|
12
|
+
never enable remote model code. (Serving inside the capability envelope is the
|
|
13
|
+
SandboxBackend's job; this module is the *declaration and verification* half.)
|
|
14
|
+
|
|
15
|
+
Everything here is pure and deterministic — it makes a decision about a model
|
|
16
|
+
reference and a file list, with no network — so it is fully testable at $0 and
|
|
17
|
+
is the gate the HuggingFace tools call before a local pipeline is constructed.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import hashlib
|
|
23
|
+
import re
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
from pydantic import BaseModel, Field
|
|
27
|
+
|
|
28
|
+
# A pinned revision is a full 40-hex git commit sha — a moving ref (a branch
|
|
29
|
+
# name, or "main") is exactly what pinning forbids.
|
|
30
|
+
_COMMIT_RE = re.compile(r"^[0-9a-f]{40}$")
|
|
31
|
+
|
|
32
|
+
# Checkpoint extensions that deserialise via pickle (arbitrary code on load).
|
|
33
|
+
_PICKLE_SUFFIXES = (".bin", ".pt", ".pth", ".ckpt", ".pkl", ".pickle")
|
|
34
|
+
# The safe weights format — no code path on load.
|
|
35
|
+
_SAFE_SUFFIXES = (".safetensors", ".json", ".txt", ".model", ".onnx")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class SupplyChainError(ValueError):
    """Raised when a model reference, file set or file hash breaks the
    supply-chain policy.

    It subclasses :class:`ValueError`, so existing ``except ValueError``
    handlers also catch it.
    """
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ModelPin(BaseModel):
    """Reference to one model repository on the Hub, optionally pinned.

    Pinning ``revision`` to a full commit sha means the artifact behind the
    reference cannot change; ``expected_hashes`` carries per-file sha256
    digests for verifying downloaded files.
    """

    # Repository identifier passed to the loader as the model.
    repo_id: str
    # Commit to load; None means "not pinned".
    revision: str | None = None
    # filename -> expected sha256 hex digest; empty when nothing is declared.
    expected_hashes: dict[str, str] = Field(default_factory=dict)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class SupplyChainPolicy(BaseModel):
    """Default-deny supply-chain policy.

    A freshly constructed policy is the safe configuration: every field below
    exists only to relax a rule for a reviewed case.
    """

    # Permit pickle-format checkpoint files in a verified file list.
    allow_pickle: bool = False
    # Declares that remote model code is wanted; assert_no_remote_code rejects it.
    allow_remote_code: bool = False
    # Require the model revision to be a full 40-hex commit sha.
    require_pinned_revision: bool = True
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def verify_model_source(
    pin: ModelPin,
    policy: SupplyChainPolicy | None = None,
    *,
    files: list[str] | None = None,
) -> None:
    """Check a model reference (and optionally its file list) against a policy.

    Checks, cheapest first:

    1. unless ``require_pinned_revision`` is relaxed, ``pin.revision`` must be
       exactly a 40-character lowercase hex commit sha;
    2. unless ``allow_pickle`` is relaxed, no name in ``files`` may end with a
       pickle-format checkpoint suffix (case-insensitive).

    Args:
        pin: The model reference to verify.
        policy: Policy to enforce; the default-deny policy when omitted.
        files: Optional repository file names to screen for pickle formats.

    Returns:
        ``None`` when nothing violates the policy.

    Raises:
        SupplyChainError: on the first violated check.
    """
    policy = policy or SupplyChainPolicy()

    if policy.require_pinned_revision:
        # fullmatch, not match: with match(), the pattern's trailing ``$`` also
        # accepts a sha followed by a newline, which is not a clean commit id.
        if not pin.revision or not _COMMIT_RE.fullmatch(pin.revision):
            raise SupplyChainError(
                f"{pin.repo_id}: revision must be a pinned 40-hex commit sha "
                f"(got {pin.revision!r}); a moving ref like 'main' is forbidden"
            )

    if files and not policy.allow_pickle:
        offending = sorted(f for f in files if f.lower().endswith(_PICKLE_SUFFIXES))
        if offending:
            raise SupplyChainError(
                f"{pin.repo_id}: pickle-format checkpoints are disallowed "
                f"(prefer safetensors): {offending}"
            )
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def assert_no_remote_code(policy: SupplyChainPolicy | None = None) -> None:
    """Assert that the policy does not enable remote model code.

    Guards the transformers ``trust_remote_code`` path: a policy with
    ``allow_remote_code=True`` is rejected here, and every other policy
    (including the default one used when ``policy`` is omitted) passes.

    Raises:
        SupplyChainError: if ``policy.allow_remote_code`` is true.
    """
    effective = policy or SupplyChainPolicy()
    if not effective.allow_remote_code:
        return
    raise SupplyChainError(
        "remote model code is enabled (allow_remote_code=True) — this executes "
        "arbitrary code from the model repo on load; it must be reviewed, not default"
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def safe_pipeline_kwargs(pin: ModelPin, policy: SupplyChainPolicy | None = None) -> dict:
    """Build policy-enforcing keyword arguments for ``transformers.pipeline``.

    The policy is checked first (remote-code guard, then model-source
    verification), so a violating pin raises ``SupplyChainError`` before any
    arguments are produced.

    Returns:
        A dict with ``model`` set to the repo id and ``trust_remote_code``
        always ``False``, plus ``revision`` when the pin carries one.
    """
    effective = policy or SupplyChainPolicy()

    # Guards run before anything is built; their order is part of the contract.
    assert_no_remote_code(effective)
    verify_model_source(pin, effective)

    revision_part = {"revision": pin.revision} if pin.revision else {}
    return {"model": pin.repo_id, "trust_remote_code": False, **revision_part}
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def file_sha256(path: str | Path) -> str:
    """Return the hex sha256 digest of the file at ``path``.

    The file is read in 1 MiB blocks and fed to the hash incrementally, so the
    whole file is never held in memory at once.
    """
    digest = hashlib.sha256()
    block_size = 1 << 20  # 1 MiB per read
    with open(path, "rb") as stream:
        while block := stream.read(block_size):
            digest.update(block)
    return digest.hexdigest()
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def verify_file_hash(path: str | Path, expected_sha256: str) -> None:
    """Verify a downloaded file against its expected sha256 (§8.3).

    The expected digest is compared case-insensitively and with surrounding
    whitespace ignored: hex digests are case-insensitive, so an upper-case or
    padded digest must not produce a spurious mismatch.

    Args:
        path: File to hash.
        expected_sha256: Expected hex digest of the file's contents.

    Raises:
        SupplyChainError: if the file's sha256 differs from the expected one.
    """
    wanted = expected_sha256.strip().lower()
    actual = file_sha256(path)
    if actual != wanted:
        raise SupplyChainError(
            f"{path}: sha256 mismatch — expected {expected_sha256}, got {actual}"
        )
|
zu_huggingface/tools.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""HuggingFace task models as typed Zu Tools (Engineering Design §8.5).
|
|
2
|
+
|
|
3
|
+
Each tool wraps one HuggingFace task behind the standard Tool contract, with the
|
|
4
|
+
typed multimodal :class:`~zu_core.content` Content (Text/Image/Audio) as the
|
|
5
|
+
currency in and out — which is what lets a non-chat model slot into the loop as
|
|
6
|
+
cleanly as a chat one. The same tool works hosted or local because it depends
|
|
7
|
+
only on the :class:`HfClient` seam (``client.py``).
|
|
8
|
+
|
|
9
|
+
The port is the *role*, assigned per agent (§4.5): these are Tools — verbs the
|
|
10
|
+
policy performs (transcribe, read an image, detect, embed, summarise,
|
|
11
|
+
translate). A classifier wanting to *gate control flow* or *check a result*
|
|
12
|
+
becomes a detector/validator instead — see ``roles.py``.
|
|
13
|
+
|
|
14
|
+
The envelope is derived from the backend: a hosted client egresses to the HF
|
|
15
|
+
router (CAP_NET + that host); a local pipeline reaches nothing. Media is passed
|
|
16
|
+
as base64 (``data_b64``) or a local ``path`` — the realistic shape when the
|
|
17
|
+
policy carries bytes from a prior observation.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import base64
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
from zu_core.content import Audio, Image, Text
|
|
27
|
+
from zu_core.ports import CAP_NET
|
|
28
|
+
|
|
29
|
+
from .client import HfClient, InferenceClientBackend
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _decode_media(data_b64: str | None, path: str | None) -> bytes:
|
|
33
|
+
if data_b64:
|
|
34
|
+
return base64.b64decode(data_b64)
|
|
35
|
+
if path:
|
|
36
|
+
return Path(path).read_bytes()
|
|
37
|
+
raise ValueError("provide media as 'data_b64' (base64) or a local 'path'")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class _HfTool:
    """Common base for the HuggingFace tools.

    Stores the model id and the client, and derives the capability envelope
    from the backend's ``egress_host`` attribute: a non-empty host means the
    tool needs ``CAP_NET`` and egress to that host; an empty or missing one
    means it needs nothing.
    """

    tier = 1  # a specialised model the policy calls — cheap, not an escalation

    def __init__(self, model: str, client: HfClient | None = None) -> None:
        self.model = model
        self._client = client

        # With no client supplied, fall back to the hosted backend.
        if client is None:
            backend = InferenceClientBackend()
        else:
            backend = client
        self._backend = backend

        egress_host = getattr(backend, "egress_host", "")
        if egress_host:
            self.capabilities = frozenset({CAP_NET})
            self.egress = frozenset({egress_host})
        else:
            self.capabilities = frozenset()
            self.egress = frozenset()

    def _c(self) -> HfClient:
        """Return the client to call: the injected one, else the default backend."""
        if self._client is None:
            return self._backend
        return self._client
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class Transcribe(_HfTool):
    """Speech-recognition tool: audio in, transcript out. Role: Tool (§8.5 Audio)."""

    name = "hf_transcribe"
    prompt_fragment = "hf_transcribe(data_b64|path): transcribe speech audio to text."
    schema = {
        "name": "hf_transcribe",
        "description": "Transcribe speech audio to text via a HuggingFace ASR model.",
        "parameters": {
            "type": "object",
            "properties": {
                "data_b64": {
                    "type": "string",
                    "description": "base64-encoded audio",
                },
                "path": {
                    "type": "string",
                    "description": "local audio file path",
                },
            },
        },
    }

    async def __call__(self, ctx: Any, data_b64: str | None = None, path: str | None = None) -> dict:
        """Decode the audio, transcribe it, and return ``{"text", "model"}``.

        The client call is a plain synchronous call made inside this coroutine.
        """
        payload = _decode_media(data_b64, path)
        # Build the typed Audio wrapper for the input; the instance itself is
        # not used further.
        Audio(data=payload)
        transcript = self._c().transcribe(payload, self.model)
        return {"text": transcript, "model": self.model}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class ImageToText(_HfTool):
    """Image-to-text / OCR tool: image in, text out. Role: Tool (§8.5 CV/Multimodal)."""

    name = "hf_image_to_text"
    prompt_fragment = "hf_image_to_text(data_b64|path): read/describe an image as text (OCR or caption)."
    schema = {
        "name": "hf_image_to_text",
        "description": "Extract or describe the text/content of an image via a HuggingFace model.",
        "parameters": {
            "type": "object",
            "properties": {
                "data_b64": {
                    "type": "string",
                    "description": "base64-encoded image",
                },
                "path": {
                    "type": "string",
                    "description": "local image file path",
                },
            },
        },
    }

    async def __call__(self, ctx: Any, data_b64: str | None = None, path: str | None = None) -> dict:
        """Decode the image, run image-to-text, and return ``{"text", "model"}``.

        The client call is a plain synchronous call made inside this coroutine.
        """
        payload = _decode_media(data_b64, path)
        # Build the typed Image wrapper for the input; the instance itself is
        # not used further.
        Image(data=payload)
        described = self._c().image_to_text(payload, self.model)
        return {"text": described, "model": self.model}
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class DetectObjects(_HfTool):
    """Object-detection tool: image in, labelled boxes out. Role: Tool (or detector). (§8.5 CV)."""

    name = "hf_detect"
    prompt_fragment = "hf_detect(data_b64|path): find objects in an image (labelled boxes)."
    schema = {
        "name": "hf_detect",
        "description": "Detect objects in an image via a HuggingFace model; returns labelled boxes.",
        "parameters": {
            "type": "object",
            "properties": {
                "data_b64": {
                    "type": "string",
                    "description": "base64-encoded image",
                },
                "path": {
                    "type": "string",
                    "description": "local image file path",
                },
            },
        },
    }

    async def __call__(self, ctx: Any, data_b64: str | None = None, path: str | None = None) -> dict:
        """Decode the image, detect objects, and return ``{"objects", "count", "model"}``.

        The client call is a plain synchronous call made inside this coroutine.
        """
        payload = _decode_media(data_b64, path)
        found = self._c().object_detection(payload, self.model)
        return {"objects": found, "count": len(found), "model": self.model}
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class Embed(_HfTool):
    """Feature extraction tool: text in, vector out.

    Role: retrieval Tool / grounding (§8.5 NLP).
    """

    name = "hf_embed"
    prompt_fragment = "hf_embed(text): embed text into a vector for search/similarity."
    schema = {
        "name": "hf_embed",
        "description": "Embed text into a dense vector via a HuggingFace embedding model.",
        "parameters": {
            "type": "object",
            "properties": {
                "text": {"type": "string"},
            },
            "required": ["text"],
        },
    }

    async def __call__(self, ctx: Any, text: str) -> dict:
        """Embed ``text`` with the configured model.

        Args:
            ctx: Invocation context; not read by this method.
            text: The text to embed.

        Returns:
            A dict with the ``embedding`` as returned by the client, its
            length as ``dim``, and the ``model`` id used.
        """
        client = self._c()
        vector = client.embed(text, self.model)
        result = {"embedding": vector}
        result["dim"] = len(vector)
        result["model"] = self.model
        return result
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class Classify(_HfTool):
    """Text classification tool: text in, labels out.

    Role: Tool (or detector/router) (§8.5 NLP).
    """

    name = "hf_classify"
    prompt_fragment = "hf_classify(text): classify/score text into the model's labels."
    schema = {
        "name": "hf_classify",
        "description": "Classify text via a HuggingFace text-classification model.",
        "parameters": {
            "type": "object",
            "properties": {
                "text": {"type": "string"},
            },
            "required": ["text"],
        },
    }

    async def __call__(self, ctx: Any, text: str) -> dict:
        """Classify ``text`` with the configured model.

        Args:
            ctx: Invocation context; not read by this method.
            text: The text to classify.

        Returns:
            A dict with all ``labels`` as returned by the client, the
            ``"label"`` of the first entry as ``top`` (``None`` when the
            result is empty), and the ``model`` id used.
        """
        scores = self._c().text_classification(text, self.model)
        # The first entry is taken as the winner; an empty result has none.
        if scores:
            best = scores[0]["label"]
        else:
            best = None
        return {"labels": scores, "top": best, "model": self.model}
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class ZeroShotClassify(_HfTool):
    """Zero-shot classification tool: text plus candidate labels in, scores out (§8.5 NLP)."""

    name = "hf_zero_shot"
    prompt_fragment = "hf_zero_shot(text, labels): score text against candidate labels you supply."
    schema = {
        "name": "hf_zero_shot",
        "description": "Zero-shot classify text against candidate labels via a HuggingFace model.",
        "parameters": {
            "type": "object",
            "properties": {
                "text": {"type": "string"},
                "labels": {
                    "type": "array",
                    "items": {"type": "string"},
                },
            },
            "required": ["text", "labels"],
        },
    }

    async def __call__(self, ctx: Any, text: str, labels: list[str]) -> dict:
        """Score ``text`` against the caller-supplied candidate ``labels``.

        Args:
            ctx: Invocation context; not read by this method.
            text: The text to classify.
            labels: Candidate labels to score the text against.

        Returns:
            A dict with the scored ``labels`` as returned by the client, the
            ``"label"`` of the first entry as ``top`` (``None`` when the
            result is empty), and the ``model`` id used.
        """
        ranked = self._c().zero_shot(text, labels, self.model)
        # The first entry is taken as the winner; an empty result has none.
        if ranked:
            best = ranked[0]["label"]
        else:
            best = None
        return {"labels": ranked, "top": best, "model": self.model}
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
class Summarize(_HfTool):
    """Summarization tool: text in, shorter text out (§8.5 NLP)."""

    name = "hf_summarize"
    prompt_fragment = "hf_summarize(text): summarise a long text."
    schema = {
        "name": "hf_summarize",
        "description": "Summarise text via a HuggingFace summarization model.",
        "parameters": {
            "type": "object",
            "properties": {
                "text": {"type": "string"},
            },
            "required": ["text"],
        },
    }

    async def __call__(self, ctx: Any, text: str) -> dict:
        """Summarise ``text`` with the configured model.

        Args:
            ctx: Invocation context; not read by this method.
            text: The text to summarise.

        Returns:
            A dict with the summary as ``text`` and the ``model`` id used.
        """
        client = self._c()
        summary = client.summarize(text, self.model)
        # Built and discarded — presumably a validity check on the typed Text
        # payload; confirm against zu_core.content before removing.
        Text(text=summary)
        return {"text": summary, "model": self.model}
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class Translate(_HfTool):
    """Translation tool: text in, translated text out (§8.5 NLP)."""

    name = "hf_translate"
    prompt_fragment = "hf_translate(text): translate text (model is pinned to a language pair)."
    # No language parameters: per the prompt text above, the language pair
    # comes from the chosen model.
    schema = {
        "name": "hf_translate",
        "description": "Translate text via a HuggingFace translation model.",
        "parameters": {
            "type": "object",
            "properties": {
                "text": {"type": "string"},
            },
            "required": ["text"],
        },
    }

    async def __call__(self, ctx: Any, text: str) -> dict:
        """Translate ``text`` with the configured model.

        Args:
            ctx: Invocation context; not read by this method.
            text: The text to translate.

        Returns:
            A dict with the translation as ``text`` and the ``model`` id used.
        """
        client = self._c()
        translated = client.translate(text, self.model)
        return {"text": translated, "model": self.model}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: zu-huggingface
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: Zu HuggingFace adapter: task models as typed tools/detectors/validators, behind the supply-chain guards
|
|
5
|
+
Project-URL: Homepage, https://github.com/k3-mt/zu
|
|
6
|
+
Project-URL: Repository, https://github.com/k3-mt/zu
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Requires-Dist: zu-core==0.2.1
|
|
18
|
+
Provides-Extra: hosted
|
|
19
|
+
Requires-Dist: huggingface-hub>=0.24; extra == 'hosted'
|
|
20
|
+
Provides-Extra: local
|
|
21
|
+
Requires-Dist: pillow; extra == 'local'
|
|
22
|
+
Requires-Dist: transformers>=4.40; extra == 'local'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# zu-huggingface
|
|
26
|
+
|
|
27
|
+
HuggingFace models behind Zu's typed ports. HuggingFace is not a model — it is
|
|
28
|
+
the largest hub of open models across every modality — so "supporting it" means
|
|
29
|
+
three different things, and this package draws the line cleanly
|
|
30
|
+
(Engineering Design §8.3–8.5).
|
|
31
|
+
|
|
32
|
+
## Chat / vision-language models as the policy — *no code here*
|
|
33
|
+
|
|
34
|
+
A chat or vision-language model that is the **brain** speaks the OpenAI chat API
|
|
35
|
+
on all three HuggingFace serving surfaces (the router's `/v1`, a dedicated
|
|
36
|
+
Endpoint's `/v1`, or a local vLLM server). So a HuggingFace model as the policy
|
|
37
|
+
is the existing `openai-compatible` provider pointed at a HuggingFace base URL —
|
|
38
|
+
the OpenRouter story exactly, no new adapter:
|
|
39
|
+
|
|
40
|
+
```yaml
|
|
41
|
+
# agent.yaml — a HuggingFace multimodal model as the policy
|
|
42
|
+
model: meta-llama/Llama-Vision-... # any chat / VLM id on the Hub
|
|
43
|
+
provider: openai-compatible
|
|
44
|
+
options:
|
|
45
|
+
base_url: https://router.huggingface.co/v1 # or an Endpoint, or local vLLM
|
|
46
|
+
api_key_env: HF_TOKEN
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Task models as tools, detectors, validators — this package
|
|
50
|
+
|
|
51
|
+
Most HuggingFace models are **not** chat models (OCR, ASR, detection,
|
|
52
|
+
embeddings, classification, …), so they enter through the non-policy ports by
|
|
53
|
+
their **role** (the port is the role, assigned per agent — §4.5):
|
|
54
|
+
|
|
55
|
+
| Role | Class | Task |
|
|
56
|
+
|------|-------|------|
|
|
57
|
+
| Tool | `Transcribe` (`hf_transcribe`) | speech → text (ASR) |
|
|
58
|
+
| Tool | `ImageToText` (`hf_image_to_text`) | image → text (OCR / caption) |
|
|
59
|
+
| Tool | `DetectObjects` (`hf_detect`) | image → labelled boxes |
|
|
60
|
+
| Tool | `Embed` (`hf_embed`) | text → vector (retrieval / grounding) |
|
|
61
|
+
| Tool | `Classify` (`hf_classify`) | text → labels |
|
|
62
|
+
| Tool | `ZeroShotClassify` (`hf_zero_shot`) | text + labels → scores |
|
|
63
|
+
| Tool | `Summarize` (`hf_summarize`) | text → text |
|
|
64
|
+
| Tool | `Translate` (`hf_translate`) | text → text |
|
|
65
|
+
| Detector | `HfClassifierDetector` | classify an observation → ESCALATE/stop |
|
|
66
|
+
| Validator | `HfClassifierValidator` | classify the result → fail/RETRY |
|
|
67
|
+
|
|
68
|
+
Each is **parameterised by a model id** (and the role wrappers by the labels
|
|
69
|
+
that matter), so they are wired *by reference in config* per agent rather than
|
|
70
|
+
as zero-config entry points:
|
|
71
|
+
|
|
72
|
+
```yaml
|
|
73
|
+
tools:
|
|
74
|
+
- ref: zu_huggingface.tools:Transcribe
|
|
75
|
+
args: { model: openai/whisper-large-v3 }
|
|
76
|
+
- ref: zu_huggingface.tools:Embed
|
|
77
|
+
args: { model: BAAI/bge-large-en-v1.5 }
|
|
78
|
+
detectors:
|
|
79
|
+
- ref: zu_huggingface.roles:HfClassifierDetector
|
|
80
|
+
args: { model: facebook/bart-large-mnli, candidate_labels: ["safe","unsafe"], escalate_on: ["unsafe"] }
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
The typed multimodal `Content` (`Text`/`Image`/`Audio`) from `zu_core.content`
|
|
84
|
+
is the currency in and out — which is what lets a non-chat model slot into the
|
|
85
|
+
loop as cleanly as a chat one.
|
|
86
|
+
|
|
87
|
+
### Hosted vs local — one seam
|
|
88
|
+
|
|
89
|
+
Every tool depends only on the `HfClient` seam, so the same tool works:
|
|
90
|
+
|
|
91
|
+
- **Hosted** — `InferenceClientBackend` wraps `huggingface_hub.InferenceClient`
|
|
92
|
+
(the serverless router or a dedicated Endpoint). Egresses to
|
|
93
|
+
`router.huggingface.co`; `HF_TOKEN` is read from the environment inside the
|
|
94
|
+
backend. `pip install 'zu-huggingface[hosted]'`.
|
|
95
|
+
- **Local** — `PipelineBackend` wraps `transformers.pipeline` for the
|
|
96
|
+
air-gapped / on-prem case. Reaches no network. Every pipeline is built through
|
|
97
|
+
the supply-chain guards. `pip install 'zu-huggingface[local]'` (plus a
|
|
98
|
+
backend such as `torch`).
|
|
99
|
+
|
|
100
|
+
## The supply chain — safe by default (§8.3)
|
|
101
|
+
|
|
102
|
+
Pulling a model from the Hub is a supply-chain surface. `supply_chain.py`
|
|
103
|
+
enforces, by default:
|
|
104
|
+
|
|
105
|
+
- **Pin + hash.** A `ModelPin` should carry a full commit-sha `revision`;
|
|
106
|
+
`verify_file_hash` checks a downloaded file's sha256.
|
|
107
|
+
- **safetensors, not pickle.** `verify_model_source` rejects `.bin`/`.pt`/`.ckpt`
|
|
108
|
+
checkpoints (which execute on deserialisation) unless explicitly allowed.
|
|
109
|
+
- **No remote code.** `safe_pipeline_kwargs` forces `trust_remote_code=False`;
|
|
110
|
+
`assert_no_remote_code` raises if it is relaxed.
|
|
111
|
+
|
|
112
|
+
The safe configuration is the default — there is nothing to turn *on* to be
|
|
113
|
+
safe, only flags that a reviewed case may relax.
|
|
114
|
+
|
|
115
|
+
## Tests
|
|
116
|
+
|
|
117
|
+
Offline, no network, no model download: the tools and role wrappers are
|
|
118
|
+
exercised against a fake `HfClient`, and the supply-chain guards are pure.
|
|
119
|
+
`uv run pytest packages/zu-huggingface`.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
zu_huggingface/__init__.py,sha256=YJDo9GnPPnyZIirzuuP3fBKUppBTJMXfp27um7IQCak,2261
|
|
2
|
+
zu_huggingface/client.py,sha256=JakMapgYgzKntkQUjb1dEhQIMD3j21bIlK2VvcNOZ_A,8573
|
|
3
|
+
zu_huggingface/roles.py,sha256=yAqM7ItZibCLka9ojLbmdGbnHn9wVGbY7AwZd_hCIgM,5160
|
|
4
|
+
zu_huggingface/supply_chain.py,sha256=yTRxBDaCLlWYfUpz5JKcqUSuUUGyNq6sriv3SoLiMRI,5358
|
|
5
|
+
zu_huggingface/tools.py,sha256=vN1LWcsj8Atmfic-SKh84DwE0KURjK7jVJVMJ2-RSIM,8889
|
|
6
|
+
zu_huggingface-0.2.2.dist-info/METADATA,sha256=jIUj7y_udLid09vW5Gmq7Q5ao154tWy2iqzZL1G26P0,5284
|
|
7
|
+
zu_huggingface-0.2.2.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
8
|
+
zu_huggingface-0.2.2.dist-info/RECORD,,
|