zu-huggingface 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,66 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+
9
+ # uv / venv
10
+ .venv/
11
+ uv.lock.bak
12
+
13
+ # Test / type caches
14
+ .pytest_cache/
15
+ .mypy_cache/
16
+ .ruff_cache/
17
+ .coverage
18
+ htmlcov/
19
+
20
+ # Zu runtime artifacts
21
+ *.db
22
+ zu.db
23
+ zu.yaml.local
24
+ zu_review.jsonl
25
+ *.review.jsonl
26
+ # Per-agent cost telemetry ledger — machine-local run history, not source.
27
+ cost.jsonl
28
+ # A recorded replay path is learned per-run and machine-local — regenerated on
29
+ # every successful run, not source. The agent ships; its track does not.
30
+ track.json
31
+ # …except the flagship example ships its track on purpose, as a demo of the
32
+ # record/replay convergence (committed; re-runs show as ordinary modifications).
33
+ !examples/agents/vet-appointment/track.json
34
+
35
+ # Editor / OS
36
+ .idea/
37
+ .vscode/
38
+ .DS_Store
39
+
40
+ # Claude Code local session state
41
+ .claude/
42
+
43
+ # Secrets
44
+ .env
45
+ .env.*
46
+ !.env.example
47
+
48
+ # Microsoft Office temp/lock files
49
+ ~$*
50
+
51
+ # Internal design / strategy docs — kept local, never in the public repo
52
+ *.docx
53
+ *.pdf
54
+ # BUILD.md is the internal build-sequence / deferred-gaps ledger — kept local.
55
+ # (ARCHITECTURE.md is public: an onboarding agent needs the structural map.)
56
+ docs/BUILD.md
57
+
58
+ # Local secret — API key for live validation, never commit
59
+ zu_demo_key.md
60
+ *_key.md
61
+
62
+ # Local PyPI publish token — never commit
63
+ /pypi
64
+
65
+ # Local Discord credentials (bot token / app secrets) — never commit
66
+ /discord
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: zu-huggingface
3
+ Version: 0.2.2
4
+ Summary: Zu HuggingFace adapter: task models as typed tools/detectors/validators, behind the supply-chain guards
5
+ Project-URL: Homepage, https://github.com/k3-mt/zu
6
+ Project-URL: Repository, https://github.com/k3-mt/zu
7
+ License-Expression: Apache-2.0
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
15
+ Classifier: Typing :: Typed
16
+ Requires-Python: >=3.11
17
+ Requires-Dist: zu-core==0.2.1
18
+ Provides-Extra: hosted
19
+ Requires-Dist: huggingface-hub>=0.24; extra == 'hosted'
20
+ Provides-Extra: local
21
+ Requires-Dist: pillow; extra == 'local'
22
+ Requires-Dist: transformers>=4.40; extra == 'local'
23
+ Description-Content-Type: text/markdown
24
+
25
+ # zu-huggingface
26
+
27
+ HuggingFace models behind Zu's typed ports. HuggingFace is not a model — it is
28
+ the largest hub of open models across every modality — so "supporting it" means
29
+ three different things, and this package draws the line cleanly
30
+ (Engineering Design §8.3–8.5).
31
+
32
+ ## Chat / vision-language models as the policy — *no code here*
33
+
34
+ A chat or vision-language model that is the **brain** speaks the OpenAI chat API
35
+ on all three HuggingFace serving surfaces (the router's `/v1`, a dedicated
36
+ Endpoint's `/v1`, or a local vLLM server). So a HuggingFace model as the policy
37
+ is the existing `openai-compatible` provider pointed at a HuggingFace base URL —
38
+ the OpenRouter story exactly, no new adapter:
39
+
40
+ ```yaml
41
+ # agent.yaml — a HuggingFace multimodal model as the policy
42
+ model: meta-llama/Llama-Vision-... # any chat / VLM id on the Hub
43
+ provider: openai-compatible
44
+ options:
45
+ base_url: https://router.huggingface.co/v1 # or an Endpoint, or local vLLM
46
+ api_key_env: HF_TOKEN
47
+ ```
48
+
49
+ ## Task models as tools, detectors, validators — this package
50
+
51
+ Most HuggingFace models are **not** chat models (OCR, ASR, detection,
52
+ embeddings, classification, …), so they enter through the non-policy ports by
53
+ their **role** (the port is the role, assigned per agent — §4.5):
54
+
55
+ | Role | Class | Task |
56
+ |------|-------|------|
57
+ | Tool | `Transcribe` (`hf_transcribe`) | speech → text (ASR) |
58
+ | Tool | `ImageToText` (`hf_image_to_text`) | image → text (OCR / caption) |
59
+ | Tool | `DetectObjects` (`hf_detect`) | image → labelled boxes |
60
+ | Tool | `Embed` (`hf_embed`) | text → vector (retrieval / grounding) |
61
+ | Tool | `Classify` (`hf_classify`) | text → labels |
62
+ | Tool | `ZeroShotClassify` (`hf_zero_shot`) | text + labels → scores |
63
+ | Tool | `Summarize` (`hf_summarize`) | text → text |
64
+ | Tool | `Translate` (`hf_translate`) | text → text |
65
+ | Detector | `HfClassifierDetector` | classify an observation → ESCALATE/stop |
66
+ | Validator | `HfClassifierValidator` | classify the result → fail/RETRY |
67
+
68
+ Each is **parameterised by a model id** (and the role wrappers by the labels
69
+ that matter), so they are wired *by reference in config* per agent rather than
70
+ as zero-config entry points:
71
+
72
+ ```yaml
73
+ tools:
74
+ - ref: zu_huggingface.tools:Transcribe
75
+ args: { model: openai/whisper-large-v3 }
76
+ - ref: zu_huggingface.tools:Embed
77
+ args: { model: BAAI/bge-large-en-v1.5 }
78
+ detectors:
79
+ - ref: zu_huggingface.roles:HfClassifierDetector
80
+ args: { model: facebook/bart-large-mnli, candidate_labels: ["safe","unsafe"], escalate_on: ["unsafe"] }
81
+ ```
82
+
83
+ The typed multimodal `Content` (`Text`/`Image`/`Audio`) from `zu_core.content`
84
+ is the currency in and out — which is what lets a non-chat model slot into the
85
+ loop as cleanly as a chat one.
86
+
87
+ ### Hosted vs local — one seam
88
+
89
+ Every tool depends only on the `HfClient` seam, so the same tool works:
90
+
91
+ - **Hosted** — `InferenceClientBackend` wraps `huggingface_hub.InferenceClient`
92
+ (the serverless router or a dedicated Endpoint). Egresses to
93
+ `router.huggingface.co`; `HF_TOKEN` is read from the environment inside the
94
+ backend. `pip install 'zu-huggingface[hosted]'`.
95
+ - **Local** — `PipelineBackend` wraps `transformers.pipeline` for the
96
+ air-gapped / on-prem case. Reaches no network. Every pipeline is built through
97
+ the supply-chain guards. `pip install 'zu-huggingface[local]'` (plus a
98
+ backend such as `torch`).
99
+
100
+ ## The supply chain — safe by default (§8.3)
101
+
102
+ Pulling a model from the Hub is a supply-chain surface. `supply_chain.py`
103
+ enforces, by default:
104
+
105
+ - **Pin + hash.** A `ModelPin` should carry a full commit-sha `revision`;
106
+ `verify_file_hash` checks a downloaded file's sha256.
107
+ - **safetensors, not pickle.** `verify_model_source` rejects `.bin`/`.pt`/`.ckpt`
108
+ checkpoints (which execute on deserialisation) unless explicitly allowed.
109
+ - **No remote code.** `safe_pipeline_kwargs` forces `trust_remote_code=False`;
110
+ `assert_no_remote_code` raises if it is relaxed.
111
+
112
+ The safe configuration is the default — there is nothing to turn *on* to be
113
+ safe, only flags a reviewed case may relax.
114
+
115
+ ## Tests
116
+
117
+ Offline, no network, no model download: the tools and role wrappers are
118
+ exercised against a fake `HfClient`, and the supply-chain guards are pure.
119
+ `uv run pytest packages/zu-huggingface`.
@@ -0,0 +1,95 @@
1
+ # zu-huggingface
2
+
3
+ HuggingFace models behind Zu's typed ports. HuggingFace is not a model — it is
4
+ the largest hub of open models across every modality — so "supporting it" means
5
+ three different things, and this package draws the line cleanly
6
+ (Engineering Design §8.3–8.5).
7
+
8
+ ## Chat / vision-language models as the policy — *no code here*
9
+
10
+ A chat or vision-language model that is the **brain** speaks the OpenAI chat API
11
+ on all three HuggingFace serving surfaces (the router's `/v1`, a dedicated
12
+ Endpoint's `/v1`, or a local vLLM server). So a HuggingFace model as the policy
13
+ is the existing `openai-compatible` provider pointed at a HuggingFace base URL —
14
+ the OpenRouter story exactly, no new adapter:
15
+
16
+ ```yaml
17
+ # agent.yaml — a HuggingFace multimodal model as the policy
18
+ model: meta-llama/Llama-Vision-... # any chat / VLM id on the Hub
19
+ provider: openai-compatible
20
+ options:
21
+ base_url: https://router.huggingface.co/v1 # or an Endpoint, or local vLLM
22
+ api_key_env: HF_TOKEN
23
+ ```
24
+
25
+ ## Task models as tools, detectors, validators — this package
26
+
27
+ Most HuggingFace models are **not** chat models (OCR, ASR, detection,
28
+ embeddings, classification, …), so they enter through the non-policy ports by
29
+ their **role** (the port is the role, assigned per agent — §4.5):
30
+
31
+ | Role | Class | Task |
32
+ |------|-------|------|
33
+ | Tool | `Transcribe` (`hf_transcribe`) | speech → text (ASR) |
34
+ | Tool | `ImageToText` (`hf_image_to_text`) | image → text (OCR / caption) |
35
+ | Tool | `DetectObjects` (`hf_detect`) | image → labelled boxes |
36
+ | Tool | `Embed` (`hf_embed`) | text → vector (retrieval / grounding) |
37
+ | Tool | `Classify` (`hf_classify`) | text → labels |
38
+ | Tool | `ZeroShotClassify` (`hf_zero_shot`) | text + labels → scores |
39
+ | Tool | `Summarize` (`hf_summarize`) | text → text |
40
+ | Tool | `Translate` (`hf_translate`) | text → text |
41
+ | Detector | `HfClassifierDetector` | classify an observation → ESCALATE/stop |
42
+ | Validator | `HfClassifierValidator` | classify the result → fail/RETRY |
43
+
44
+ Each is **parameterised by a model id** (and the role wrappers by the labels
45
+ that matter), so they are wired *by reference in config* per agent rather than
46
+ as zero-config entry points:
47
+
48
+ ```yaml
49
+ tools:
50
+ - ref: zu_huggingface.tools:Transcribe
51
+ args: { model: openai/whisper-large-v3 }
52
+ - ref: zu_huggingface.tools:Embed
53
+ args: { model: BAAI/bge-large-en-v1.5 }
54
+ detectors:
55
+ - ref: zu_huggingface.roles:HfClassifierDetector
56
+ args: { model: facebook/bart-large-mnli, candidate_labels: ["safe","unsafe"], escalate_on: ["unsafe"] }
57
+ ```
58
+
59
+ The typed multimodal `Content` (`Text`/`Image`/`Audio`) from `zu_core.content`
60
+ is the currency in and out — which is what lets a non-chat model slot into the
61
+ loop as cleanly as a chat one.
62
+
63
+ ### Hosted vs local — one seam
64
+
65
+ Every tool depends only on the `HfClient` seam, so the same tool works:
66
+
67
+ - **Hosted** — `InferenceClientBackend` wraps `huggingface_hub.InferenceClient`
68
+ (the serverless router or a dedicated Endpoint). Egresses to
69
+ `router.huggingface.co`; `HF_TOKEN` is read from the environment inside the
70
+ backend. `pip install 'zu-huggingface[hosted]'`.
71
+ - **Local** — `PipelineBackend` wraps `transformers.pipeline` for the
72
+ air-gapped / on-prem case. Reaches no network. Every pipeline is built through
73
+ the supply-chain guards. `pip install 'zu-huggingface[local]'` (plus a
74
+ backend such as `torch`).
75
+
76
+ ## The supply chain — safe by default (§8.3)
77
+
78
+ Pulling a model from the Hub is a supply-chain surface. `supply_chain.py`
79
+ enforces, by default:
80
+
81
+ - **Pin + hash.** A `ModelPin` should carry a full commit-sha `revision`;
82
+ `verify_file_hash` checks a downloaded file's sha256.
83
+ - **safetensors, not pickle.** `verify_model_source` rejects `.bin`/`.pt`/`.ckpt`
84
+ checkpoints (which execute on deserialisation) unless explicitly allowed.
85
+ - **No remote code.** `safe_pipeline_kwargs` forces `trust_remote_code=False`;
86
+ `assert_no_remote_code` raises if it is relaxed.
87
+
88
+ The safe configuration is the default — there is nothing to turn *on* to be
89
+ safe, only flags a reviewed case may relax.
90
+
91
+ ## Tests
92
+
93
+ Offline, no network, no model download: the tools and role wrappers are
94
+ exercised against a fake `HfClient`, and the supply-chain guards are pure.
95
+ `uv run pytest packages/zu-huggingface`.
@@ -0,0 +1,42 @@
1
+ [project]
2
+ name = "zu-huggingface"
3
+ version = "0.2.2"
4
+ description = "Zu HuggingFace adapter: task models as typed tools/detectors/validators, behind the supply-chain guards"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ license = "Apache-2.0"
8
+ classifiers = [
9
+ "Development Status :: 4 - Beta",
10
+ "Intended Audience :: Developers",
11
+ "License :: OSI Approved :: Apache Software License",
12
+ "Programming Language :: Python :: 3",
13
+ "Programming Language :: Python :: 3.11",
14
+ "Programming Language :: Python :: 3.12",
15
+ "Topic :: Software Development :: Libraries :: Application Frameworks",
16
+ "Typing :: Typed",
17
+ ]
18
+ # The core + the multimodal Content currency are all that is always needed; the
19
+ # heavy SDKs are optional extras, imported lazily, so the tools are testable
20
+ # offline against a fake client and a base install costs nothing.
21
+ dependencies = ["zu-core==0.2.1"]
22
+
23
+ [project.optional-dependencies]
24
+ hosted = ["huggingface_hub>=0.24"] # the Inference Providers router / Endpoints
25
+ local = ["transformers>=4.40", "pillow"] # local pipelines (also needs a backend, e.g. torch)
26
+
27
+ # The task-model tools and the classifier detector/validator are PARAMETERISED
28
+ # (a model id, and for the role wrappers the labels that matter), so they are
29
+ # wired by reference in config per agent rather than as zero-config entry points
30
+ # — see the README. The package's contract is its importable classes + the
31
+ # HfClient seam + the supply-chain guards.
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/k3-mt/zu"
35
+ Repository = "https://github.com/k3-mt/zu"
36
+
37
+ [build-system]
38
+ requires = ["hatchling"]
39
+ build-backend = "hatchling.build"
40
+
41
+ [tool.hatch.build.targets.wheel]
42
+ packages = ["src/zu_huggingface"]
@@ -0,0 +1,74 @@
1
+ """zu-huggingface — HuggingFace models behind Zu's typed ports (§8.3–8.5).
2
+
3
+ HuggingFace is not a model — it is the largest hub of open models across every
4
+ modality. This package reaches it three ways, all behind configuration:
5
+
6
+ * **Chat / vision-language models as the policy** need *no code here* — they
7
+ speak the OpenAI chat API on all three serving surfaces (the router's ``/v1``,
8
+ an Endpoint's ``/v1``, or a local vLLM), so a HuggingFace model as the brain
9
+ is the existing ``openai-compatible`` provider pointed at a HuggingFace base
10
+ URL (see this package's README). It is the OpenRouter story exactly.
11
+
12
+ * **Task models** (ASR, OCR, detection, embeddings, classification,
13
+ summarisation, translation) are *not* chat models — each has its own typed
14
+ I/O — so they enter through the non-policy ports by their role: as **Tools**
15
+ (``tools.py``) and as **detectors / validators** (``roles.py``), over the one
16
+ :class:`HfClient` seam (``client.py``) that works hosted or local.
17
+
18
+ * **The supply chain** (``supply_chain.py``) makes pulling any of them safe by
19
+ default: pin + hash, safetensors not pickle, never trust remote code.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from .client import HF_ROUTER, HfClient, InferenceClientBackend, PipelineBackend
25
+ from .roles import HfClassifierDetector, HfClassifierValidator
26
+ from .supply_chain import (
27
+ ModelPin,
28
+ SupplyChainError,
29
+ SupplyChainPolicy,
30
+ assert_no_remote_code,
31
+ file_sha256,
32
+ safe_pipeline_kwargs,
33
+ verify_file_hash,
34
+ verify_model_source,
35
+ )
36
+ from .tools import (
37
+ Classify,
38
+ DetectObjects,
39
+ Embed,
40
+ ImageToText,
41
+ Summarize,
42
+ Transcribe,
43
+ Translate,
44
+ ZeroShotClassify,
45
+ )
46
+
47
+ __all__ = [
48
+ # client seam
49
+ "HfClient",
50
+ "HF_ROUTER",
51
+ "InferenceClientBackend",
52
+ "PipelineBackend",
53
+ # tools
54
+ "Transcribe",
55
+ "ImageToText",
56
+ "DetectObjects",
57
+ "Embed",
58
+ "Classify",
59
+ "ZeroShotClassify",
60
+ "Summarize",
61
+ "Translate",
62
+ # role wrappers
63
+ "HfClassifierDetector",
64
+ "HfClassifierValidator",
65
+ # supply chain
66
+ "ModelPin",
67
+ "SupplyChainPolicy",
68
+ "SupplyChainError",
69
+ "verify_model_source",
70
+ "assert_no_remote_code",
71
+ "safe_pipeline_kwargs",
72
+ "file_sha256",
73
+ "verify_file_hash",
74
+ ]
@@ -0,0 +1,187 @@
1
+ """The HuggingFace client seam — one task-method interface, three serving surfaces.
2
+
3
+ Most HuggingFace models are *not* chat models: OCR, speech recognition, object
4
+ detection, and embedding models each have their own typed input/output, so they
5
+ enter Zu through the non-policy ports by their role (§8.5). This module is the
6
+ thin seam the HuggingFace *tools* call, so the same tool works whether the model
7
+ is served hosted (the Inference Providers router) or local (a transformers
8
+ pipeline) — the integration is done once, here.
9
+
10
+ ``HfClient`` is the protocol the tools depend on. Two adapters implement it:
11
+
12
+ * :class:`InferenceClientBackend` — wraps ``huggingface_hub.InferenceClient``
13
+ (hosted; the router or a dedicated Endpoint), egressing to the HF router.
14
+ * :class:`PipelineBackend` — wraps ``transformers.pipeline`` (local; the
15
+ air-gapped / on-prem case), constructed only through the supply-chain guards
16
+ (§8.3): a pinned revision and ``trust_remote_code=False``.
17
+
18
+ Both heavy SDKs are imported lazily, so installing ``zu-huggingface`` without
19
+ the extras costs nothing and the tools are testable offline against a fake
20
+ client. Credentials (``HF_TOKEN``) are resolved from the environment *inside*
21
+ the backend, never placed in the model's context.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import os
27
+ from typing import Any, Protocol, runtime_checkable
28
+
29
+ from .supply_chain import ModelPin, SupplyChainPolicy, safe_pipeline_kwargs
30
+
31
+ # The Inference Providers router — the hosted default, OpenAI-compatible for
32
+ # chat at /v1 but task-native through the InferenceClient methods.
33
+ HF_ROUTER = "router.huggingface.co"
34
+
35
+
36
@runtime_checkable
class HfClient(Protocol):
    """Structural interface the HuggingFace tools program against.

    Everything crossing this seam is plain Python — ``bytes`` for media,
    ``str`` for text, ``list[dict]`` for structured results — which leaves the
    translation to and from typed :class:`zu_core.content` Content to the tools.
    Every method takes the model id to run as its last argument.
    """

    def transcribe(self, audio: bytes, model: str) -> str:
        """Speech audio in, transcript text out."""
        ...

    def image_to_text(self, image: bytes, model: str) -> str:
        """Image in, text out."""
        ...

    def object_detection(self, image: bytes, model: str) -> list[dict]:
        """Image in, one dict per detection out."""
        ...

    def text_classification(self, text: str, model: str) -> list[dict]:
        """Text in, classifier result dicts out."""
        ...

    def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
        """Text plus candidate ``labels`` in, result dicts out."""
        ...

    def embed(self, text: str, model: str) -> list[float]:
        """Text in, a single vector of floats out."""
        ...

    def summarize(self, text: str, model: str) -> str:
        """Text in, summary text out."""
        ...

    def translate(self, text: str, model: str) -> str:
        """Text in, translated text out."""
        ...
50
+
51
+
52
+ def _scores(raw: Any) -> list[dict]:
53
+ """Normalise a classifier response to ``[{"label","score"}, …]`` sorted by
54
+ score desc — the shape every classification tool/detector reads."""
55
+ out: list[dict] = []
56
+ if isinstance(raw, dict) and "labels" in raw and "scores" in raw: # zero-shot shape
57
+ out = [{"label": str(lbl), "score": float(sc)}
58
+ for lbl, sc in zip(raw["labels"], raw["scores"], strict=False)]
59
+ elif isinstance(raw, list):
60
+ for item in raw:
61
+ if isinstance(item, dict) and "label" in item:
62
+ out.append({"label": str(item["label"]), "score": float(item.get("score", 0.0))})
63
+ return sorted(out, key=lambda d: d["score"], reverse=True)
64
+
65
+
66
class InferenceClientBackend:
    """Hosted HuggingFace via ``huggingface_hub.InferenceClient`` (lazy import).

    The same model id works through the serverless router or a dedicated
    Endpoint; ``HF_TOKEN`` is read from the environment here.

    Args:
        provider: forwarded to ``InferenceClient(provider=...)``.
        token_env: name of the environment variable that holds the API token.
        client: a ready-made client object. When supplied, ``huggingface_hub``
            is never imported and the environment is never read (injectable
            for tests).
    """

    egress_host = HF_ROUTER

    def __init__(
        self,
        *,
        provider: str = "hf-inference",
        token_env: str = "HF_TOKEN",
        client: Any = None,
    ) -> None:
        self._provider = provider
        self._token_env = token_env
        self._client = client  # injectable for tests

    def _c(self) -> Any:
        """Return the underlying client, building it on first use.

        Raises:
            RuntimeError: if ``huggingface_hub`` is not installed.
        """
        if self._client is None:
            try:
                from huggingface_hub import InferenceClient
            except ImportError as e:  # pragma: no cover - exercised only without the extra
                raise RuntimeError(
                    "the hosted HuggingFace backend needs `huggingface_hub` "
                    "(install zu-huggingface[hosted])"
                ) from e
            # The token is looked up only here, at construction time; an unset
            # variable is passed through as None.
            # NOTE(review): the `provider=` / `api_key=` keywords look newer than
            # the declared `huggingface_hub>=0.24` floor — confirm the lower bound.
            self._client = InferenceClient(
                provider=self._provider,
                api_key=os.environ.get(self._token_env),
            )
        return self._client

    @staticmethod
    def _as_vector(raw: Any) -> list[float]:
        """Collapse a feature-extraction response into one flat vector.

        Accepts anything with ``.tolist()`` or a plain (possibly nested)
        sequence. Singleton batch dimensions are dropped, and a 2-D
        token-by-dimension result is mean-pooled column-wise — the same
        reduction the local pipeline backend applies — so callers always get
        ``list[float]`` regardless of which shape the served model returns.
        """
        data = raw.tolist() if hasattr(raw, "tolist") else raw
        if not isinstance(data, list):
            data = list(data)
        # [[[tok, …]]] -> [[tok, …]]: peel leading batch dims of size one.
        while (
            len(data) == 1
            and isinstance(data[0], list)
            and data[0]
            and isinstance(data[0][0], list)
        ):
            data = data[0]
        if data and isinstance(data[0], list):
            return [float(sum(col) / len(col)) for col in zip(*data, strict=False)]
        return [float(x) for x in data]

    def transcribe(self, audio: bytes, model: str) -> str:
        """Run speech recognition; return the ``text`` of the response."""
        r = self._c().automatic_speech_recognition(audio, model=model)
        return r if isinstance(r, str) else str(getattr(r, "text", r))

    def image_to_text(self, image: bytes, model: str) -> str:
        """Run image-to-text; return the ``generated_text`` of the response."""
        r = self._c().image_to_text(image, model=model)
        return r if isinstance(r, str) else str(getattr(r, "generated_text", r))

    def object_detection(self, image: bytes, model: str) -> list[dict]:
        """Run object detection; return each detection as a plain dict."""
        r = self._c().object_detection(image, model=model)
        return [dict(item) for item in r]

    def text_classification(self, text: str, model: str) -> list[dict]:
        """Classify ``text``; return ``[{"label", "score"}, …]`` sorted best first."""
        return _scores(self._c().text_classification(text, model=model))

    def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
        """Score ``text`` against ``labels``; same return shape as classification."""
        return _scores(self._c().zero_shot_classification(text, candidate_labels=labels, model=model))

    def embed(self, text: str, model: str) -> list[float]:
        """Embed ``text`` as a single flat vector.

        Nested (token-level or batched) responses are reduced by
        :meth:`_as_vector` instead of failing on ``float(<list>)``.
        """
        r = self._c().feature_extraction(text, model=model)
        return self._as_vector(r)

    def summarize(self, text: str, model: str) -> str:
        """Summarise ``text``; return the ``summary_text`` of the response."""
        r = self._c().summarization(text, model=model)
        return r if isinstance(r, str) else str(getattr(r, "summary_text", r))

    def translate(self, text: str, model: str) -> str:
        """Translate ``text``; return the ``translation_text`` of the response."""
        r = self._c().translation(text, model=model)
        return r if isinstance(r, str) else str(getattr(r, "translation_text", r))
127
+
128
+
129
+ class PipelineBackend:
130
+ """Local HuggingFace via ``transformers.pipeline`` (lazy import).
131
+
132
+ The only option for air-gapped / on-prem. Every pipeline is built through
133
+ :func:`safe_pipeline_kwargs` — a pinned revision and ``trust_remote_code``
134
+ forced off — so the §8.3 supply-chain rules hold by construction. Pipelines
135
+ are cached per (task, model).
136
+ """
137
+
138
+ egress_host = "" # local — no egress
139
+
140
+ def __init__(self, policy: SupplyChainPolicy | None = None) -> None:
141
+ self._policy = policy or SupplyChainPolicy()
142
+ self._cache: dict[tuple[str, str], Any] = {}
143
+
144
+ def _pipe(self, task: str, model: str) -> Any:
145
+ key = (task, model)
146
+ if key not in self._cache:
147
+ try:
148
+ from transformers import pipeline
149
+ except ImportError as e: # pragma: no cover - exercised only without the extra
150
+ raise RuntimeError(
151
+ "the local HuggingFace backend needs `transformers` "
152
+ "(install zu-huggingface[local])"
153
+ ) from e
154
+ kwargs = safe_pipeline_kwargs(ModelPin(repo_id=model), self._policy)
155
+ self._cache[key] = pipeline(task, **kwargs)
156
+ return self._cache[key]
157
+
158
+ def transcribe(self, audio: bytes, model: str) -> str:
159
+ return str(self._pipe("automatic-speech-recognition", model)(audio)["text"])
160
+
161
+ def image_to_text(self, image: bytes, model: str) -> str:
162
+ r = self._pipe("image-to-text", model)(image)
163
+ return str(r[0]["generated_text"] if isinstance(r, list) else r["generated_text"])
164
+
165
+ def object_detection(self, image: bytes, model: str) -> list[dict]:
166
+ return [dict(item) for item in self._pipe("object-detection", model)(image)]
167
+
168
+ def text_classification(self, text: str, model: str) -> list[dict]:
169
+ return _scores(self._pipe("text-classification", model)(text))
170
+
171
+ def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
172
+ return _scores(self._pipe("zero-shot-classification", model)(text, candidate_labels=labels))
173
+
174
+ def embed(self, text: str, model: str) -> list[float]:
175
+ r = self._pipe("feature-extraction", model)(text)
176
+ # pipelines return [[token-vectors]]; mean-pool to one vector
177
+ vecs = r[0] if isinstance(r, list) else r
178
+ if vecs and isinstance(vecs[0], list):
179
+ cols = list(zip(*vecs, strict=False))
180
+ return [sum(c) / len(c) for c in cols]
181
+ return [float(x) for x in vecs]
182
+
183
+ def summarize(self, text: str, model: str) -> str:
184
+ return str(self._pipe("summarization", model)(text)[0]["summary_text"])
185
+
186
+ def translate(self, text: str, model: str) -> str:
187
+ return str(self._pipe("translation", model)(text)[0]["translation_text"])