zu-huggingface 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zu_huggingface-0.2.2/.gitignore +66 -0
- zu_huggingface-0.2.2/PKG-INFO +119 -0
- zu_huggingface-0.2.2/README.md +95 -0
- zu_huggingface-0.2.2/pyproject.toml +42 -0
- zu_huggingface-0.2.2/src/zu_huggingface/__init__.py +74 -0
- zu_huggingface-0.2.2/src/zu_huggingface/client.py +187 -0
- zu_huggingface-0.2.2/src/zu_huggingface/roles.py +139 -0
- zu_huggingface-0.2.2/src/zu_huggingface/supply_chain.py +135 -0
- zu_huggingface-0.2.2/src/zu_huggingface/tools.py +231 -0
- zu_huggingface-0.2.2/tests/conftest.py +55 -0
- zu_huggingface-0.2.2/tests/test_roles.py +73 -0
- zu_huggingface-0.2.2/tests/test_supply_chain.py +75 -0
- zu_huggingface-0.2.2/tests/test_tools.py +80 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
|
|
9
|
+
# uv / venv
|
|
10
|
+
.venv/
|
|
11
|
+
uv.lock.bak
|
|
12
|
+
|
|
13
|
+
# Test / type caches
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
.mypy_cache/
|
|
16
|
+
.ruff_cache/
|
|
17
|
+
.coverage
|
|
18
|
+
htmlcov/
|
|
19
|
+
|
|
20
|
+
# Zu runtime artifacts
|
|
21
|
+
*.db
|
|
22
|
+
zu.db
|
|
23
|
+
zu.yaml.local
|
|
24
|
+
zu_review.jsonl
|
|
25
|
+
*.review.jsonl
|
|
26
|
+
# Per-agent cost telemetry ledger — machine-local run history, not source.
|
|
27
|
+
cost.jsonl
|
|
28
|
+
# A recorded replay path is learned per-run and machine-local — regenerated on
|
|
29
|
+
# every successful run, not source. The agent ships; its track does not.
|
|
30
|
+
track.json
|
|
31
|
+
# …except the flagship example ships its track on purpose, as a demo of the
|
|
32
|
+
# record/replay convergence (committed; re-runs show as ordinary modifications).
|
|
33
|
+
!examples/agents/vet-appointment/track.json
|
|
34
|
+
|
|
35
|
+
# Editor / OS
|
|
36
|
+
.idea/
|
|
37
|
+
.vscode/
|
|
38
|
+
.DS_Store
|
|
39
|
+
|
|
40
|
+
# Claude Code local session state
|
|
41
|
+
.claude/
|
|
42
|
+
|
|
43
|
+
# Secrets
|
|
44
|
+
.env
|
|
45
|
+
.env.*
|
|
46
|
+
!.env.example
|
|
47
|
+
|
|
48
|
+
# Microsoft Office temp/lock files
|
|
49
|
+
~$*
|
|
50
|
+
|
|
51
|
+
# Internal design / strategy docs — kept local, never in the public repo
|
|
52
|
+
*.docx
|
|
53
|
+
*.pdf
|
|
54
|
+
# BUILD.md is the internal build-sequence / deferred-gaps ledger — kept local.
|
|
55
|
+
# (ARCHITECTURE.md is public: an onboarding agent needs the structural map.)
|
|
56
|
+
docs/BUILD.md
|
|
57
|
+
|
|
58
|
+
# Local secret — API key for live validation, never commit
|
|
59
|
+
zu_demo_key.md
|
|
60
|
+
*_key.md
|
|
61
|
+
|
|
62
|
+
# Local PyPI publish token — never commit
|
|
63
|
+
/pypi
|
|
64
|
+
|
|
65
|
+
# Local Discord credentials (bot token / app secrets) — never commit
|
|
66
|
+
/discord
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: zu-huggingface
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: Zu HuggingFace adapter: task models as typed tools/detectors/validators, behind the supply-chain guards
|
|
5
|
+
Project-URL: Homepage, https://github.com/k3-mt/zu
|
|
6
|
+
Project-URL: Repository, https://github.com/k3-mt/zu
|
|
7
|
+
License-Expression: Apache-2.0
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Requires-Dist: zu-core==0.2.1
|
|
18
|
+
Provides-Extra: hosted
|
|
19
|
+
Requires-Dist: huggingface-hub>=0.24; extra == 'hosted'
|
|
20
|
+
Provides-Extra: local
|
|
21
|
+
Requires-Dist: pillow; extra == 'local'
|
|
22
|
+
Requires-Dist: transformers>=4.40; extra == 'local'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# zu-huggingface
|
|
26
|
+
|
|
27
|
+
HuggingFace models behind Zu's typed ports. HuggingFace is not a model — it is
|
|
28
|
+
the largest hub of open models across every modality — so "supporting it" means
|
|
29
|
+
three different things, and this package draws the line cleanly
|
|
30
|
+
(Engineering Design §8.3–8.5).
|
|
31
|
+
|
|
32
|
+
## Chat / vision-language models as the policy — *no code here*
|
|
33
|
+
|
|
34
|
+
A chat or vision-language model that is the **brain** speaks the OpenAI chat API
|
|
35
|
+
on all three HuggingFace serving surfaces (the router's `/v1`, a dedicated
|
|
36
|
+
Endpoint's `/v1`, or a local vLLM server). So a HuggingFace model as the policy
|
|
37
|
+
is the existing `openai-compatible` provider pointed at a HuggingFace base URL —
|
|
38
|
+
the OpenRouter story exactly, no new adapter:
|
|
39
|
+
|
|
40
|
+
```yaml
|
|
41
|
+
# agent.yaml — a HuggingFace multimodal model as the policy
|
|
42
|
+
model: meta-llama/Llama-Vision-... # any chat / VLM id on the Hub
|
|
43
|
+
provider: openai-compatible
|
|
44
|
+
options:
|
|
45
|
+
base_url: https://router.huggingface.co/v1 # or an Endpoint, or local vLLM
|
|
46
|
+
api_key_env: HF_TOKEN
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Task models as tools, detectors, validators — this package
|
|
50
|
+
|
|
51
|
+
Most HuggingFace models are **not** chat models (OCR, ASR, detection,
|
|
52
|
+
embeddings, classification, …), so they enter through the non-policy ports by
|
|
53
|
+
their **role** (the port is the role, assigned per agent — §4.5):
|
|
54
|
+
|
|
55
|
+
| Role | Class | Task |
|
|
56
|
+
|------|-------|------|
|
|
57
|
+
| Tool | `Transcribe` (`hf_transcribe`) | speech → text (ASR) |
|
|
58
|
+
| Tool | `ImageToText` (`hf_image_to_text`) | image → text (OCR / caption) |
|
|
59
|
+
| Tool | `DetectObjects` (`hf_detect`) | image → labelled boxes |
|
|
60
|
+
| Tool | `Embed` (`hf_embed`) | text → vector (retrieval / grounding) |
|
|
61
|
+
| Tool | `Classify` (`hf_classify`) | text → labels |
|
|
62
|
+
| Tool | `ZeroShotClassify` (`hf_zero_shot`) | text + labels → scores |
|
|
63
|
+
| Tool | `Summarize` (`hf_summarize`) | text → text |
|
|
64
|
+
| Tool | `Translate` (`hf_translate`) | text → text |
|
|
65
|
+
| Detector | `HfClassifierDetector` | classify an observation → ESCALATE/stop |
|
|
66
|
+
| Validator | `HfClassifierValidator` | classify the result → fail/RETRY |
|
|
67
|
+
|
|
68
|
+
Each is **parameterised by a model id** (and the role wrappers by the labels
|
|
69
|
+
that matter), so they are wired *by reference in config* per agent rather than
|
|
70
|
+
as zero-config entry points:
|
|
71
|
+
|
|
72
|
+
```yaml
|
|
73
|
+
tools:
|
|
74
|
+
- ref: zu_huggingface.tools:Transcribe
|
|
75
|
+
args: { model: openai/whisper-large-v3 }
|
|
76
|
+
- ref: zu_huggingface.tools:Embed
|
|
77
|
+
args: { model: BAAI/bge-large-en-v1.5 }
|
|
78
|
+
detectors:
|
|
79
|
+
- ref: zu_huggingface.roles:HfClassifierDetector
|
|
80
|
+
args: { model: facebook/bart-large-mnli, candidate_labels: ["safe","unsafe"], escalate_on: ["unsafe"] }
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
The typed multimodal `Content` (`Text`/`Image`/`Audio`) from `zu_core.content`
|
|
84
|
+
is the currency in and out — which is what lets a non-chat model slot into the
|
|
85
|
+
loop as cleanly as a chat one.
|
|
86
|
+
|
|
87
|
+
### Hosted vs local — one seam
|
|
88
|
+
|
|
89
|
+
Every tool depends only on the `HfClient` seam, so the same tool works:
|
|
90
|
+
|
|
91
|
+
- **Hosted** — `InferenceClientBackend` wraps `huggingface_hub.InferenceClient`
|
|
92
|
+
(the serverless router or a dedicated Endpoint). Egresses to
|
|
93
|
+
`router.huggingface.co`; `HF_TOKEN` is read from the environment inside the
|
|
94
|
+
backend. `pip install 'zu-huggingface[hosted]'`.
|
|
95
|
+
- **Local** — `PipelineBackend` wraps `transformers.pipeline` for the
|
|
96
|
+
air-gapped / on-prem case. Reaches no network. Every pipeline is built through
|
|
97
|
+
the supply-chain guards. `pip install 'zu-huggingface[local]'` (plus a
|
|
98
|
+
backend such as `torch`).
|
|
99
|
+
|
|
100
|
+
## The supply chain — safe by default (§8.3)
|
|
101
|
+
|
|
102
|
+
Pulling a model from the Hub is a supply-chain surface. `supply_chain.py`
|
|
103
|
+
enforces, by default:
|
|
104
|
+
|
|
105
|
+
- **Pin + hash.** A `ModelPin` should carry a full commit-sha `revision`;
|
|
106
|
+
`verify_file_hash` checks a downloaded file's sha256.
|
|
107
|
+
- **safetensors, not pickle.** `verify_model_source` rejects `.bin`/`.pt`/`.ckpt`
|
|
108
|
+
checkpoints (which execute on deserialisation) unless explicitly allowed.
|
|
109
|
+
- **No remote code.** `safe_pipeline_kwargs` forces `trust_remote_code=False`;
|
|
110
|
+
`assert_no_remote_code` raises if it is relaxed.
|
|
111
|
+
|
|
112
|
+
The safe configuration is the default — there is nothing to turn *on* to be
|
|
113
|
+
safe, only flags a reviewed case may relax.
|
|
114
|
+
|
|
115
|
+
## Tests
|
|
116
|
+
|
|
117
|
+
Offline, no network, no model download: the tools and role wrappers are
|
|
118
|
+
exercised against a fake `HfClient`, and the supply-chain guards are pure.
|
|
119
|
+
`uv run pytest packages/zu-huggingface`.
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# zu-huggingface
|
|
2
|
+
|
|
3
|
+
HuggingFace models behind Zu's typed ports. HuggingFace is not a model — it is
|
|
4
|
+
the largest hub of open models across every modality — so "supporting it" means
|
|
5
|
+
three different things, and this package draws the line cleanly
|
|
6
|
+
(Engineering Design §8.3–8.5).
|
|
7
|
+
|
|
8
|
+
## Chat / vision-language models as the policy — *no code here*
|
|
9
|
+
|
|
10
|
+
A chat or vision-language model that is the **brain** speaks the OpenAI chat API
|
|
11
|
+
on all three HuggingFace serving surfaces (the router's `/v1`, a dedicated
|
|
12
|
+
Endpoint's `/v1`, or a local vLLM server). So a HuggingFace model as the policy
|
|
13
|
+
is the existing `openai-compatible` provider pointed at a HuggingFace base URL —
|
|
14
|
+
the OpenRouter story exactly, no new adapter:
|
|
15
|
+
|
|
16
|
+
```yaml
|
|
17
|
+
# agent.yaml — a HuggingFace multimodal model as the policy
|
|
18
|
+
model: meta-llama/Llama-Vision-... # any chat / VLM id on the Hub
|
|
19
|
+
provider: openai-compatible
|
|
20
|
+
options:
|
|
21
|
+
base_url: https://router.huggingface.co/v1 # or an Endpoint, or local vLLM
|
|
22
|
+
api_key_env: HF_TOKEN
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Task models as tools, detectors, validators — this package
|
|
26
|
+
|
|
27
|
+
Most HuggingFace models are **not** chat models (OCR, ASR, detection,
|
|
28
|
+
embeddings, classification, …), so they enter through the non-policy ports by
|
|
29
|
+
their **role** (the port is the role, assigned per agent — §4.5):
|
|
30
|
+
|
|
31
|
+
| Role | Class | Task |
|
|
32
|
+
|------|-------|------|
|
|
33
|
+
| Tool | `Transcribe` (`hf_transcribe`) | speech → text (ASR) |
|
|
34
|
+
| Tool | `ImageToText` (`hf_image_to_text`) | image → text (OCR / caption) |
|
|
35
|
+
| Tool | `DetectObjects` (`hf_detect`) | image → labelled boxes |
|
|
36
|
+
| Tool | `Embed` (`hf_embed`) | text → vector (retrieval / grounding) |
|
|
37
|
+
| Tool | `Classify` (`hf_classify`) | text → labels |
|
|
38
|
+
| Tool | `ZeroShotClassify` (`hf_zero_shot`) | text + labels → scores |
|
|
39
|
+
| Tool | `Summarize` (`hf_summarize`) | text → text |
|
|
40
|
+
| Tool | `Translate` (`hf_translate`) | text → text |
|
|
41
|
+
| Detector | `HfClassifierDetector` | classify an observation → ESCALATE/stop |
|
|
42
|
+
| Validator | `HfClassifierValidator` | classify the result → fail/RETRY |
|
|
43
|
+
|
|
44
|
+
Each is **parameterised by a model id** (and the role wrappers by the labels
|
|
45
|
+
that matter), so they are wired *by reference in config* per agent rather than
|
|
46
|
+
as zero-config entry points:
|
|
47
|
+
|
|
48
|
+
```yaml
|
|
49
|
+
tools:
|
|
50
|
+
- ref: zu_huggingface.tools:Transcribe
|
|
51
|
+
args: { model: openai/whisper-large-v3 }
|
|
52
|
+
- ref: zu_huggingface.tools:Embed
|
|
53
|
+
args: { model: BAAI/bge-large-en-v1.5 }
|
|
54
|
+
detectors:
|
|
55
|
+
- ref: zu_huggingface.roles:HfClassifierDetector
|
|
56
|
+
args: { model: facebook/bart-large-mnli, candidate_labels: ["safe","unsafe"], escalate_on: ["unsafe"] }
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
The typed multimodal `Content` (`Text`/`Image`/`Audio`) from `zu_core.content`
|
|
60
|
+
is the currency in and out — which is what lets a non-chat model slot into the
|
|
61
|
+
loop as cleanly as a chat one.
|
|
62
|
+
|
|
63
|
+
### Hosted vs local — one seam
|
|
64
|
+
|
|
65
|
+
Every tool depends only on the `HfClient` seam, so the same tool works:
|
|
66
|
+
|
|
67
|
+
- **Hosted** — `InferenceClientBackend` wraps `huggingface_hub.InferenceClient`
|
|
68
|
+
(the serverless router or a dedicated Endpoint). Egresses to
|
|
69
|
+
`router.huggingface.co`; `HF_TOKEN` is read from the environment inside the
|
|
70
|
+
backend. `pip install 'zu-huggingface[hosted]'`.
|
|
71
|
+
- **Local** — `PipelineBackend` wraps `transformers.pipeline` for the
|
|
72
|
+
air-gapped / on-prem case. Reaches no network. Every pipeline is built through
|
|
73
|
+
the supply-chain guards. `pip install 'zu-huggingface[local]'` (plus a
|
|
74
|
+
backend such as `torch`).
|
|
75
|
+
|
|
76
|
+
## The supply chain — safe by default (§8.3)
|
|
77
|
+
|
|
78
|
+
Pulling a model from the Hub is a supply-chain surface. `supply_chain.py`
|
|
79
|
+
enforces, by default:
|
|
80
|
+
|
|
81
|
+
- **Pin + hash.** A `ModelPin` should carry a full commit-sha `revision`;
|
|
82
|
+
`verify_file_hash` checks a downloaded file's sha256.
|
|
83
|
+
- **safetensors, not pickle.** `verify_model_source` rejects `.bin`/`.pt`/`.ckpt`
|
|
84
|
+
checkpoints (which execute on deserialisation) unless explicitly allowed.
|
|
85
|
+
- **No remote code.** `safe_pipeline_kwargs` forces `trust_remote_code=False`;
|
|
86
|
+
`assert_no_remote_code` raises if it is relaxed.
|
|
87
|
+
|
|
88
|
+
The safe configuration is the default — there is nothing to turn *on* to be
|
|
89
|
+
safe, only flags a reviewed case may relax.
|
|
90
|
+
|
|
91
|
+
## Tests
|
|
92
|
+
|
|
93
|
+
Offline, no network, no model download: the tools and role wrappers are
|
|
94
|
+
exercised against a fake `HfClient`, and the supply-chain guards are pure.
|
|
95
|
+
`uv run pytest packages/zu-huggingface`.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "zu-huggingface"
|
|
3
|
+
version = "0.2.2"
|
|
4
|
+
description = "Zu HuggingFace adapter: task models as typed tools/detectors/validators, behind the supply-chain guards"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = "Apache-2.0"
|
|
8
|
+
classifiers = [
|
|
9
|
+
"Development Status :: 4 - Beta",
|
|
10
|
+
"Intended Audience :: Developers",
|
|
11
|
+
"License :: OSI Approved :: Apache Software License",
|
|
12
|
+
"Programming Language :: Python :: 3",
|
|
13
|
+
"Programming Language :: Python :: 3.11",
|
|
14
|
+
"Programming Language :: Python :: 3.12",
|
|
15
|
+
"Topic :: Software Development :: Libraries :: Application Frameworks",
|
|
16
|
+
"Typing :: Typed",
|
|
17
|
+
]
|
|
18
|
+
# The core + the multimodal Content currency are all that is always needed; the
|
|
19
|
+
# heavy SDKs are optional extras, imported lazily, so the tools are testable
|
|
20
|
+
# offline against a fake client and a base install costs nothing.
|
|
21
|
+
dependencies = ["zu-core==0.2.1"]
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
hosted = ["huggingface_hub>=0.24"] # the Inference Providers router / Endpoints
|
|
25
|
+
local = ["transformers>=4.40", "pillow"] # local pipelines (also needs a backend, e.g. torch)
|
|
26
|
+
|
|
27
|
+
# The task-model tools and the classifier detector/validator are PARAMETERISED
|
|
28
|
+
# (a model id, and for the role wrappers the labels that matter), so they are
|
|
29
|
+
# wired by reference in config per agent rather than as zero-config entry points
|
|
30
|
+
# — see the README. The package's contract is its importable classes + the
|
|
31
|
+
# HfClient seam + the supply-chain guards.
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://github.com/k3-mt/zu"
|
|
35
|
+
Repository = "https://github.com/k3-mt/zu"
|
|
36
|
+
|
|
37
|
+
[build-system]
|
|
38
|
+
requires = ["hatchling"]
|
|
39
|
+
build-backend = "hatchling.build"
|
|
40
|
+
|
|
41
|
+
[tool.hatch.build.targets.wheel]
|
|
42
|
+
packages = ["src/zu_huggingface"]
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""zu-huggingface — HuggingFace models behind Zu's typed ports (§8.3–8.5).
|
|
2
|
+
|
|
3
|
+
HuggingFace is not a model — it is the largest hub of open models across every
|
|
4
|
+
modality. This package reaches it three ways, all behind configuration:
|
|
5
|
+
|
|
6
|
+
* **Chat / vision-language models as the policy** need *no code here* — they
|
|
7
|
+
speak the OpenAI chat API on all three serving surfaces (the router's ``/v1``,
|
|
8
|
+
an Endpoint's ``/v1``, or a local vLLM), so a HuggingFace model as the brain
|
|
9
|
+
is the existing ``openai-compatible`` provider pointed at a HuggingFace base
|
|
10
|
+
URL (see this package's README). It is the OpenRouter story exactly.
|
|
11
|
+
|
|
12
|
+
* **Task models** (ASR, OCR, detection, embeddings, classification,
|
|
13
|
+
summarisation, translation) are *not* chat models — each has its own typed
|
|
14
|
+
I/O — so they enter through the non-policy ports by their role: as **Tools**
|
|
15
|
+
(``tools.py``) and as **detectors / validators** (``roles.py``), over the one
|
|
16
|
+
:class:`HfClient` seam (``client.py``) that works hosted or local.
|
|
17
|
+
|
|
18
|
+
* **The supply chain** (``supply_chain.py``) makes pulling any of them safe by
|
|
19
|
+
default: pin + hash, safetensors not pickle, never trust remote code.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
from .client import HF_ROUTER, HfClient, InferenceClientBackend, PipelineBackend
|
|
25
|
+
from .roles import HfClassifierDetector, HfClassifierValidator
|
|
26
|
+
from .supply_chain import (
|
|
27
|
+
ModelPin,
|
|
28
|
+
SupplyChainError,
|
|
29
|
+
SupplyChainPolicy,
|
|
30
|
+
assert_no_remote_code,
|
|
31
|
+
file_sha256,
|
|
32
|
+
safe_pipeline_kwargs,
|
|
33
|
+
verify_file_hash,
|
|
34
|
+
verify_model_source,
|
|
35
|
+
)
|
|
36
|
+
from .tools import (
|
|
37
|
+
Classify,
|
|
38
|
+
DetectObjects,
|
|
39
|
+
Embed,
|
|
40
|
+
ImageToText,
|
|
41
|
+
Summarize,
|
|
42
|
+
Transcribe,
|
|
43
|
+
Translate,
|
|
44
|
+
ZeroShotClassify,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
__all__ = [
|
|
48
|
+
# client seam
|
|
49
|
+
"HfClient",
|
|
50
|
+
"HF_ROUTER",
|
|
51
|
+
"InferenceClientBackend",
|
|
52
|
+
"PipelineBackend",
|
|
53
|
+
# tools
|
|
54
|
+
"Transcribe",
|
|
55
|
+
"ImageToText",
|
|
56
|
+
"DetectObjects",
|
|
57
|
+
"Embed",
|
|
58
|
+
"Classify",
|
|
59
|
+
"ZeroShotClassify",
|
|
60
|
+
"Summarize",
|
|
61
|
+
"Translate",
|
|
62
|
+
# role wrappers
|
|
63
|
+
"HfClassifierDetector",
|
|
64
|
+
"HfClassifierValidator",
|
|
65
|
+
# supply chain
|
|
66
|
+
"ModelPin",
|
|
67
|
+
"SupplyChainPolicy",
|
|
68
|
+
"SupplyChainError",
|
|
69
|
+
"verify_model_source",
|
|
70
|
+
"assert_no_remote_code",
|
|
71
|
+
"safe_pipeline_kwargs",
|
|
72
|
+
"file_sha256",
|
|
73
|
+
"verify_file_hash",
|
|
74
|
+
]
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""The HuggingFace client seam — one task-method interface, three serving surfaces.
|
|
2
|
+
|
|
3
|
+
Most HuggingFace models are *not* chat models: OCR, speech recognition, object
|
|
4
|
+
detection, and embedding models each have their own typed input/output, so they
|
|
5
|
+
enter Zu through the non-policy ports by their role (§8.5). This module is the
|
|
6
|
+
thin seam the HuggingFace *tools* call, so the same tool works whether the model
|
|
7
|
+
is served hosted (the Inference Providers router) or local (a transformers
|
|
8
|
+
pipeline) — the integration is done once, here.
|
|
9
|
+
|
|
10
|
+
``HfClient`` is the protocol the tools depend on. Two adapters implement it:
|
|
11
|
+
|
|
12
|
+
* :class:`InferenceClientBackend` — wraps ``huggingface_hub.InferenceClient``
|
|
13
|
+
(hosted; the router or a dedicated Endpoint), egressing to the HF router.
|
|
14
|
+
* :class:`PipelineBackend` — wraps ``transformers.pipeline`` (local; the
|
|
15
|
+
air-gapped / on-prem case), constructed only through the supply-chain guards
|
|
16
|
+
(§8.3): a pinned revision and ``trust_remote_code=False``.
|
|
17
|
+
|
|
18
|
+
Both heavy SDKs are imported lazily, so installing ``zu-huggingface`` without
|
|
19
|
+
the extras costs nothing and the tools are testable offline against a fake
|
|
20
|
+
client. Credentials (``HF_TOKEN``) are resolved from the environment *inside*
|
|
21
|
+
the backend, never placed in the model's context.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import os
|
|
27
|
+
from typing import Any, Protocol, runtime_checkable
|
|
28
|
+
|
|
29
|
+
from .supply_chain import ModelPin, SupplyChainPolicy, safe_pipeline_kwargs
|
|
30
|
+
|
|
31
|
+
# The Inference Providers router — the hosted default, OpenAI-compatible for
|
|
32
|
+
# chat at /v1 but task-native through the InferenceClient methods.
|
|
33
|
+
HF_ROUTER = "router.huggingface.co"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@runtime_checkable
class HfClient(Protocol):
    """Structural interface of the task methods the HuggingFace tools call.

    Arguments and results are plain Python values (``bytes`` for media,
    ``str`` for text, ``list[dict]`` for structured results), so the tools own
    the translation to and from typed :class:`zu_core.content` Content.
    Because the protocol is runtime-checkable, ``isinstance`` only verifies
    that the methods exist, not their signatures.
    """

    def transcribe(self, audio: bytes, model: str) -> str:
        """Speech to text: return the transcript of ``audio`` from ``model``."""

    def image_to_text(self, image: bytes, model: str) -> str:
        """Image to text: return the text ``model`` generates for ``image``."""

    def object_detection(self, image: bytes, model: str) -> list[dict]:
        """Image to detections: return one dict per detected object."""

    def text_classification(self, text: str, model: str) -> list[dict]:
        """Text to labels: return the classifier's scored labels."""

    def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
        """Text plus candidate ``labels`` to scores for those labels."""

    def embed(self, text: str, model: str) -> list[float]:
        """Text to vector: return the embedding of ``text``."""

    def summarize(self, text: str, model: str) -> str:
        """Text to text: return a summary of ``text``."""

    def translate(self, text: str, model: str) -> str:
        """Text to text: return a translation of ``text``."""
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _scores(raw: Any) -> list[dict]:
|
|
53
|
+
"""Normalise a classifier response to ``[{"label","score"}, …]`` sorted by
|
|
54
|
+
score desc — the shape every classification tool/detector reads."""
|
|
55
|
+
out: list[dict] = []
|
|
56
|
+
if isinstance(raw, dict) and "labels" in raw and "scores" in raw: # zero-shot shape
|
|
57
|
+
out = [{"label": str(lbl), "score": float(sc)}
|
|
58
|
+
for lbl, sc in zip(raw["labels"], raw["scores"], strict=False)]
|
|
59
|
+
elif isinstance(raw, list):
|
|
60
|
+
for item in raw:
|
|
61
|
+
if isinstance(item, dict) and "label" in item:
|
|
62
|
+
out.append({"label": str(item["label"]), "score": float(item.get("score", 0.0))})
|
|
63
|
+
return sorted(out, key=lambda d: d["score"], reverse=True)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class InferenceClientBackend:
    """Hosted HuggingFace via ``huggingface_hub.InferenceClient`` (lazy import).

    The same model id works through the serverless router or a dedicated
    Endpoint. The API token is read from the environment variable named by
    ``token_env`` at the moment the SDK client is first built.
    """

    egress_host = HF_ROUTER

    def __init__(
        self,
        *,
        provider: str = "hf-inference",
        token_env: str = "HF_TOKEN",
        client: Any = None,
    ) -> None:
        """Record the configuration; no SDK import or network call happens here.

        Args:
            provider: Passed to ``InferenceClient(provider=...)``.
            token_env: Name of the environment variable holding the API token.
            client: A pre-built client to use instead of constructing one
                (injectable for tests).
        """
        self._provider = provider
        self._token_env = token_env
        self._client = client  # injectable for tests

    def _c(self) -> Any:
        """Return the client, building and memoising it on first use.

        Raises:
            RuntimeError: If ``huggingface_hub`` is not installed.
        """
        if self._client is None:
            try:
                from huggingface_hub import InferenceClient
            except ImportError as e:  # pragma: no cover - exercised only without the extra
                raise RuntimeError(
                    "the hosted HuggingFace backend needs `huggingface_hub` "
                    "(install zu-huggingface[hosted])"
                ) from e
            self._client = InferenceClient(provider=self._provider, api_key=os.environ.get(self._token_env))
        return self._client

    @staticmethod
    def _pool(values: Any) -> list[float]:
        """Reduce a feature-extraction result to one flat vector.

        A flat sequence is returned as floats unchanged. A sequence of
        per-token vectors is mean-pooled column-wise. Extra leading (batch)
        dimensions are peeled by taking their first entry.
        """
        rows = list(values)
        # Peel leading dimensions while the rows are themselves matrices.
        while (
            rows
            and isinstance(rows[0], (list, tuple))
            and rows[0]
            and isinstance(rows[0][0], (list, tuple))
        ):
            rows = list(rows[0])
        if rows and isinstance(rows[0], (list, tuple)):
            return [sum(float(x) for x in col) / len(col) for col in zip(*rows)]
        return [float(x) for x in rows]

    def transcribe(self, audio: bytes, model: str) -> str:
        """Run speech recognition; unwrap ``.text`` if the SDK returns an object."""
        r = self._c().automatic_speech_recognition(audio, model=model)
        return r if isinstance(r, str) else str(getattr(r, "text", r))

    def image_to_text(self, image: bytes, model: str) -> str:
        """Run image-to-text; unwrap ``.generated_text`` if the SDK returns an object."""
        r = self._c().image_to_text(image, model=model)
        return r if isinstance(r, str) else str(getattr(r, "generated_text", r))

    def object_detection(self, image: bytes, model: str) -> list[dict]:
        """Run object detection and copy each result item into a plain dict."""
        r = self._c().object_detection(image, model=model)
        return [dict(item) for item in r]

    def text_classification(self, text: str, model: str) -> list[dict]:
        """Classify ``text``; the response is normalised by ``_scores``."""
        return _scores(self._c().text_classification(text, model=model))

    def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
        """Score ``text`` against ``labels``; the response is normalised by ``_scores``."""
        return _scores(self._c().zero_shot_classification(text, candidate_labels=labels, model=model))

    def embed(self, text: str, model: str) -> list[float]:
        """Embed ``text`` and return a single flat vector.

        ``feature_extraction`` may return either one vector or a per-token
        matrix, depending on the model. Iterating a matrix with ``float(x)``
        would raise ``TypeError``, so token rows are mean-pooled instead,
        matching what the local pipeline backend does.
        """
        r = self._c().feature_extraction(text, model=model)
        return self._pool(r.tolist() if hasattr(r, "tolist") else r)

    def summarize(self, text: str, model: str) -> str:
        """Summarise ``text``; unwrap ``.summary_text`` if the SDK returns an object."""
        r = self._c().summarization(text, model=model)
        return r if isinstance(r, str) else str(getattr(r, "summary_text", r))

    def translate(self, text: str, model: str) -> str:
        """Translate ``text``; unwrap ``.translation_text`` if the SDK returns an object."""
        r = self._c().translation(text, model=model)
        return r if isinstance(r, str) else str(getattr(r, "translation_text", r))
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class PipelineBackend:
|
|
130
|
+
"""Local HuggingFace via ``transformers.pipeline`` (lazy import).
|
|
131
|
+
|
|
132
|
+
The only option for air-gapped / on-prem. Every pipeline is built through
|
|
133
|
+
:func:`safe_pipeline_kwargs` — a pinned revision and ``trust_remote_code``
|
|
134
|
+
forced off — so the §8.3 supply-chain rules hold by construction. Pipelines
|
|
135
|
+
are cached per (task, model).
|
|
136
|
+
"""
|
|
137
|
+
|
|
138
|
+
egress_host = "" # local — no egress
|
|
139
|
+
|
|
140
|
+
def __init__(self, policy: SupplyChainPolicy | None = None) -> None:
|
|
141
|
+
self._policy = policy or SupplyChainPolicy()
|
|
142
|
+
self._cache: dict[tuple[str, str], Any] = {}
|
|
143
|
+
|
|
144
|
+
def _pipe(self, task: str, model: str) -> Any:
|
|
145
|
+
key = (task, model)
|
|
146
|
+
if key not in self._cache:
|
|
147
|
+
try:
|
|
148
|
+
from transformers import pipeline
|
|
149
|
+
except ImportError as e: # pragma: no cover - exercised only without the extra
|
|
150
|
+
raise RuntimeError(
|
|
151
|
+
"the local HuggingFace backend needs `transformers` "
|
|
152
|
+
"(install zu-huggingface[local])"
|
|
153
|
+
) from e
|
|
154
|
+
kwargs = safe_pipeline_kwargs(ModelPin(repo_id=model), self._policy)
|
|
155
|
+
self._cache[key] = pipeline(task, **kwargs)
|
|
156
|
+
return self._cache[key]
|
|
157
|
+
|
|
158
|
+
def transcribe(self, audio: bytes, model: str) -> str:
|
|
159
|
+
return str(self._pipe("automatic-speech-recognition", model)(audio)["text"])
|
|
160
|
+
|
|
161
|
+
def image_to_text(self, image: bytes, model: str) -> str:
|
|
162
|
+
r = self._pipe("image-to-text", model)(image)
|
|
163
|
+
return str(r[0]["generated_text"] if isinstance(r, list) else r["generated_text"])
|
|
164
|
+
|
|
165
|
+
def object_detection(self, image: bytes, model: str) -> list[dict]:
|
|
166
|
+
return [dict(item) for item in self._pipe("object-detection", model)(image)]
|
|
167
|
+
|
|
168
|
+
def text_classification(self, text: str, model: str) -> list[dict]:
|
|
169
|
+
return _scores(self._pipe("text-classification", model)(text))
|
|
170
|
+
|
|
171
|
+
def zero_shot(self, text: str, labels: list[str], model: str) -> list[dict]:
|
|
172
|
+
return _scores(self._pipe("zero-shot-classification", model)(text, candidate_labels=labels))
|
|
173
|
+
|
|
174
|
+
def embed(self, text: str, model: str) -> list[float]:
|
|
175
|
+
r = self._pipe("feature-extraction", model)(text)
|
|
176
|
+
# pipelines return [[token-vectors]]; mean-pool to one vector
|
|
177
|
+
vecs = r[0] if isinstance(r, list) else r
|
|
178
|
+
if vecs and isinstance(vecs[0], list):
|
|
179
|
+
cols = list(zip(*vecs, strict=False))
|
|
180
|
+
return [sum(c) / len(c) for c in cols]
|
|
181
|
+
return [float(x) for x in vecs]
|
|
182
|
+
|
|
183
|
+
def summarize(self, text: str, model: str) -> str:
|
|
184
|
+
return str(self._pipe("summarization", model)(text)[0]["summary_text"])
|
|
185
|
+
|
|
186
|
+
def translate(self, text: str, model: str) -> str:
|
|
187
|
+
return str(self._pipe("translation", model)(text)[0]["translation_text"])
|