zing-audit 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zing/__init__.py +12 -0
- zing/__main__.py +6 -0
- zing/cli.py +476 -0
- zing/clients/__init__.py +47 -0
- zing/clients/anthropic.py +320 -0
- zing/clients/base.py +132 -0
- zing/clients/openai_compatible.py +218 -0
- zing/config.py +215 -0
- zing/context.py +57 -0
- zing/detectors/__init__.py +32 -0
- zing/detectors/base.py +104 -0
- zing/detectors/billing.py +247 -0
- zing/detectors/capability.py +494 -0
- zing/detectors/connectivity.py +103 -0
- zing/detectors/context_window.py +424 -0
- zing/detectors/determinism.py +202 -0
- zing/detectors/helpers.py +159 -0
- zing/detectors/injected_prompt.py +186 -0
- zing/detectors/integrity.py +167 -0
- zing/detectors/model_identity.py +444 -0
- zing/detectors/prompt_cache.py +120 -0
- zing/detectors/protocol.py +256 -0
- zing/detectors/quality_judge.py +280 -0
- zing/detectors/reliability.py +210 -0
- zing/detectors/security.py +202 -0
- zing/detectors/streaming.py +198 -0
- zing/judge/__init__.py +11 -0
- zing/judge/judge.py +70 -0
- zing/knowledge/__init__.py +24 -0
- zing/knowledge/data/__init__.py +7 -0
- zing/knowledge/data/anthropic.yaml +609 -0
- zing/knowledge/data/deepseek.yaml +261 -0
- zing/knowledge/data/gemini.yaml +508 -0
- zing/knowledge/data/glm.yaml +730 -0
- zing/knowledge/data/moonshot.yaml +450 -0
- zing/knowledge/data/openai.yaml +702 -0
- zing/knowledge/data/qwen.yaml +712 -0
- zing/knowledge/loader.py +69 -0
- zing/knowledge/schema.py +158 -0
- zing/models.py +263 -0
- zing/report/__init__.py +14 -0
- zing/report/render.py +537 -0
- zing/report/writer.py +68 -0
- zing/runner.py +145 -0
- zing/scoring.py +283 -0
- zing/utils/__init__.py +1 -0
- zing/utils/redact.py +118 -0
- zing/utils/sse.py +68 -0
- zing/utils/stats.py +66 -0
- zing/utils/tokenize.py +92 -0
- zing_audit-0.2.0.dist-info/METADATA +230 -0
- zing_audit-0.2.0.dist-info/RECORD +56 -0
- zing_audit-0.2.0.dist-info/WHEEL +4 -0
- zing_audit-0.2.0.dist-info/entry_points.txt +2 -0
- zing_audit-0.2.0.dist-info/licenses/LICENSE +201 -0
- zing_audit-0.2.0.dist-info/licenses/NOTICE +10 -0
zing/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""zing — LLM relay reality check.
|
|
2
|
+
|
|
3
|
+
A local-first CLI that audits whether an OpenAI-compatible API relay actually
|
|
4
|
+
serves the model it claims to (货不对板检测): real context window, model identity
|
|
5
|
+
and downgrade fingerprinting, capability claims, token/billing sanity, streaming
|
|
6
|
+
authenticity, reliability and static security signals.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
# Must match the distribution version of the wheel this module ships in
# (zing-audit 0.2.0); `zing --version` prints this value.
__version__ = "0.2.0"
|
|
12
|
+
__all__ = ["__version__"]
|
zing/__main__.py
ADDED
zing/cli.py
ADDED
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
"""zing command-line interface.
|
|
2
|
+
|
|
3
|
+
Commands:
|
|
4
|
+
zing init write a starter config file
|
|
5
|
+
zing check audit one relay endpoint
|
|
6
|
+
zing compare audit a relay against a trusted baseline of the same model
|
|
7
|
+
zing models quickly probe an endpoint's /models list
|
|
8
|
+
zing kb inspect the bundled knowledge base
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import sys
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Annotated
|
|
17
|
+
|
|
18
|
+
import typer
|
|
19
|
+
from rich.console import Console
|
|
20
|
+
from rich.panel import Panel
|
|
21
|
+
from rich.table import Table
|
|
22
|
+
|
|
23
|
+
from zing import __version__
|
|
24
|
+
from zing.clients import make_client
|
|
25
|
+
from zing.config import (
|
|
26
|
+
TEMPLATE,
|
|
27
|
+
AuditOptions,
|
|
28
|
+
ConfigError,
|
|
29
|
+
build_target,
|
|
30
|
+
load_config_file,
|
|
31
|
+
merge_headers,
|
|
32
|
+
section,
|
|
33
|
+
validate_format,
|
|
34
|
+
validate_risk,
|
|
35
|
+
validate_suite,
|
|
36
|
+
)
|
|
37
|
+
from zing.knowledge import load_knowledge_base
|
|
38
|
+
from zing.models import AuditReport, RiskLevel, Status, TargetConfig
|
|
39
|
+
|
|
40
|
+
app = typer.Typer(
|
|
41
|
+
name="zing",
|
|
42
|
+
help="LLM relay reality check — audit whether a relay serves the model it claims (货不对板检测).",
|
|
43
|
+
no_args_is_help=True,
|
|
44
|
+
add_completion=False,
|
|
45
|
+
)
|
|
46
|
+
console = Console()
|
|
47
|
+
err_console = Console(stderr=True)
|
|
48
|
+
|
|
49
|
+
_RISK_STYLE = {
|
|
50
|
+
RiskLevel.CLEAN: ("green", "✓ CLEAN"),
|
|
51
|
+
RiskLevel.LOW: ("cyan", "• LOW RISK"),
|
|
52
|
+
RiskLevel.MEDIUM: ("yellow", "▲ MEDIUM RISK"),
|
|
53
|
+
RiskLevel.HIGH: ("bold red", "✗ HIGH RISK"),
|
|
54
|
+
RiskLevel.INCONCLUSIVE: ("dim", "? INCONCLUSIVE"),
|
|
55
|
+
}
|
|
56
|
+
_STATUS_STYLE = {
|
|
57
|
+
Status.PASS: "green",
|
|
58
|
+
Status.WARN: "yellow",
|
|
59
|
+
Status.FAIL: "red",
|
|
60
|
+
Status.INCONCLUSIVE: "dim",
|
|
61
|
+
Status.NOT_RUN: "dim",
|
|
62
|
+
Status.INFO: "blue",
|
|
63
|
+
Status.ERROR: "red",
|
|
64
|
+
}
|
|
65
|
+
_RISK_ORDER = [
|
|
66
|
+
RiskLevel.CLEAN,
|
|
67
|
+
RiskLevel.LOW,
|
|
68
|
+
RiskLevel.MEDIUM,
|
|
69
|
+
RiskLevel.HIGH,
|
|
70
|
+
]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# --------------------------------------------------------------------------- #
|
|
74
|
+
# Rendering
|
|
75
|
+
# --------------------------------------------------------------------------- #
|
|
76
|
+
def _print_summary(report: AuditReport, written: list[Path]) -> None:
    """Print a human-readable digest of an audit report to the console.

    Sections, in order: verdict panel, per-dimension table, key findings,
    detector findings whose status is WARN/FAIL/ERROR (first 14 only), a
    reliability line, report warnings, the written report paths, and a
    closing disclaimer.

    Args:
        report: The finished audit report to summarise.
        written: Paths of report files that were written; listed under
            "Reports" when non-empty.
    """
    # Function-scope import keeps this block self-contained; rich is already
    # a dependency of this module.
    from rich.markup import escape

    v = report.verdict
    style, label = _RISK_STYLE.get(v.risk_level, ("white", v.risk_level.value))
    score = "n/a" if v.overall_score is None else f"{v.overall_score}/100 (rating {v.rating})"
    head = f"[{style}]{label}[/] — {v.headline}"
    body = (
        f"Target : {report.target.name} · model [bold]{report.target.model}[/]"
        f"{' · provider ' + report.target.declared_provider if report.target.declared_provider else ''}\n"
        f"Mode : {report.mode} · suite {report.suite}"
        f"{' · baseline ' + report.baseline.model if report.baseline else ''}"
        f"{' · judge ' + (report.judge_model or 'on') if report.judge_used else ''}\n"
        f"Score : {score} · confidence {v.confidence}\n\n"
        f"{v.summary}"
    )
    # Only the last word of the style is used for the border, e.g. "red"
    # out of "bold red".
    console.print(Panel(body, title=head, border_style=style.split()[-1]))

    table = Table(title="Dimensions", show_lines=False, expand=False)
    table.add_column("Dimension")
    table.add_column("Score", justify="right")
    table.add_column("Weight", justify="right")
    table.add_column("Status")
    for d in report.dimensions:
        sc = "—" if d.score is None else f"{d.score}"
        st_style = _STATUS_STYLE.get(d.status, "white")
        table.add_row(
            d.dimension.value,
            sc,
            f"{d.weight:.0f}",
            f"[{st_style}]{d.status.value}[/]",
        )
    console.print(table)

    if v.key_findings:
        console.print("\n[bold]Key findings[/bold]")
        for kf in v.key_findings:
            console.print(f" • {kf}")

    # Detector findings worth surfacing (warn/fail/error status).
    notable = [
        (det, f)
        for det in report.detectors
        for f in det.findings
        if f.status in (Status.WARN, Status.FAIL, Status.ERROR)
    ]
    if notable:
        console.print("\n[bold]Detector findings[/bold]")
        for det, f in notable[:14]:
            fstyle = _STATUS_STYLE.get(f.status, "white")
            # The bracketed severity is a literal label, not a style tag.
            # Unescaped, Rich would parse e.g. "[high]" as markup and drop it
            # from the output; escape() leaves non-tag text untouched.
            severity_label = escape(f"[{f.severity.value}]")
            console.print(
                f" [{fstyle}]{f.status.value:5s}[/] {severity_label} "
                f"{det.dimension.value}: {f.title}"
            )

    if report.reliability:
        r = report.reliability
        p95 = r.latency_ms.get("p95")
        console.print(
            f"\n[bold]Reliability[/bold]: {r.successes}/{r.requests} ok "
            f"({r.success_rate * 100:.0f}%)"
            + (f", p95 {p95:.0f} ms" if p95 else "")
        )

    if report.warnings:
        console.print("\n[yellow]Warnings[/yellow]")
        for w in report.warnings:
            console.print(f" ! {w}")

    if written:
        console.print("\n[bold]Reports[/bold]")
        for p in written:
            console.print(f" → {p}")

    console.print(
        "\n[dim]zing reports black-box evidence of divergence and risk, "
        "not proof of fraud. Use `zing compare` for the strongest verdict.[/dim]"
    )
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _exit_code(report: AuditReport, fail_under: float | None, fail_on_risk: str | None) -> int:
    """Translate a report's verdict into a process exit status.

    Returns 1 when the overall score is known and below ``fail_under``, or
    when both the verdict's risk level and the ``fail_on_risk`` threshold
    appear in ``_RISK_ORDER`` and the risk is at or above the threshold.
    Returns 0 otherwise, including when ``fail_on_risk`` is not a valid
    ``RiskLevel`` value or when either level is absent from ``_RISK_ORDER``.
    """
    verdict = report.verdict

    # Score gate: skipped when no threshold was given or no score exists.
    if (
        fail_under is not None
        and verdict.overall_score is not None
        and verdict.overall_score < fail_under
    ):
        return 1

    # Risk gate is opt-in.
    if not fail_on_risk:
        return 0

    try:
        threshold = RiskLevel(fail_on_risk)
    except ValueError:
        return 0

    # Levels missing from the ordering cannot be ranked, so they never fail.
    rankable = threshold in _RISK_ORDER and verdict.risk_level in _RISK_ORDER
    if rankable and _RISK_ORDER.index(verdict.risk_level) >= _RISK_ORDER.index(threshold):
        return 1
    return 0
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# --------------------------------------------------------------------------- #
|
|
173
|
+
# Shared option resolution
|
|
174
|
+
# --------------------------------------------------------------------------- #
|
|
175
|
+
def _target_from(
    cfg: dict,
    sect: str,
    *,
    kind: str,
    name: str | None,
    base_url: str | None,
    api_key: str | None,
    model: str | None,
    declared_provider: str | None,
    timeout: float | None,
    headers: list[str] | None,
    api: str | None = None,
) -> TargetConfig:
    """Build a target from CLI values layered over config section ``sect``.

    For the string fields a falsy CLI value (``None`` or empty) falls back to
    the same-named key of the config section. ``timeout`` falls back to the
    section's ``timeout_sec`` only when it is ``None``. Headers from the
    section and from the CLI are combined by ``merge_headers``.
    """
    file_values = section(cfg, sect)
    combined_headers = merge_headers(file_values.get("headers"), headers)

    def cli_or_file(cli_value, key: str):
        # `or` rather than an `is None` test: empty strings also defer to the file.
        return cli_value or file_values.get(key)

    resolved_timeout = file_values.get("timeout_sec") if timeout is None else timeout

    return build_target(
        kind=kind,
        name=cli_or_file(name, "name"),
        base_url=cli_or_file(base_url, "base_url"),
        api_key=cli_or_file(api_key, "api_key"),
        model=cli_or_file(model, "model"),
        declared_provider=cli_or_file(declared_provider, "declared_provider"),
        timeout_sec=resolved_timeout,
        headers=combined_headers,
        api=cli_or_file(api, "api"),
    )
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _build_options(cfg: dict, **overrides) -> AuditOptions:
    """Assemble audit options from CLI overrides and the ``run`` config section.

    Precedence for each setting is: CLI override (when not ``None``), then
    the config-file value, then a built-in default. ``only`` and ``skip``
    come from the CLI overrides alone.

    Args:
        cfg: The loaded config mapping.
        **overrides: CLI values keyed by ``suite``, ``judge``, ``only``,
            ``skip``, ``reliability_requests``, ``concurrency`` and
            ``max_context_tokens``; missing or ``None`` entries are ignored.

    Raises:
        ConfigError: If a numeric setting cannot be converted to ``int``.
    """
    run_cfg = section(cfg, "run")

    def pick(key: str, cfg_key: str, default):
        """CLI override (when not None) wins over the config-file value."""
        val = overrides.get(key)
        if val is not None:
            return val
        # A key that is present but left empty in the YAML loads as None;
        # treat it as unset instead of letting None reach int() below.
        cfg_val = run_cfg.get(cfg_key)
        return default if cfg_val is None else cfg_val

    def pick_int(key: str, cfg_key: str, default: int) -> int:
        """Resolve a setting and coerce it to int, reporting bad values as config errors."""
        raw = pick(key, cfg_key, default)
        try:
            return int(raw)
        except (TypeError, ValueError) as exc:
            # Callers already turn ConfigError into a clean message and exit code 2.
            raise ConfigError(f"run.{cfg_key} must be an integer, got {raw!r}") from exc

    opts = AuditOptions(
        suite=validate_suite(overrides.get("suite") or run_cfg.get("suite") or "standard"),
        judge=bool(pick("judge", "judge", False)),
        only=overrides.get("only") or [],
        skip=overrides.get("skip") or [],
        reliability_requests=pick_int("reliability_requests", "reliability_requests", 8),
        reliability_concurrency=pick_int("concurrency", "concurrency", 3),
        max_context_probe_tokens=pick_int("max_context_tokens", "max_context_probe_tokens", 200_000),
    )
    return opts
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _judge_target(cfg: dict, base_url, api_key, model, baseline: TargetConfig | None):
    """Resolve the judge endpoint, or fall back to ``baseline``.

    Each of base URL, model and API key is taken from the argument when
    truthy, otherwise from the ``judge`` config section. A dedicated judge
    target is built only when both a base URL and a model are available;
    in every other case ``baseline`` (possibly ``None``) is returned as-is.

    NOTE(review): a ``model`` supplied without any base URL is therefore
    ignored in favour of the baseline's own model — confirm this is intended.
    """
    judge_cfg = section(cfg, "judge")
    endpoint = base_url or judge_cfg.get("base_url")
    model_id = model or judge_cfg.get("model")
    key = api_key or judge_cfg.get("api_key")

    if not (endpoint and model_id):
        return baseline

    return build_target(
        kind="judge",
        name="judge",
        base_url=endpoint,
        api_key=key,
        model=model_id,
        timeout_sec=None,
        headers={},
    )
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _run_and_report(
    target,
    options,
    *,
    baseline=None,
    judge_target=None,
    mode,
    out_dir,
    fmt,
    fail_under,
    fail_on_risk,
    as_json,
    kb_dirs,
) -> None:
    """Run the audit, emit its report, then exit with the gating status.

    With ``as_json`` the report is printed to stdout as JSON and no files are
    written; otherwise report files are written to ``out_dir`` in format
    ``fmt`` and a console summary is printed.

    Raises:
        typer.Exit: Always, carrying the code computed by ``_exit_code``.
    """
    # Imported here so a partially-built report module never breaks `zing kb` etc.
    from zing.report import write_reports
    from zing.runner import run_audit

    # The command line is handed to the runner (presumably to be recorded in
    # the report — verify against run_audit), so credentials typed directly
    # on the command line are masked before it leaves this function.
    command = "zing " + " ".join(_redact_argv(sys.argv[1:]))
    report = asyncio.run(
        run_audit(
            target,
            options,
            baseline=baseline,
            judge_target=judge_target,
            mode=mode,
            command=command,
            kb_dirs=kb_dirs,
        )
    )

    if as_json:
        # Machine-facing output (for piping into another tool or an LLM).
        print(report.model_dump_json(indent=2))
    else:
        written = write_reports(report, out_dir=out_dir, fmt=validate_format(fmt))
        _print_summary(report, written)

    raise typer.Exit(code=_exit_code(report, fail_under, fail_on_risk))


def _redact_argv(argv: list[str]) -> list[str]:
    """Return a copy of *argv* with literal credentials replaced by ``***``.

    Values of any option whose name ends in ``api-key`` are masked unless
    they are ``env:``/``file:`` references. Values of ``--header``/``-H``
    keep the header name and lose the value. Both ``--flag value`` and
    ``--flag=value`` spellings are handled.
    """
    header_flags = ("--header", "-H")

    def is_sensitive(flag: str) -> bool:
        return flag.endswith("api-key") or flag in header_flags

    def mask(flag: str, value: str) -> str:
        if flag in header_flags:
            header_name, sep, _ = value.partition(":")
            return f"{header_name}{sep} ***" if sep else "***"
        # env:VAR / file:/path only point at the secret; keep them readable.
        if value.startswith(("env:", "file:")):
            return value
        return "***"

    redacted: list[str] = []
    pending_flag = None  # sensitive flag still waiting for its value
    for arg in argv:
        if pending_flag is not None:
            redacted.append(mask(pending_flag, arg))
            pending_flag = None
            continue
        flag, eq, value = arg.partition("=")
        if arg.startswith("-") and is_sensitive(flag):
            if eq:
                redacted.append(f"{flag}={mask(flag, value)}")
            else:
                redacted.append(arg)
                pending_flag = flag
            continue
        redacted.append(arg)
    return redacted
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
# --------------------------------------------------------------------------- #
|
|
279
|
+
# Commands
|
|
280
|
+
# --------------------------------------------------------------------------- #
|
|
281
|
+
@app.command("init")
def init_config(
    path: Annotated[Path, typer.Option("--path", "-p", help="Where to write the config.")] = Path("zing.yaml"),
    force: Annotated[bool, typer.Option("--force", help="Overwrite if it exists.")] = False,
) -> None:
    """Write a starter zing.yaml config."""
    # The docstring above doubles as the command's --help text, so
    # implementation notes live in comments.
    if path.exists() and not force:
        err_console.print(f"[red]{path} already exists. Use --force to overwrite.[/red]")
        raise typer.Exit(code=2)
    try:
        # Create missing parent directories so e.g. `--path conf/zing.yaml`
        # works on a fresh checkout.
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(TEMPLATE, encoding="utf-8")
    except OSError as exc:
        # Unwritable location, path is a directory, etc.: report it like the
        # other usage errors instead of dumping a traceback.
        err_console.print(f"[red]Could not write {path}:[/red] {exc}")
        raise typer.Exit(code=2) from exc
    console.print(f"Wrote {path}")
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@app.command("check")
def check_command(
    config: Annotated[Path | None, typer.Option("--config", "-c", help="YAML config path.")] = None,
    base_url: Annotated[str | None, typer.Option("--base-url", help="Relay base URL, e.g. https://relay.example.com/v1.")] = None,
    api_key: Annotated[str | None, typer.Option("--api-key", help="API key, or env:VAR / file:/path reference.")] = None,
    model: Annotated[str | None, typer.Option("--model", help="Model id the relay claims to serve.")] = None,
    name: Annotated[str | None, typer.Option("--name", help="Display name for the target.")] = None,
    api: Annotated[str | None, typer.Option("--api", help="Wire protocol: auto | openai | anthropic.")] = None,
    declared_provider: Annotated[str | None, typer.Option("--declared-provider", help="Provider hint for KB lookup (openai, anthropic, deepseek, ...).")] = None,
    header: Annotated[list[str] | None, typer.Option("--header", "-H", help="Extra header 'Name: value' (repeatable).")] = None,
    suite: Annotated[str | None, typer.Option("--suite", help="smoke | standard | deep | full.")] = None,
    judge: Annotated[bool | None, typer.Option("--judge/--no-judge", help="Enable code+LLM hybrid judging.")] = None,
    judge_base_url: Annotated[str | None, typer.Option("--judge-base-url", help="Trusted judge endpoint base URL.")] = None,
    judge_api_key: Annotated[str | None, typer.Option("--judge-api-key", help="Judge API key (env:VAR ok).")] = None,
    judge_model: Annotated[str | None, typer.Option("--judge-model", help="Judge model id.")] = None,
    only: Annotated[list[str] | None, typer.Option("--only", help="Run only these detector ids (repeatable).")] = None,
    skip: Annotated[list[str] | None, typer.Option("--skip", help="Skip these detector ids (repeatable).")] = None,
    out_dir: Annotated[Path | None, typer.Option("--out-dir", help="Report output directory.")] = None,
    fmt: Annotated[str | None, typer.Option("--format", help="json | md | html | all.")] = None,
    timeout: Annotated[float | None, typer.Option("--timeout", help="HTTP timeout (seconds).")] = None,
    reliability_requests: Annotated[int | None, typer.Option("--reliability-requests", help="Reliability probe request count (0 disables).")] = None,
    concurrency: Annotated[int | None, typer.Option("--concurrency", help="Reliability probe concurrency.")] = None,
    max_context_tokens: Annotated[int | None, typer.Option("--max-context-tokens", help="Cap for the real-context-window probe.")] = None,
    kb_dir: Annotated[list[Path] | None, typer.Option("--kb-dir", help="Extra knowledge-base directory (repeatable).")] = None,
    fail_under: Annotated[float | None, typer.Option("--fail-under", help="Exit 1 if overall score < this.")] = None,
    fail_on_risk: Annotated[str | None, typer.Option("--fail-on-risk", help="Exit 1 if risk >= this (low|medium|high).")] = None,
    as_json: Annotated[bool, typer.Option("--json", help="Print the JSON report to stdout instead of writing files.")] = False,
) -> None:
    """Audit one relay endpoint and write a report."""
    # The docstring above is the command's --help text; notes go in comments.
    # Any ConfigError raised while resolving inputs becomes exit code 2.
    try:
        cfg = load_config_file(config)
        target = _target_from(
            cfg,
            "target",
            kind="target",
            name=name,
            base_url=base_url,
            api_key=api_key,
            model=model,
            declared_provider=declared_provider,
            timeout=timeout,
            headers=header,
            api=api,
        )
        options = _build_options(
            cfg,
            suite=suite,
            judge=judge,
            only=only,
            skip=skip,
            reliability_requests=reliability_requests,
            concurrency=concurrency,
            max_context_tokens=max_context_tokens,
        )
        fail_on_risk = validate_risk(fail_on_risk)

        # `check` has no baseline, so a judge can only come from flags/config.
        baseline = None
        judge_t = None
        if options.judge:
            judge_t = _judge_target(cfg, judge_base_url, judge_api_key, judge_model, baseline)

        # CLI flag first, then the `run` config section, then the default.
        run_cfg = section(cfg, "run")
        report_dir = out_dir or Path(run_cfg.get("output_dir") or "reports")
        report_fmt = fmt or run_cfg.get("format") or "all"
        extra_kb_dirs = list(kb_dir) if kb_dir else None

        _run_and_report(
            target,
            options,
            baseline=baseline,
            judge_target=judge_t,
            mode="check",
            out_dir=report_dir,
            fmt=report_fmt,
            fail_under=fail_under,
            fail_on_risk=fail_on_risk,
            as_json=as_json,
            kb_dirs=extra_kb_dirs,
        )
    except ConfigError as exc:
        err_console.print(f"[red]Config error:[/red] {exc}")
        raise typer.Exit(code=2) from exc
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
@app.command("compare")
def compare_command(
    config: Annotated[Path | None, typer.Option("--config", "-c", help="YAML config path.")] = None,
    target_base_url: Annotated[str | None, typer.Option("--target-base-url", help="Target relay base URL.")] = None,
    target_api_key: Annotated[str | None, typer.Option("--target-api-key", help="Target API key (env:VAR ok).")] = None,
    target_model: Annotated[str | None, typer.Option("--target-model", help="Target model id.")] = None,
    target_name: Annotated[str | None, typer.Option("--target-name", help="Target display name.")] = None,
    target_api: Annotated[str | None, typer.Option("--target-api", help="Target wire protocol: auto | openai | anthropic.")] = None,
    declared_provider: Annotated[str | None, typer.Option("--declared-provider", help="Provider hint for KB lookup.")] = None,
    baseline_base_url: Annotated[str | None, typer.Option("--baseline-base-url", help="Trusted baseline base URL.")] = None,
    baseline_api_key: Annotated[str | None, typer.Option("--baseline-api-key", help="Baseline API key (env:VAR ok).")] = None,
    baseline_model: Annotated[str | None, typer.Option("--baseline-model", help="Baseline model id.")] = None,
    baseline_name: Annotated[str | None, typer.Option("--baseline-name", help="Baseline display name.")] = None,
    baseline_api: Annotated[str | None, typer.Option("--baseline-api", help="Baseline wire protocol: auto | openai | anthropic.")] = None,
    suite: Annotated[str | None, typer.Option("--suite", help="smoke | standard | deep | full.")] = None,
    judge: Annotated[bool | None, typer.Option("--judge/--no-judge", help="Enable code+LLM hybrid judging.")] = None,
    judge_model: Annotated[str | None, typer.Option("--judge-model", help="Judge model id (defaults to baseline).")] = None,
    out_dir: Annotated[Path | None, typer.Option("--out-dir", help="Report output directory.")] = None,
    fmt: Annotated[str | None, typer.Option("--format", help="json | md | html | all.")] = None,
    timeout: Annotated[float | None, typer.Option("--timeout", help="HTTP timeout (seconds).")] = None,
    max_context_tokens: Annotated[int | None, typer.Option("--max-context-tokens", help="Cap for the context-window probe.")] = None,
    kb_dir: Annotated[list[Path] | None, typer.Option("--kb-dir", help="Extra knowledge-base directory (repeatable).")] = None,
    fail_under: Annotated[float | None, typer.Option("--fail-under", help="Exit 1 if overall score < this.")] = None,
    fail_on_risk: Annotated[str | None, typer.Option("--fail-on-risk", help="Exit 1 if risk >= this.")] = None,
    as_json: Annotated[bool, typer.Option("--json", help="Print JSON report to stdout.")] = False,
) -> None:
    """Audit a relay against a trusted baseline of the same declared model."""
    # The docstring above is the command's --help text; notes go in comments.
    # Any ConfigError raised while resolving inputs becomes exit code 2.
    try:
        cfg = load_config_file(config)

        # Target and baseline share the single --timeout flag; this command
        # exposes no header flag, so only config-file headers apply.
        target = _target_from(
            cfg,
            "target",
            kind="target",
            name=target_name,
            base_url=target_base_url,
            api_key=target_api_key,
            model=target_model,
            declared_provider=declared_provider,
            timeout=timeout,
            headers=None,
            api=target_api,
        )
        baseline = _target_from(
            cfg,
            "baseline",
            kind="baseline",
            name=baseline_name,
            base_url=baseline_base_url,
            api_key=baseline_api_key,
            model=baseline_model,
            declared_provider=None,
            timeout=timeout,
            headers=None,
            api=baseline_api,
        )

        # Without --suite this command runs the "deep" suite.
        # NOTE(review): because a non-empty suite is always passed as an
        # override, the config file's run.suite is never consulted here —
        # confirm that is intended.
        options = _build_options(
            cfg,
            suite=suite or "deep",
            judge=judge,
            max_context_tokens=max_context_tokens,
        )
        fail_on_risk = validate_risk(fail_on_risk)

        judge_t = None
        if options.judge:
            judge_t = _judge_target(cfg, None, None, judge_model, baseline)

        # CLI flag first, then the `run` config section, then the default.
        run_cfg = section(cfg, "run")
        report_dir = out_dir or Path(run_cfg.get("output_dir") or "reports")
        report_fmt = fmt or run_cfg.get("format") or "all"
        extra_kb_dirs = list(kb_dir) if kb_dir else None

        _run_and_report(
            target,
            options,
            baseline=baseline,
            judge_target=judge_t,
            mode="compare",
            out_dir=report_dir,
            fmt=report_fmt,
            fail_under=fail_under,
            fail_on_risk=fail_on_risk,
            as_json=as_json,
            kb_dirs=extra_kb_dirs,
        )
    except ConfigError as exc:
        err_console.print(f"[red]Config error:[/red] {exc}")
        raise typer.Exit(code=2) from exc
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
@app.command("models")
def models_command(
    base_url: Annotated[str, typer.Option("--base-url", help="Endpoint base URL.")],
    api_key: Annotated[str | None, typer.Option("--api-key", help="API key (env:VAR ok).")] = None,
    model: Annotated[str, typer.Option("--model", help="A model id (for the client; not required to list).")] = "x",
    api: Annotated[str | None, typer.Option("--api", help="Wire protocol: auto | openai | anthropic.")] = None,
) -> None:
    """List the models an endpoint advertises via GET /v1/models."""
    # The docstring above is the command's --help text; notes go in comments.
    try:
        endpoint = build_target(
            kind="endpoint",
            name=None,
            base_url=base_url,
            api_key=api_key,
            model=model,
            api=api,
        )
    except ConfigError as exc:
        err_console.print(f"[red]Config error:[/red] {exc}")
        raise typer.Exit(code=2) from exc

    async def _list_and_print() -> None:
        # NOTE(review): the scraped source lost its indentation; the client is
        # assumed to be closed before the result is printed — confirm.
        async with make_client(endpoint) as client:
            outcome, model_ids = await client.list_models()

        if outcome.ok:
            console.print(f"[green]{len(model_ids)} models[/green] at {base_url}")
            for model_id in model_ids:
                console.print(f" • {model_id}")
            return

        # Prefer the error message; fall back to the bare HTTP status code.
        err_console.print(f"[red]Failed:[/red] {outcome.error_message or outcome.status_code}")
        raise typer.Exit(code=1)

    asyncio.run(_list_and_print())
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
@app.command("kb")
def kb_command(
    provider: Annotated[str | None, typer.Argument(help="Filter by provider key (openai, deepseek, ...).")] = None,
    kb_dir: Annotated[list[Path] | None, typer.Option("--kb-dir", help="Extra knowledge-base directory.")] = None,
) -> None:
    """Inspect the bundled knowledge base."""
    # The docstring above is the command's --help text; notes go in comments.
    extra_dirs = list(kb_dir) if kb_dir else None
    kb = load_knowledge_base(extra_dirs)

    def token_cell(count) -> str:
        # Non-positive counts are shown as a dash rather than a number.
        return f"{count:,}" if count > 0 else "—"

    table = Table(title="zing knowledge base")
    for heading in ("Provider", "Model"):
        table.add_column(heading)
    for heading in ("Context", "Max out"):
        table.add_column(heading, justify="right")
    table.add_column("Reasoning")

    ordered = sorted(kb.providers.values(), key=lambda entry: entry.provider)
    shown = [entry for entry in ordered if not provider or entry.provider == provider]
    for entry in shown:
        for m in entry.models:
            table.add_row(
                entry.provider,
                m.id,
                token_cell(m.context_window_tokens),
                token_cell(m.max_output_tokens),
                "yes" if m.reasoning else "",
            )
    console.print(table)

    # The footer counts the whole knowledge base, not just the filtered rows.
    model_count = sum(len(entry.models) for entry in kb.providers.values())
    console.print(f"{model_count} models across {len(kb.providers)} providers.")
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
@app.callback(invoke_without_command=True)
def main(
    ctx: typer.Context,
    version: Annotated[bool, typer.Option("--version", help="Show version and exit.")] = False,
) -> None:
    # Root callback: handles --version and the bare `zing` invocation.
    # When a subcommand was requested and --version is absent there is
    # nothing to do here.
    if not version and ctx.invoked_subcommand is not None:
        return

    # --version takes priority over the help text shown for a bare invocation.
    console.print(f"zing {__version__}" if version else ctx.get_help())
    raise typer.Exit()
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
if __name__ == "__main__":
|
|
476
|
+
app()
|
zing/clients/__init__.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""HTTP clients for talking to relay endpoints.
|
|
2
|
+
|
|
3
|
+
Two wire protocols are supported behind one :class:`~zing.models.CompletionOutcome`
|
|
4
|
+
interface: OpenAI Chat Completions and the Anthropic Messages API. ``make_client``
|
|
5
|
+
picks the right one from the target's ``api`` field (``auto`` infers it).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from zing.clients.anthropic import AnthropicClient
|
|
13
|
+
from zing.clients.base import BaseHTTPClient
|
|
14
|
+
from zing.clients.openai_compatible import OpenAICompatibleClient
|
|
15
|
+
from zing.models import TargetConfig
|
|
16
|
+
|
|
17
|
+
Client = BaseHTTPClient
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"AnthropicClient",
|
|
21
|
+
"BaseHTTPClient",
|
|
22
|
+
"Client",
|
|
23
|
+
"OpenAICompatibleClient",
|
|
24
|
+
"detect_api",
|
|
25
|
+
"make_client",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def detect_api(config: TargetConfig) -> str:
    """Return the concrete wire protocol for a target: 'openai' or 'anthropic'.

    An explicit ``api`` of 'openai' or 'anthropic' (case-insensitive) is
    returned as-is. Any other value, including 'auto' or a missing value, is
    inferred: 'anthropic' when the base URL contains "anthropic", when the
    base URL ends in "/messages" (ignoring trailing slashes), or when the
    model id starts with "claude"; 'openai' otherwise.
    """
    declared = (config.api or "auto").lower()
    if declared in ("openai", "anthropic"):
        return declared

    endpoint = (config.base_url or "").lower()
    model_id = (config.model or "").lower()

    looks_anthropic = (
        "anthropic" in endpoint
        or endpoint.rstrip("/").endswith("/messages")
        or model_id.startswith("claude")
    )
    return "anthropic" if looks_anthropic else "openai"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def make_client(
    config: TargetConfig, *, transport: httpx.AsyncBaseTransport | None = None
) -> Client:
    """Instantiate the client class that speaks the target's wire protocol.

    The protocol is resolved with ``detect_api``; ``transport`` is passed
    through unchanged to whichever client class is chosen.
    """
    client_cls = AnthropicClient if detect_api(config) == "anthropic" else OpenAICompatibleClient
    return client_cls(config, transport=transport)
|