spanforge 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +695 -0
- spanforge/_batch_exporter.py +322 -0
- spanforge/_cli.py +3081 -0
- spanforge/_hooks.py +340 -0
- spanforge/_server.py +953 -0
- spanforge/_span.py +1015 -0
- spanforge/_store.py +287 -0
- spanforge/_stream.py +654 -0
- spanforge/_trace.py +334 -0
- spanforge/_tracer.py +253 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +464 -0
- spanforge/auto.py +181 -0
- spanforge/baseline.py +336 -0
- spanforge/config.py +460 -0
- spanforge/consent.py +227 -0
- spanforge/consumer.py +379 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1060 -0
- spanforge/cost.py +597 -0
- spanforge/debug.py +514 -0
- spanforge/drift.py +488 -0
- spanforge/egress.py +63 -0
- spanforge/eval.py +575 -0
- spanforge/event.py +1052 -0
- spanforge/exceptions.py +246 -0
- spanforge/explain.py +181 -0
- spanforge/export/__init__.py +50 -0
- spanforge/export/append_only.py +342 -0
- spanforge/export/cloud.py +349 -0
- spanforge/export/datadog.py +495 -0
- spanforge/export/grafana.py +331 -0
- spanforge/export/jsonl.py +198 -0
- spanforge/export/otel_bridge.py +291 -0
- spanforge/export/otlp.py +817 -0
- spanforge/export/otlp_bridge.py +231 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/webhook.py +302 -0
- spanforge/exporters/__init__.py +29 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/hitl.py +297 -0
- spanforge/inspect.py +429 -0
- spanforge/integrations/__init__.py +39 -0
- spanforge/integrations/_pricing.py +277 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/bedrock.py +306 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +349 -0
- spanforge/integrations/groq.py +444 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/llamaindex.py +370 -0
- spanforge/integrations/ollama.py +286 -0
- spanforge/integrations/openai.py +370 -0
- spanforge/integrations/together.py +485 -0
- spanforge/metrics.py +393 -0
- spanforge/metrics_export.py +342 -0
- spanforge/migrate.py +278 -0
- spanforge/model_registry.py +282 -0
- spanforge/models.py +407 -0
- spanforge/namespaces/__init__.py +215 -0
- spanforge/namespaces/audit.py +253 -0
- spanforge/namespaces/cache.py +209 -0
- spanforge/namespaces/chain.py +74 -0
- spanforge/namespaces/confidence.py +69 -0
- spanforge/namespaces/consent.py +85 -0
- spanforge/namespaces/cost.py +175 -0
- spanforge/namespaces/decision.py +135 -0
- spanforge/namespaces/diff.py +146 -0
- spanforge/namespaces/drift.py +79 -0
- spanforge/namespaces/eval_.py +232 -0
- spanforge/namespaces/fence.py +180 -0
- spanforge/namespaces/guard.py +104 -0
- spanforge/namespaces/hitl.py +92 -0
- spanforge/namespaces/latency.py +69 -0
- spanforge/namespaces/prompt.py +185 -0
- spanforge/namespaces/redact.py +172 -0
- spanforge/namespaces/template.py +197 -0
- spanforge/namespaces/tool_call.py +76 -0
- spanforge/namespaces/trace.py +1006 -0
- spanforge/normalizer.py +183 -0
- spanforge/presidio_backend.py +149 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +415 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +780 -0
- spanforge/sampling.py +500 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/signing.py +1152 -0
- spanforge/stream.py +559 -0
- spanforge/testing.py +376 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +304 -0
- spanforge/validate.py +383 -0
- spanforge-2.0.0.dist-info/METADATA +1777 -0
- spanforge-2.0.0.dist-info/RECORD +101 -0
- spanforge-2.0.0.dist-info/WHEEL +4 -0
- spanforge-2.0.0.dist-info/entry_points.txt +5 -0
- spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""spanforge.namespaces.drift \u2014 Drift namespace payload types (RFC-0001 SPANFORGE).
|
|
2
|
+
|
|
3
|
+
Classes
|
|
4
|
+
-------
|
|
5
|
+
DriftPayload drift.detected / drift.threshold_breach / drift.resolved
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any, Literal
|
|
11
|
+
|
|
12
|
+
__all__ = ["DriftPayload"]
|
|
13
|
+
|
|
14
|
+
_VALID_STATUSES = frozenset({"detected", "threshold_breach", "resolved"})


@dataclass
class DriftPayload:
    """RFC-0001 SPANFORGE \u2014 payload for drift.* events.

    Captures Z-score and KL-divergence statistical drift signals against the
    deployment baseline (T \u2014 Traceability).

    Raises:
        ValueError: If ``metric_name`` or ``agent_id`` is empty, ``status`` is
            not one of the recognised statuses, ``window_seconds`` is not
            positive, or ``baseline_stddev`` is negative.
    """

    metric_name: str
    agent_id: str
    current_value: float
    baseline_mean: float
    baseline_stddev: float
    z_score: float
    threshold: float
    window_seconds: int
    status: Literal["detected", "threshold_breach", "resolved"]
    kl_divergence: float | None = None

    def __post_init__(self) -> None:
        """Validate field values once the dataclass has been initialised."""
        if not self.metric_name:
            raise ValueError("DriftPayload.metric_name must be non-empty")
        if not self.agent_id:
            raise ValueError("DriftPayload.agent_id must be non-empty")
        if self.status not in _VALID_STATUSES:
            raise ValueError(
                f"DriftPayload.status must be one of {sorted(_VALID_STATUSES)}"
            )
        if self.window_seconds <= 0:
            raise ValueError("DriftPayload.window_seconds must be > 0")
        if self.baseline_stddev < 0:
            raise ValueError("DriftPayload.baseline_stddev must be >= 0")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``kl_divergence`` is only included when it is not ``None``.
        """
        d: dict[str, Any] = {
            "metric_name": self.metric_name,
            "agent_id": self.agent_id,
            "current_value": self.current_value,
            "baseline_mean": self.baseline_mean,
            "baseline_stddev": self.baseline_stddev,
            "z_score": self.z_score,
            "threshold": self.threshold,
            "window_seconds": self.window_seconds,
            "status": self.status,
        }
        if self.kl_divergence is not None:
            d["kl_divergence"] = self.kl_divergence
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> DriftPayload:
        """Deserialise from a plain ``dict``.

        Numeric fields are coerced with ``float()`` / ``int()``.  A missing or
        ``None`` ``kl_divergence`` stays ``None``.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If the resulting payload fails validation.
        """
        kl_divergence = data.get("kl_divergence")
        return cls(
            metric_name=data["metric_name"],
            agent_id=data["agent_id"],
            current_value=float(data["current_value"]),
            baseline_mean=float(data["baseline_mean"]),
            baseline_stddev=float(data["baseline_stddev"]),
            z_score=float(data["z_score"]),
            threshold=float(data["threshold"]),
            window_seconds=int(data["window_seconds"]),
            status=data["status"],
            # Coerce like every other numeric field instead of passing the
            # raw value through.
            kl_divergence=float(kl_divergence) if kl_divergence is not None else None,
        )
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""spanforge.namespaces.eval_ — Evaluation payload types (RFC-0001).
|
|
2
|
+
|
|
3
|
+
Classes
|
|
4
|
+
-------
|
|
5
|
+
EvalScoreRecordedPayload llm.eval.score.recorded
|
|
6
|
+
EvalRegressionDetectedPayload llm.eval.regression.detected
|
|
7
|
+
EvalScenarioStartedPayload llm.eval.scenario.started
|
|
8
|
+
EvalScenarioCompletedPayload llm.eval.scenario.completed
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from spanforge.namespaces.trace import ModelInfo
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"EvalRegressionDetectedPayload",
|
|
19
|
+
"EvalScenarioCompletedPayload",
|
|
20
|
+
"EvalScenarioStartedPayload",
|
|
21
|
+
"EvalScoreRecordedPayload",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
_VALID_SEVERITIES = frozenset({"low", "medium", "high", "critical"})
|
|
25
|
+
_VALID_STATUSES = frozenset({"passed", "failed", "error", "cancelled"})
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class EvalScoreRecordedPayload:
    """RFC-0001 — A single evaluation score recorded for a subject event.

    Raises:
        ValueError: If ``evaluator`` or ``metric_name`` is not a non-empty
            string.
    """

    evaluator: str
    metric_name: str
    score: float
    score_min: float | None = None
    score_max: float | None = None
    threshold: float | None = None
    passed: bool | None = None
    subject_event_id: str | None = None
    subject_type: str | None = None
    eval_run_id: str | None = None
    rationale: str | None = None
    model: ModelInfo | None = None  # judge model

    def __post_init__(self) -> None:
        """Validate the required string fields."""
        if not isinstance(self.evaluator, str) or not self.evaluator:
            raise ValueError("EvalScoreRecordedPayload.evaluator must be non-empty")
        if not isinstance(self.metric_name, str) or not self.metric_name:
            raise ValueError("EvalScoreRecordedPayload.metric_name must be non-empty")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional fields are omitted when ``None``; ``model`` is serialised
        through its own ``to_dict()``.
        """
        d: dict[str, Any] = {
            "evaluator": self.evaluator,
            "metric_name": self.metric_name,
            "score": self.score,
        }
        for f in ("score_min", "score_max", "threshold", "passed",
                  "subject_event_id", "subject_type", "eval_run_id", "rationale"):
            v = getattr(self, f)
            if v is not None:
                d[f] = v
        if self.model is not None:
            d["model"] = self.model.to_dict()
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> EvalScoreRecordedPayload:
        """Deserialise from a plain ``dict``.

        An optional key that is absent or explicitly ``None`` (e.g. a JSON
        ``null``) yields ``None`` for that field.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If the resulting payload fails validation.
        """
        # Test the *value*, not key membership: ``"k" in data`` is true for an
        # explicit null, which would crash float()/ModelInfo.from_dict() and
        # silently turn ``passed=None`` into ``False`` via bool(None).
        return cls(
            evaluator=data["evaluator"],
            metric_name=data["metric_name"],
            score=float(data["score"]),
            score_min=float(data["score_min"]) if data.get("score_min") is not None else None,
            score_max=float(data["score_max"]) if data.get("score_max") is not None else None,
            threshold=float(data["threshold"]) if data.get("threshold") is not None else None,
            passed=bool(data["passed"]) if data.get("passed") is not None else None,
            subject_event_id=data.get("subject_event_id"),
            subject_type=data.get("subject_type"),
            eval_run_id=data.get("eval_run_id"),
            rationale=data.get("rationale"),
            model=ModelInfo.from_dict(data["model"]) if data.get("model") is not None else None,
        )
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass
class EvalRegressionDetectedPayload:
    """RFC-0001 — A metric regression detected between baseline and current.

    Raises:
        ValueError: If ``metric_name`` is not a non-empty string, or
            ``severity`` is given but is not a recognised severity.
    """

    metric_name: str
    baseline_score: float
    current_score: float
    delta: float
    regression_pct: float
    severity: str | None = None  # "low"|"medium"|"high"|"critical"
    affected_model: ModelInfo | None = None
    eval_run_id: str | None = None
    sample_count: int | None = None

    def __post_init__(self) -> None:
        """Validate ``metric_name`` and, when provided, ``severity``."""
        if not isinstance(self.metric_name, str) or not self.metric_name:
            raise ValueError("EvalRegressionDetectedPayload.metric_name must be non-empty")
        if self.severity is not None and self.severity not in _VALID_SEVERITIES:
            raise ValueError(
                "EvalRegressionDetectedPayload.severity must be one of "
                f"{sorted(_VALID_SEVERITIES)}"
            )

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional fields are omitted when ``None``; ``affected_model`` is
        serialised through its own ``to_dict()``.
        """
        d: dict[str, Any] = {
            "metric_name": self.metric_name,
            "baseline_score": self.baseline_score,
            "current_score": self.current_score,
            "delta": self.delta,
            "regression_pct": self.regression_pct,
        }
        if self.severity is not None:
            d["severity"] = self.severity
        if self.affected_model is not None:
            d["affected_model"] = self.affected_model.to_dict()
        if self.eval_run_id is not None:
            d["eval_run_id"] = self.eval_run_id
        if self.sample_count is not None:
            d["sample_count"] = self.sample_count
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> EvalRegressionDetectedPayload:
        """Deserialise from a plain ``dict``.

        An optional key that is absent or explicitly ``None`` yields ``None``
        for that field.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If the resulting payload fails validation.
        """
        # Check values rather than key membership so an explicit null does not
        # reach ModelInfo.from_dict() or int().
        affected_model = data.get("affected_model")
        sample_count = data.get("sample_count")
        return cls(
            metric_name=data["metric_name"],
            baseline_score=float(data["baseline_score"]),
            current_score=float(data["current_score"]),
            delta=float(data["delta"]),
            regression_pct=float(data["regression_pct"]),
            severity=data.get("severity"),
            affected_model=(
                ModelInfo.from_dict(affected_model) if affected_model is not None else None
            ),
            eval_run_id=data.get("eval_run_id"),
            sample_count=int(sample_count) if sample_count is not None else None,
        )
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@dataclass
class EvalScenarioStartedPayload:
    """RFC-0001 — An evaluation scenario has started.

    Raises:
        ValueError: If ``scenario_id``, ``scenario_name`` or ``evaluator`` is
            empty.
    """

    scenario_id: str
    scenario_name: str
    evaluator: str
    dataset_id: str | None = None
    expected_sample_count: int | None = None
    metrics: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Validate that the required identifiers are non-empty."""
        if not self.scenario_id:
            raise ValueError("EvalScenarioStartedPayload.scenario_id must be non-empty")
        if not self.scenario_name:
            raise ValueError("EvalScenarioStartedPayload.scenario_name must be non-empty")
        if not self.evaluator:
            raise ValueError("EvalScenarioStartedPayload.evaluator must be non-empty")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``dataset_id`` and ``expected_sample_count`` are omitted when ``None``;
        ``metrics`` is omitted when empty and otherwise copied.
        """
        d: dict[str, Any] = {
            "scenario_id": self.scenario_id,
            "scenario_name": self.scenario_name,
            "evaluator": self.evaluator,
        }
        if self.dataset_id is not None:
            d["dataset_id"] = self.dataset_id
        if self.expected_sample_count is not None:
            d["expected_sample_count"] = self.expected_sample_count
        if self.metrics:
            d["metrics"] = list(self.metrics)
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> EvalScenarioStartedPayload:
        """Deserialise from a plain ``dict``.

        A missing or ``None`` ``expected_sample_count`` yields ``None``; a
        missing or ``None`` ``metrics`` yields an empty list.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If the resulting payload fails validation.
        """
        expected = data.get("expected_sample_count")
        return cls(
            scenario_id=data["scenario_id"],
            scenario_name=data["scenario_name"],
            evaluator=data["evaluator"],
            dataset_id=data.get("dataset_id"),
            # An explicit null must not reach int().
            expected_sample_count=int(expected) if expected is not None else None,
            # ``or []`` also covers an explicit null, which list() cannot iterate.
            metrics=list(data.get("metrics") or []),
        )
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@dataclass
class EvalScenarioCompletedPayload:
    """RFC-0001 — An evaluation scenario has completed.

    Raises:
        ValueError: If ``scenario_id`` is empty, ``status`` is not a
            recognised status, or ``duration_ms`` is negative.
    """

    scenario_id: str
    status: str  # "passed"|"failed"|"error"|"cancelled"
    duration_ms: float
    completed_sample_count: int | None = None
    scores_summary: dict[str, float] | None = None
    errors: list[str] | None = None

    def __post_init__(self) -> None:
        """Validate the identifier, status and duration."""
        if not self.scenario_id:
            raise ValueError("EvalScenarioCompletedPayload.scenario_id must be non-empty")
        if self.status not in _VALID_STATUSES:
            raise ValueError(
                "EvalScenarioCompletedPayload.status must be one of "
                f"{sorted(_VALID_STATUSES)}"
            )
        if self.duration_ms < 0:
            raise ValueError("EvalScenarioCompletedPayload.duration_ms must be non-negative")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional fields are omitted when ``None``; ``scores_summary`` and
        ``errors`` are shallow-copied.
        """
        d: dict[str, Any] = {
            "scenario_id": self.scenario_id,
            "status": self.status,
            "duration_ms": self.duration_ms,
        }
        if self.completed_sample_count is not None:
            d["completed_sample_count"] = self.completed_sample_count
        if self.scores_summary is not None:
            d["scores_summary"] = dict(self.scores_summary)
        if self.errors is not None:
            d["errors"] = list(self.errors)
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> EvalScenarioCompletedPayload:
        """Deserialise from a plain ``dict``.

        An optional key that is absent or explicitly ``None`` yields ``None``
        for that field.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If the resulting payload fails validation.
        """
        # Check values rather than key membership so an explicit null does not
        # reach int()/dict()/list().
        sample_count = data.get("completed_sample_count")
        scores_summary = data.get("scores_summary")
        errors = data.get("errors")
        return cls(
            scenario_id=data["scenario_id"],
            status=data["status"],
            duration_ms=float(data["duration_ms"]),
            completed_sample_count=int(sample_count) if sample_count is not None else None,
            scores_summary=dict(scores_summary) if scores_summary is not None else None,
            errors=list(errors) if errors is not None else None,
        )
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""spanforge.namespaces.fence — Fence payload types (RFC-0001).
|
|
2
|
+
|
|
3
|
+
Classes
|
|
4
|
+
-------
|
|
5
|
+
FenceValidatedPayload llm.fence.validated
|
|
6
|
+
FenceRetryTriggeredPayload llm.fence.retry.triggered
|
|
7
|
+
FenceMaxRetriesExceededPayload llm.fence.max_retries.exceeded
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from spanforge.namespaces.trace import CostBreakdown
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"FenceMaxRetriesExceededPayload",
|
|
18
|
+
"FenceRetryTriggeredPayload",
|
|
19
|
+
"FenceValidatedPayload",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
_VALID_OUTPUT_TYPES = frozenset({"json_schema", "pydantic", "regex", "xml", "custom"})
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class FenceValidatedPayload:
    """RFC-0001 — Structured output passed validation on a given attempt.

    Raises:
        ValueError: If ``fence_id`` or ``schema_name`` is empty, ``attempt``
            is not a positive int, or ``output_type`` is given but is not a
            recognised output type.
    """

    fence_id: str
    schema_name: str
    attempt: int
    output_type: str | None = None  # "json_schema"|"pydantic"|"regex"|"xml"|"custom"
    span_id: str | None = None
    validation_duration_ms: float | None = None

    def __post_init__(self) -> None:
        """Validate identifiers, the attempt counter and the output type."""
        if not self.fence_id:
            raise ValueError("FenceValidatedPayload.fence_id must be non-empty")
        if not self.schema_name:
            raise ValueError("FenceValidatedPayload.schema_name must be non-empty")
        # bool is a subclass of int, so reject it explicitly: ``True`` is not
        # a meaningful attempt number.
        if (
            isinstance(self.attempt, bool)
            or not isinstance(self.attempt, int)
            or self.attempt < 1
        ):
            raise ValueError("FenceValidatedPayload.attempt must be a positive int")
        if self.output_type is not None and self.output_type not in _VALID_OUTPUT_TYPES:
            raise ValueError(
                "FenceValidatedPayload.output_type must be one of "
                f"{sorted(_VALID_OUTPUT_TYPES)}"
            )

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional fields are omitted when ``None``.
        """
        d: dict[str, Any] = {
            "fence_id": self.fence_id,
            "schema_name": self.schema_name,
            "attempt": self.attempt,
        }
        if self.output_type is not None:
            d["output_type"] = self.output_type
        if self.span_id is not None:
            d["span_id"] = self.span_id
        if self.validation_duration_ms is not None:
            d["validation_duration_ms"] = self.validation_duration_ms
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> FenceValidatedPayload:
        """Deserialise from a plain ``dict``.

        A missing or ``None`` ``validation_duration_ms`` yields ``None``.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If the resulting payload fails validation.
        """
        duration = data.get("validation_duration_ms")
        return cls(
            fence_id=data["fence_id"],
            schema_name=data["schema_name"],
            attempt=int(data["attempt"]),
            output_type=data.get("output_type"),
            span_id=data.get("span_id"),
            # An explicit null must not reach float().
            validation_duration_ms=float(duration) if duration is not None else None,
        )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class FenceRetryTriggeredPayload:
    """RFC-0001 — A validation failure triggered a retry.

    Raises:
        ValueError: If ``fence_id``, ``schema_name`` or ``violation_summary``
            is empty, ``attempt`` or ``max_attempts`` is not a positive int,
            or ``output_type`` is given but is not a recognised output type.
    """

    fence_id: str
    schema_name: str
    attempt: int
    max_attempts: int
    violation_summary: str
    output_type: str | None = None
    span_id: str | None = None

    def __post_init__(self) -> None:
        """Validate identifiers, counters, the summary and the output type."""
        if not self.fence_id:
            raise ValueError("FenceRetryTriggeredPayload.fence_id must be non-empty")
        if not self.schema_name:
            raise ValueError("FenceRetryTriggeredPayload.schema_name must be non-empty")
        # bool is a subclass of int, so reject it explicitly for both counters.
        if (
            isinstance(self.attempt, bool)
            or not isinstance(self.attempt, int)
            or self.attempt < 1
        ):
            raise ValueError("FenceRetryTriggeredPayload.attempt must be a positive int")
        if (
            isinstance(self.max_attempts, bool)
            or not isinstance(self.max_attempts, int)
            or self.max_attempts < 1
        ):
            raise ValueError("FenceRetryTriggeredPayload.max_attempts must be a positive int")
        if not self.violation_summary:
            raise ValueError("FenceRetryTriggeredPayload.violation_summary must be non-empty")
        if self.output_type is not None and self.output_type not in _VALID_OUTPUT_TYPES:
            raise ValueError(
                "FenceRetryTriggeredPayload.output_type must be one of "
                f"{sorted(_VALID_OUTPUT_TYPES)}"
            )

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``output_type`` and ``span_id`` are omitted when ``None``.
        """
        d: dict[str, Any] = {
            "fence_id": self.fence_id,
            "schema_name": self.schema_name,
            "attempt": self.attempt,
            "max_attempts": self.max_attempts,
            "violation_summary": self.violation_summary,
        }
        if self.output_type is not None:
            d["output_type"] = self.output_type
        if self.span_id is not None:
            d["span_id"] = self.span_id
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> FenceRetryTriggeredPayload:
        """Deserialise from a plain ``dict``.

        ``attempt`` and ``max_attempts`` are coerced with ``int()``.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If the resulting payload fails validation.
        """
        return cls(
            fence_id=data["fence_id"],
            schema_name=data["schema_name"],
            attempt=int(data["attempt"]),
            max_attempts=int(data["max_attempts"]),
            violation_summary=data["violation_summary"],
            output_type=data.get("output_type"),
            span_id=data.get("span_id"),
        )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@dataclass
class FenceMaxRetriesExceededPayload:
    """RFC-0001 — All retry attempts exhausted; output remains invalid.

    Raises:
        ValueError: If ``fence_id``, ``schema_name`` or
            ``final_violation_summary`` is empty, ``attempts_made`` is not a
            positive int, or ``output_type`` is given but is not a recognised
            output type.
    """

    fence_id: str
    schema_name: str
    attempts_made: int
    final_violation_summary: str
    output_type: str | None = None
    span_id: str | None = None
    total_extra_cost: CostBreakdown | None = None

    def __post_init__(self) -> None:
        """Validate identifiers, the attempt count, the summary and output type."""
        if not self.fence_id:
            raise ValueError("FenceMaxRetriesExceededPayload.fence_id must be non-empty")
        if not self.schema_name:
            raise ValueError("FenceMaxRetriesExceededPayload.schema_name must be non-empty")
        # bool is a subclass of int, so reject it explicitly.
        if (
            isinstance(self.attempts_made, bool)
            or not isinstance(self.attempts_made, int)
            or self.attempts_made < 1
        ):
            raise ValueError("FenceMaxRetriesExceededPayload.attempts_made must be a positive int")
        if not self.final_violation_summary:
            raise ValueError(
                "FenceMaxRetriesExceededPayload.final_violation_summary must be non-empty"
            )
        if self.output_type is not None and self.output_type not in _VALID_OUTPUT_TYPES:
            raise ValueError(
                "FenceMaxRetriesExceededPayload.output_type must be one of "
                f"{sorted(_VALID_OUTPUT_TYPES)}"
            )

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional fields are omitted when ``None``; ``total_extra_cost`` is
        serialised through its own ``to_dict()``.
        """
        d: dict[str, Any] = {
            "fence_id": self.fence_id,
            "schema_name": self.schema_name,
            "attempts_made": self.attempts_made,
            "final_violation_summary": self.final_violation_summary,
        }
        if self.output_type is not None:
            d["output_type"] = self.output_type
        if self.span_id is not None:
            d["span_id"] = self.span_id
        if self.total_extra_cost is not None:
            d["total_extra_cost"] = self.total_extra_cost.to_dict()
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> FenceMaxRetriesExceededPayload:
        """Deserialise from a plain ``dict``.

        A missing or ``None`` ``total_extra_cost`` yields ``None``.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If the resulting payload fails validation.
        """
        extra_cost = data.get("total_extra_cost")
        return cls(
            fence_id=data["fence_id"],
            schema_name=data["schema_name"],
            attempts_made=int(data["attempts_made"]),
            final_violation_summary=data["final_violation_summary"],
            output_type=data.get("output_type"),
            span_id=data.get("span_id"),
            # An explicit null must not reach CostBreakdown.from_dict().
            total_extra_cost=(
                CostBreakdown.from_dict(extra_cost) if extra_cost is not None else None
            ),
        )
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""spanforge.namespaces.guard — Guard payload types (RFC-0001).
|
|
2
|
+
|
|
3
|
+
A single ``GuardPayload`` class is used for all four guard event types.
|
|
4
|
+
|
|
5
|
+
Classes
|
|
6
|
+
-------
|
|
7
|
+
GuardPayload llm.guard.input.blocked, llm.guard.input.passed,
|
|
8
|
+
llm.guard.output.blocked, llm.guard.output.passed
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
__all__ = ["GuardPayload"]
|
|
16
|
+
|
|
17
|
+
_VALID_DIRECTIONS = frozenset({"input", "output"})
_VALID_ACTIONS = frozenset({"blocked", "passed", "flagged", "modified", "escalated"})


@dataclass
class GuardPayload:
    """RFC-0001 — Result of a guard classifier applied to LLM input or output.

    Used with all four guard event types:
    ``llm.guard.input.blocked``, ``llm.guard.input.passed``,
    ``llm.guard.output.blocked``, ``llm.guard.output.passed``.

    ``content_hash`` stores a SHA-256 hash of the content that was classified.
    Raw content MUST NOT be stored.

    Raises:
        ValueError: If ``classifier`` is not a non-empty string, ``direction``
            or ``action`` is not a recognised value, ``score`` is not a
            number, or ``latency_ms`` is negative.
    """

    classifier: str
    direction: str  # "input" | "output"
    action: str  # "blocked"|"passed"|"flagged"|"modified"|"escalated"
    score: float
    score_min: float | None = None
    score_max: float | None = None
    threshold: float | None = None
    categories: list[str] = field(default_factory=list)
    triggered_categories: list[str] = field(default_factory=list)
    span_id: str | None = None
    latency_ms: float | None = None
    policy_id: str | None = None
    content_hash: str | None = None  # 64 lowercase hex chars, SHA-256

    def __post_init__(self) -> None:
        """Validate the classifier name, enumerated fields, score and latency."""
        if not isinstance(self.classifier, str) or not self.classifier:
            raise ValueError("GuardPayload.classifier must be non-empty")
        if self.direction not in _VALID_DIRECTIONS:
            raise ValueError(f"GuardPayload.direction must be one of {sorted(_VALID_DIRECTIONS)}")
        if self.action not in _VALID_ACTIONS:
            raise ValueError(f"GuardPayload.action must be one of {sorted(_VALID_ACTIONS)}")
        if not isinstance(self.score, (int, float)):
            raise ValueError("GuardPayload.score must be a number")  # noqa: TRY004
        if self.latency_ms is not None and self.latency_ms < 0:
            raise ValueError("GuardPayload.latency_ms must be non-negative")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional scalar fields are omitted when ``None``; the two category
        lists are omitted when empty and otherwise copied.
        """
        d: dict[str, Any] = {
            "classifier": self.classifier,
            "direction": self.direction,
            "action": self.action,
            "score": self.score,
        }
        if self.score_min is not None:
            d["score_min"] = self.score_min
        if self.score_max is not None:
            d["score_max"] = self.score_max
        if self.threshold is not None:
            d["threshold"] = self.threshold
        if self.categories:
            d["categories"] = list(self.categories)
        if self.triggered_categories:
            d["triggered_categories"] = list(self.triggered_categories)
        if self.span_id is not None:
            d["span_id"] = self.span_id
        if self.latency_ms is not None:
            d["latency_ms"] = self.latency_ms
        if self.policy_id is not None:
            d["policy_id"] = self.policy_id
        if self.content_hash is not None:
            d["content_hash"] = self.content_hash
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> GuardPayload:
        """Deserialise from a plain ``dict``.

        An optional numeric key that is absent or explicitly ``None`` yields
        ``None``; a missing or ``None`` category list yields an empty list.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If the resulting payload fails validation.
        """
        # Check values rather than key membership so an explicit null does not
        # reach float() or list().
        return cls(
            classifier=data["classifier"],
            direction=data["direction"],
            action=data["action"],
            score=float(data["score"]),
            score_min=float(data["score_min"]) if data.get("score_min") is not None else None,
            score_max=float(data["score_max"]) if data.get("score_max") is not None else None,
            threshold=float(data["threshold"]) if data.get("threshold") is not None else None,
            categories=list(data.get("categories") or []),
            triggered_categories=list(data.get("triggered_categories") or []),
            span_id=data.get("span_id"),
            latency_ms=float(data["latency_ms"]) if data.get("latency_ms") is not None else None,
            policy_id=data.get("policy_id"),
            content_hash=data.get("content_hash"),
        )
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""spanforge.namespaces.hitl — Human-in-the-Loop namespace payload types (RFC-0001 SPANFORGE).
|
|
2
|
+
|
|
3
|
+
Classes
|
|
4
|
+
-------
|
|
5
|
+
HITLPayload hitl.queued / hitl.reviewed / hitl.escalated / hitl.timeout
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any, Literal
|
|
11
|
+
|
|
12
|
+
__all__ = ["HITLPayload"]
|
|
13
|
+
|
|
14
|
+
_VALID_STATUSES = frozenset({"queued", "approved", "rejected", "escalated", "timeout"})
_VALID_RISK_TIERS = frozenset({"low", "medium", "high", "critical"})


@dataclass
class HITLPayload:
    """RFC-0001 SPANFORGE — payload for hitl.* events.

    Captures human-in-the-loop review decisions for EU AI Act mandatory
    human oversight on high-risk AI systems (R — Responsibility).

    Raises:
        ValueError: If ``decision_id``, ``agent_id`` or ``reason`` is empty,
            ``risk_tier`` or ``status`` is not a recognised value,
            ``sla_seconds`` is not positive, or ``confidence`` is given but
            lies outside ``[0.0, 1.0]``.
    """

    decision_id: str
    agent_id: str
    risk_tier: Literal["low", "medium", "high", "critical"]
    status: Literal["queued", "approved", "rejected", "escalated", "timeout"]
    reason: str
    reviewer: str | None = None
    sla_seconds: int = 3600
    queued_at: str | None = None  # ISO 8601
    resolved_at: str | None = None  # ISO 8601
    escalation_tier: int = 0
    confidence: float | None = None

    def __post_init__(self) -> None:
        """Validate identifiers, enumerated fields, the SLA and confidence."""
        if not self.decision_id:
            raise ValueError("HITLPayload.decision_id must be non-empty")
        if not self.agent_id:
            raise ValueError("HITLPayload.agent_id must be non-empty")
        if self.risk_tier not in _VALID_RISK_TIERS:
            raise ValueError(
                f"HITLPayload.risk_tier must be one of {sorted(_VALID_RISK_TIERS)}"
            )
        if self.status not in _VALID_STATUSES:
            raise ValueError(
                f"HITLPayload.status must be one of {sorted(_VALID_STATUSES)}"
            )
        if not self.reason:
            raise ValueError("HITLPayload.reason must be non-empty")
        if self.sla_seconds <= 0:
            raise ValueError("HITLPayload.sla_seconds must be > 0")
        if self.confidence is not None and not (0.0 <= self.confidence <= 1.0):
            raise ValueError("HITLPayload.confidence must be in [0.0, 1.0]")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``sla_seconds`` and ``escalation_tier`` are always emitted; the other
        optional fields are omitted when ``None``.
        """
        d: dict[str, Any] = {
            "decision_id": self.decision_id,
            "agent_id": self.agent_id,
            "risk_tier": self.risk_tier,
            "status": self.status,
            "reason": self.reason,
            "sla_seconds": self.sla_seconds,
            "escalation_tier": self.escalation_tier,
        }
        if self.reviewer is not None:
            d["reviewer"] = self.reviewer
        if self.queued_at is not None:
            d["queued_at"] = self.queued_at
        if self.resolved_at is not None:
            d["resolved_at"] = self.resolved_at
        if self.confidence is not None:
            d["confidence"] = self.confidence
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> HITLPayload:
        """Deserialise from a plain ``dict``.

        A missing or ``None`` ``sla_seconds`` / ``escalation_tier`` falls back
        to the field default (3600 / 0).  ``confidence`` is coerced with
        ``float()`` when present and not ``None``.

        Raises:
            KeyError: If a required key is missing from ``data``.
            ValueError: If the resulting payload fails validation.
        """
        sla_seconds = data.get("sla_seconds")
        escalation_tier = data.get("escalation_tier")
        confidence = data.get("confidence")
        return cls(
            decision_id=data["decision_id"],
            agent_id=data["agent_id"],
            risk_tier=data["risk_tier"],
            status=data["status"],
            reason=data["reason"],
            reviewer=data.get("reviewer"),
            # ``data.get(key, default)`` returns None for an explicit null,
            # which int() cannot convert, so apply the default by value.
            sla_seconds=int(sla_seconds) if sla_seconds is not None else 3600,
            queued_at=data.get("queued_at"),
            resolved_at=data.get("resolved_at"),
            escalation_tier=int(escalation_tier) if escalation_tier is not None else 0,
            # Coerce before validation so the range check compares numbers.
            confidence=float(confidence) if confidence is not None else None,
        )
|