spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,79 @@
1
+ """spanforge.namespaces.drift \u2014 Drift namespace payload types (RFC-0001 SPANFORGE).
2
+
3
+ Classes
4
+ -------
5
+ DriftPayload drift.detected / drift.threshold_breach / drift.resolved
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Any, Literal
11
+
12
+ __all__ = ["DriftPayload"]
13
+
14
# Closed set of values accepted for ``DriftPayload.status``.
_VALID_STATUSES = frozenset({"detected", "threshold_breach", "resolved"})


@dataclass
class DriftPayload:
    """RFC-0001 SPANFORGE \u2014 payload for drift.* events.

    Captures Z-score and KL-divergence statistical drift signals against the
    deployment baseline (T \u2014 Traceability).

    Raises
    ------
    ValueError
        If ``metric_name`` or ``agent_id`` is empty, ``status`` is not one of
        the allowed statuses, ``window_seconds`` is not positive, or
        ``baseline_stddev`` is negative.
    """

    metric_name: str
    agent_id: str
    current_value: float
    baseline_mean: float
    baseline_stddev: float
    z_score: float
    threshold: float
    window_seconds: int
    status: Literal["detected", "threshold_breach", "resolved"]
    kl_divergence: float | None = None  # optional; omitted from to_dict() when None

    def __post_init__(self) -> None:
        """Validate required identifiers, status and numeric bounds."""
        if not self.metric_name:
            raise ValueError("DriftPayload.metric_name must be non-empty")
        if not self.agent_id:
            raise ValueError("DriftPayload.agent_id must be non-empty")
        if self.status not in _VALID_STATUSES:
            raise ValueError(
                f"DriftPayload.status must be one of {sorted(_VALID_STATUSES)}"
            )
        if self.window_seconds <= 0:
            raise ValueError("DriftPayload.window_seconds must be > 0")
        if self.baseline_stddev < 0:
            raise ValueError("DriftPayload.baseline_stddev must be >= 0")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``kl_divergence`` is only included when it is not ``None``.
        """
        d: dict[str, Any] = {
            "metric_name": self.metric_name,
            "agent_id": self.agent_id,
            "current_value": self.current_value,
            "baseline_mean": self.baseline_mean,
            "baseline_stddev": self.baseline_stddev,
            "z_score": self.z_score,
            "threshold": self.threshold,
            "window_seconds": self.window_seconds,
            "status": self.status,
        }
        if self.kl_divergence is not None:
            d["kl_divergence"] = self.kl_divergence
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> DriftPayload:
        """Deserialise from a plain ``dict``.

        Numeric fields are coerced with ``float()`` / ``int()``.  A missing
        or ``None`` ``kl_divergence`` stays ``None``; any other value is
        coerced to ``float`` like the remaining numeric fields.

        Raises
        ------
        KeyError
            If a required key is absent.
        ValueError
            If a value cannot be coerced or fails validation.
        """
        kl_raw = data.get("kl_divergence")
        return cls(
            metric_name=data["metric_name"],
            agent_id=data["agent_id"],
            current_value=float(data["current_value"]),
            baseline_mean=float(data["baseline_mean"]),
            baseline_stddev=float(data["baseline_stddev"]),
            z_score=float(data["z_score"]),
            threshold=float(data["threshold"]),
            window_seconds=int(data["window_seconds"]),
            status=data["status"],
            kl_divergence=float(kl_raw) if kl_raw is not None else None,
        )
@@ -0,0 +1,232 @@
1
+ """spanforge.namespaces.eval_ — Evaluation payload types (RFC-0001).
2
+
3
+ Classes
4
+ -------
5
+ EvalScoreRecordedPayload llm.eval.score.recorded
6
+ EvalRegressionDetectedPayload llm.eval.regression.detected
7
+ EvalScenarioStartedPayload llm.eval.scenario.started
8
+ EvalScenarioCompletedPayload llm.eval.scenario.completed
9
+ """
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass, field
13
+ from typing import Any
14
+
15
+ from spanforge.namespaces.trace import ModelInfo
16
+
17
+ __all__ = [
18
+ "EvalRegressionDetectedPayload",
19
+ "EvalScenarioCompletedPayload",
20
+ "EvalScenarioStartedPayload",
21
+ "EvalScoreRecordedPayload",
22
+ ]
23
+
24
+ _VALID_SEVERITIES = frozenset({"low", "medium", "high", "critical"})
25
+ _VALID_STATUSES = frozenset({"passed", "failed", "error", "cancelled"})
26
+
27
+
28
@dataclass
class EvalScoreRecordedPayload:
    """RFC-0001 — A single evaluation score recorded for a subject event.

    Raises
    ------
    ValueError
        If ``evaluator`` or ``metric_name`` is not a non-empty string.
    """

    evaluator: str
    metric_name: str
    score: float
    score_min: float | None = None
    score_max: float | None = None
    threshold: float | None = None
    passed: bool | None = None
    subject_event_id: str | None = None
    subject_type: str | None = None
    eval_run_id: str | None = None
    rationale: str | None = None
    model: ModelInfo | None = None  # judge model

    def __post_init__(self) -> None:
        """Validate that the two required identifiers are non-empty strings."""
        if not isinstance(self.evaluator, str) or not self.evaluator:
            raise ValueError("EvalScoreRecordedPayload.evaluator must be non-empty")
        if not isinstance(self.metric_name, str) or not self.metric_name:
            raise ValueError("EvalScoreRecordedPayload.metric_name must be non-empty")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional fields are only emitted when they are not ``None``.
        """
        d: dict[str, Any] = {
            "evaluator": self.evaluator,
            "metric_name": self.metric_name,
            "score": self.score,
        }
        for f in ("score_min", "score_max", "threshold", "passed",
                  "subject_event_id", "subject_type", "eval_run_id", "rationale"):
            v = getattr(self, f)
            if v is not None:
                d[f] = v
        if self.model is not None:
            d["model"] = self.model.to_dict()
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> EvalScoreRecordedPayload:
        """Deserialise from a plain ``dict``.

        An optional key that is absent and one that is present with a
        ``None`` value (e.g. JSON ``null``) are treated identically: the
        field becomes ``None``.  In particular ``"passed": None`` is kept as
        ``None`` rather than being turned into ``False``.
        """

        def _opt_float(key: str) -> float | None:
            raw = data.get(key)
            return None if raw is None else float(raw)

        passed_raw = data.get("passed")
        model_raw = data.get("model")
        return cls(
            evaluator=data["evaluator"],
            metric_name=data["metric_name"],
            score=float(data["score"]),
            score_min=_opt_float("score_min"),
            score_max=_opt_float("score_max"),
            threshold=_opt_float("threshold"),
            passed=None if passed_raw is None else bool(passed_raw),
            subject_event_id=data.get("subject_event_id"),
            subject_type=data.get("subject_type"),
            eval_run_id=data.get("eval_run_id"),
            rationale=data.get("rationale"),
            model=ModelInfo.from_dict(model_raw) if model_raw is not None else None,
        )
84
+
85
+
86
@dataclass
class EvalRegressionDetectedPayload:
    """RFC-0001 — A metric regression detected between baseline and current.

    Raises
    ------
    ValueError
        If ``metric_name`` is not a non-empty string, or ``severity`` is
        given but is not one of the allowed severities.
    """

    metric_name: str
    baseline_score: float
    current_score: float
    delta: float
    regression_pct: float
    severity: str | None = None  # "low"|"medium"|"high"|"critical"
    affected_model: ModelInfo | None = None
    eval_run_id: str | None = None
    sample_count: int | None = None

    def __post_init__(self) -> None:
        """Validate the metric name and, when present, the severity."""
        if not isinstance(self.metric_name, str) or not self.metric_name:
            raise ValueError("EvalRegressionDetectedPayload.metric_name must be non-empty")
        if self.severity is not None and self.severity not in _VALID_SEVERITIES:
            raise ValueError(f"EvalRegressionDetectedPayload.severity must be one of {sorted(_VALID_SEVERITIES)}")  # noqa: E501

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional fields are only emitted when they are not ``None``.
        """
        d: dict[str, Any] = {
            "metric_name": self.metric_name,
            "baseline_score": self.baseline_score,
            "current_score": self.current_score,
            "delta": self.delta,
            "regression_pct": self.regression_pct,
        }
        if self.severity is not None:
            d["severity"] = self.severity
        if self.affected_model is not None:
            d["affected_model"] = self.affected_model.to_dict()
        if self.eval_run_id is not None:
            d["eval_run_id"] = self.eval_run_id
        if self.sample_count is not None:
            d["sample_count"] = self.sample_count
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> EvalRegressionDetectedPayload:
        """Deserialise from a plain ``dict``.

        ``affected_model`` and ``sample_count`` become ``None`` both when the
        key is absent and when it is present with a ``None`` value.
        """
        model_raw = data.get("affected_model")
        count_raw = data.get("sample_count")
        return cls(
            metric_name=data["metric_name"],
            baseline_score=float(data["baseline_score"]),
            current_score=float(data["current_score"]),
            delta=float(data["delta"]),
            regression_pct=float(data["regression_pct"]),
            severity=data.get("severity"),
            affected_model=ModelInfo.from_dict(model_raw) if model_raw is not None else None,
            eval_run_id=data.get("eval_run_id"),
            sample_count=int(count_raw) if count_raw is not None else None,
        )
139
+
140
+
141
@dataclass
class EvalScenarioStartedPayload:
    """RFC-0001 — An evaluation scenario has started.

    Raises
    ------
    ValueError
        If ``scenario_id``, ``scenario_name`` or ``evaluator`` is empty.
    """

    scenario_id: str
    scenario_name: str
    evaluator: str
    dataset_id: str | None = None
    expected_sample_count: int | None = None
    metrics: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Validate that the three required identifiers are non-empty."""
        if not self.scenario_id:
            raise ValueError("EvalScenarioStartedPayload.scenario_id must be non-empty")
        if not self.scenario_name:
            raise ValueError("EvalScenarioStartedPayload.scenario_name must be non-empty")
        if not self.evaluator:
            raise ValueError("EvalScenarioStartedPayload.evaluator must be non-empty")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``dataset_id`` and ``expected_sample_count`` are emitted only when
        not ``None``; ``metrics`` only when non-empty (as a copy).
        """
        d: dict[str, Any] = {
            "scenario_id": self.scenario_id,
            "scenario_name": self.scenario_name,
            "evaluator": self.evaluator,
        }
        if self.dataset_id is not None:
            d["dataset_id"] = self.dataset_id
        if self.expected_sample_count is not None:
            d["expected_sample_count"] = self.expected_sample_count
        if self.metrics:
            d["metrics"] = list(self.metrics)
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> EvalScenarioStartedPayload:
        """Deserialise from a plain ``dict``.

        A missing or ``None`` ``expected_sample_count`` yields ``None``; a
        missing or ``None`` ``metrics`` yields an empty list.
        """
        count_raw = data.get("expected_sample_count")
        metrics_raw = data.get("metrics")
        return cls(
            scenario_id=data["scenario_id"],
            scenario_name=data["scenario_name"],
            evaluator=data["evaluator"],
            dataset_id=data.get("dataset_id"),
            expected_sample_count=int(count_raw) if count_raw is not None else None,
            metrics=list(metrics_raw) if metrics_raw is not None else [],
        )
186
+
187
+
188
@dataclass
class EvalScenarioCompletedPayload:
    """RFC-0001 — An evaluation scenario has completed.

    Raises
    ------
    ValueError
        If ``scenario_id`` is empty, ``status`` is not one of the allowed
        statuses, or ``duration_ms`` is negative.
    """

    scenario_id: str
    status: str  # "passed"|"failed"|"error"|"cancelled"
    duration_ms: float
    completed_sample_count: int | None = None
    scores_summary: dict[str, float] | None = None
    errors: list[str] | None = None

    def __post_init__(self) -> None:
        """Validate the scenario id, status and duration."""
        if not self.scenario_id:
            raise ValueError("EvalScenarioCompletedPayload.scenario_id must be non-empty")
        if self.status not in _VALID_STATUSES:
            raise ValueError(f"EvalScenarioCompletedPayload.status must be one of {sorted(_VALID_STATUSES)}")  # noqa: E501
        if self.duration_ms < 0:
            raise ValueError("EvalScenarioCompletedPayload.duration_ms must be non-negative")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional fields are emitted only when not ``None``; the summary
        mapping and error list are shallow-copied.
        """
        d: dict[str, Any] = {
            "scenario_id": self.scenario_id,
            "status": self.status,
            "duration_ms": self.duration_ms,
        }
        if self.completed_sample_count is not None:
            d["completed_sample_count"] = self.completed_sample_count
        if self.scores_summary is not None:
            d["scores_summary"] = dict(self.scores_summary)
        if self.errors is not None:
            d["errors"] = list(self.errors)
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> EvalScenarioCompletedPayload:
        """Deserialise from a plain ``dict``.

        Each optional field becomes ``None`` both when its key is absent and
        when it is present with a ``None`` value.
        """
        count_raw = data.get("completed_sample_count")
        summary_raw = data.get("scores_summary")
        errors_raw = data.get("errors")
        return cls(
            scenario_id=data["scenario_id"],
            status=data["status"],
            duration_ms=float(data["duration_ms"]),
            completed_sample_count=int(count_raw) if count_raw is not None else None,
            scores_summary=dict(summary_raw) if summary_raw is not None else None,
            errors=list(errors_raw) if errors_raw is not None else None,
        )
@@ -0,0 +1,180 @@
1
+ """spanforge.namespaces.fence — Fence payload types (RFC-0001).
2
+
3
+ Classes
4
+ -------
5
+ FenceValidatedPayload llm.fence.validated
6
+ FenceRetryTriggeredPayload llm.fence.retry.triggered
7
+ FenceMaxRetriesExceededPayload llm.fence.max_retries.exceeded
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass
12
+ from typing import Any
13
+
14
+ from spanforge.namespaces.trace import CostBreakdown
15
+
16
+ __all__ = [
17
+ "FenceMaxRetriesExceededPayload",
18
+ "FenceRetryTriggeredPayload",
19
+ "FenceValidatedPayload",
20
+ ]
21
+
22
# Closed set of values accepted for the ``output_type`` field of fence payloads.
_VALID_OUTPUT_TYPES = frozenset({"json_schema", "pydantic", "regex", "xml", "custom"})


@dataclass
class FenceValidatedPayload:
    """RFC-0001 — Structured output passed validation on a given attempt.

    Raises
    ------
    ValueError
        If ``fence_id`` or ``schema_name`` is empty, ``attempt`` is not a
        positive ``int`` (``bool`` is rejected), or ``output_type`` is given
        but is not one of the allowed output types.
    """

    fence_id: str
    schema_name: str
    attempt: int
    output_type: str | None = None  # "json_schema"|"pydantic"|"regex"|"xml"|"custom"
    span_id: str | None = None
    validation_duration_ms: float | None = None

    def __post_init__(self) -> None:
        """Validate identifiers, the attempt counter and the output type."""
        if not self.fence_id:
            raise ValueError("FenceValidatedPayload.fence_id must be non-empty")
        if not self.schema_name:
            raise ValueError("FenceValidatedPayload.schema_name must be non-empty")
        # bool is a subclass of int, so it has to be excluded explicitly;
        # otherwise ``attempt=True`` would pass as the integer 1.
        if (
            isinstance(self.attempt, bool)
            or not isinstance(self.attempt, int)
            or self.attempt < 1
        ):
            raise ValueError("FenceValidatedPayload.attempt must be a positive int")
        if self.output_type is not None and self.output_type not in _VALID_OUTPUT_TYPES:
            raise ValueError(f"FenceValidatedPayload.output_type must be one of {sorted(_VALID_OUTPUT_TYPES)}")  # noqa: E501

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional fields are only emitted when they are not ``None``.
        """
        d: dict[str, Any] = {
            "fence_id": self.fence_id,
            "schema_name": self.schema_name,
            "attempt": self.attempt,
        }
        if self.output_type is not None:
            d["output_type"] = self.output_type
        if self.span_id is not None:
            d["span_id"] = self.span_id
        if self.validation_duration_ms is not None:
            d["validation_duration_ms"] = self.validation_duration_ms
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> FenceValidatedPayload:
        """Deserialise from a plain ``dict``.

        ``validation_duration_ms`` becomes ``None`` both when the key is
        absent and when it is present with a ``None`` value.
        """
        duration_raw = data.get("validation_duration_ms")
        return cls(
            fence_id=data["fence_id"],
            schema_name=data["schema_name"],
            attempt=int(data["attempt"]),
            output_type=data.get("output_type"),
            span_id=data.get("span_id"),
            validation_duration_ms=float(duration_raw) if duration_raw is not None else None,
        )
72
+
73
+
74
@dataclass
class FenceRetryTriggeredPayload:
    """RFC-0001 — A validation failure triggered a retry.

    Raises
    ------
    ValueError
        If ``fence_id``, ``schema_name`` or ``violation_summary`` is empty,
        ``attempt`` or ``max_attempts`` is not a positive ``int`` (``bool``
        is rejected), or ``output_type`` is given but is not one of the
        allowed output types.
    """

    fence_id: str
    schema_name: str
    attempt: int
    max_attempts: int
    violation_summary: str
    output_type: str | None = None
    span_id: str | None = None

    def __post_init__(self) -> None:
        """Validate identifiers, both counters, the summary and output type."""
        if not self.fence_id:
            raise ValueError("FenceRetryTriggeredPayload.fence_id must be non-empty")
        if not self.schema_name:
            raise ValueError("FenceRetryTriggeredPayload.schema_name must be non-empty")
        # bool is a subclass of int, so it has to be excluded explicitly;
        # otherwise ``True`` would pass as the integer 1.
        if (
            isinstance(self.attempt, bool)
            or not isinstance(self.attempt, int)
            or self.attempt < 1
        ):
            raise ValueError("FenceRetryTriggeredPayload.attempt must be a positive int")
        if (
            isinstance(self.max_attempts, bool)
            or not isinstance(self.max_attempts, int)
            or self.max_attempts < 1
        ):
            raise ValueError("FenceRetryTriggeredPayload.max_attempts must be a positive int")
        if not self.violation_summary:
            raise ValueError("FenceRetryTriggeredPayload.violation_summary must be non-empty")
        if self.output_type is not None and self.output_type not in _VALID_OUTPUT_TYPES:
            raise ValueError(f"FenceRetryTriggeredPayload.output_type must be one of {sorted(_VALID_OUTPUT_TYPES)}")  # noqa: E501

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``output_type`` and ``span_id`` are only emitted when not ``None``.
        """
        d: dict[str, Any] = {
            "fence_id": self.fence_id,
            "schema_name": self.schema_name,
            "attempt": self.attempt,
            "max_attempts": self.max_attempts,
            "violation_summary": self.violation_summary,
        }
        if self.output_type is not None:
            d["output_type"] = self.output_type
        if self.span_id is not None:
            d["span_id"] = self.span_id
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> FenceRetryTriggeredPayload:
        """Deserialise from a plain ``dict``; counters are coerced with ``int()``."""
        return cls(
            fence_id=data["fence_id"],
            schema_name=data["schema_name"],
            attempt=int(data["attempt"]),
            max_attempts=int(data["max_attempts"]),
            violation_summary=data["violation_summary"],
            output_type=data.get("output_type"),
            span_id=data.get("span_id"),
        )
127
+
128
+
129
@dataclass
class FenceMaxRetriesExceededPayload:
    """RFC-0001 — All retry attempts exhausted; output remains invalid.

    Raises
    ------
    ValueError
        If ``fence_id``, ``schema_name`` or ``final_violation_summary`` is
        empty, ``attempts_made`` is not a positive ``int`` (``bool`` is
        rejected), or ``output_type`` is given but is not one of the allowed
        output types.
    """

    fence_id: str
    schema_name: str
    attempts_made: int
    final_violation_summary: str
    output_type: str | None = None
    span_id: str | None = None
    total_extra_cost: CostBreakdown | None = None

    def __post_init__(self) -> None:
        """Validate identifiers, the attempt counter, summary and output type."""
        if not self.fence_id:
            raise ValueError("FenceMaxRetriesExceededPayload.fence_id must be non-empty")
        if not self.schema_name:
            raise ValueError("FenceMaxRetriesExceededPayload.schema_name must be non-empty")
        # bool is a subclass of int, so it has to be excluded explicitly;
        # otherwise ``attempts_made=True`` would pass as the integer 1.
        if (
            isinstance(self.attempts_made, bool)
            or not isinstance(self.attempts_made, int)
            or self.attempts_made < 1
        ):
            raise ValueError("FenceMaxRetriesExceededPayload.attempts_made must be a positive int")
        if not self.final_violation_summary:
            raise ValueError("FenceMaxRetriesExceededPayload.final_violation_summary must be non-empty")  # noqa: E501
        if self.output_type is not None and self.output_type not in _VALID_OUTPUT_TYPES:
            raise ValueError(f"FenceMaxRetriesExceededPayload.output_type must be one of {sorted(_VALID_OUTPUT_TYPES)}")  # noqa: E501

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional fields are emitted only when not ``None``;
        ``total_extra_cost`` is serialised through its own ``to_dict()``.
        """
        d: dict[str, Any] = {
            "fence_id": self.fence_id,
            "schema_name": self.schema_name,
            "attempts_made": self.attempts_made,
            "final_violation_summary": self.final_violation_summary,
        }
        if self.output_type is not None:
            d["output_type"] = self.output_type
        if self.span_id is not None:
            d["span_id"] = self.span_id
        if self.total_extra_cost is not None:
            d["total_extra_cost"] = self.total_extra_cost.to_dict()
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> FenceMaxRetriesExceededPayload:
        """Deserialise from a plain ``dict``.

        ``total_extra_cost`` becomes ``None`` both when the key is absent and
        when it is present with a ``None`` value.
        """
        cost_raw = data.get("total_extra_cost")
        return cls(
            fence_id=data["fence_id"],
            schema_name=data["schema_name"],
            attempts_made=int(data["attempts_made"]),
            final_violation_summary=data["final_violation_summary"],
            output_type=data.get("output_type"),
            span_id=data.get("span_id"),
            total_extra_cost=CostBreakdown.from_dict(cost_raw) if cost_raw is not None else None,
        )
@@ -0,0 +1,104 @@
1
+ """spanforge.namespaces.guard — Guard payload types (RFC-0001).
2
+
3
+ A single ``GuardPayload`` class is used for all four guard event types.
4
+
5
+ Classes
6
+ -------
7
+ GuardPayload llm.guard.input.blocked, llm.guard.input.passed,
8
+ llm.guard.output.blocked, llm.guard.output.passed
9
+ """
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass, field
13
+ from typing import Any
14
+
15
+ __all__ = ["GuardPayload"]
16
+
17
# Closed sets of values accepted for ``GuardPayload.direction`` / ``.action``.
_VALID_DIRECTIONS = frozenset({"input", "output"})
_VALID_ACTIONS = frozenset({"blocked", "passed", "flagged", "modified", "escalated"})


@dataclass
class GuardPayload:
    """RFC-0001 — Result of a guard classifier applied to LLM input or output.

    Used with all four guard event types:
    ``llm.guard.input.blocked``, ``llm.guard.input.passed``,
    ``llm.guard.output.blocked``, ``llm.guard.output.passed``.

    ``content_hash`` stores a SHA-256 hash of the content that was classified.
    Raw content MUST NOT be stored.

    Raises
    ------
    ValueError
        If ``classifier`` is not a non-empty string, ``direction`` or
        ``action`` is not one of the allowed values, ``score`` is not a
        number, or ``latency_ms`` is negative.
    """

    classifier: str
    direction: str  # "input" | "output"
    action: str  # "blocked"|"passed"|"flagged"|"modified"|"escalated"
    score: float
    score_min: float | None = None
    score_max: float | None = None
    threshold: float | None = None
    categories: list[str] = field(default_factory=list)
    triggered_categories: list[str] = field(default_factory=list)
    span_id: str | None = None
    latency_ms: float | None = None
    policy_id: str | None = None
    content_hash: str | None = None  # 64 lowercase hex chars, SHA-256

    def __post_init__(self) -> None:
        """Validate classifier, direction, action, score type and latency."""
        if not isinstance(self.classifier, str) or not self.classifier:
            raise ValueError("GuardPayload.classifier must be non-empty")
        if self.direction not in _VALID_DIRECTIONS:
            raise ValueError(f"GuardPayload.direction must be one of {sorted(_VALID_DIRECTIONS)}")
        if self.action not in _VALID_ACTIONS:
            raise ValueError(f"GuardPayload.action must be one of {sorted(_VALID_ACTIONS)}")
        if not isinstance(self.score, (int, float)):
            raise ValueError("GuardPayload.score must be a number")  # noqa: TRY004
        if self.latency_ms is not None and self.latency_ms < 0:
            raise ValueError("GuardPayload.latency_ms must be non-negative")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Optional scalars are emitted only when not ``None``; the two category
        lists only when non-empty (as copies).
        """
        d: dict[str, Any] = {
            "classifier": self.classifier,
            "direction": self.direction,
            "action": self.action,
            "score": self.score,
        }
        if self.score_min is not None:
            d["score_min"] = self.score_min
        if self.score_max is not None:
            d["score_max"] = self.score_max
        if self.threshold is not None:
            d["threshold"] = self.threshold
        if self.categories:
            d["categories"] = list(self.categories)
        if self.triggered_categories:
            d["triggered_categories"] = list(self.triggered_categories)
        if self.span_id is not None:
            d["span_id"] = self.span_id
        if self.latency_ms is not None:
            d["latency_ms"] = self.latency_ms
        if self.policy_id is not None:
            d["policy_id"] = self.policy_id
        if self.content_hash is not None:
            d["content_hash"] = self.content_hash
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> GuardPayload:
        """Deserialise from a plain ``dict``.

        An optional key that is absent and one that is present with a
        ``None`` value are treated identically: optional floats become
        ``None`` and the category lists become empty lists.
        """

        def _opt_float(key: str) -> float | None:
            raw = data.get(key)
            return None if raw is None else float(raw)

        def _str_list(key: str) -> list[str]:
            raw = data.get(key)
            return [] if raw is None else list(raw)

        return cls(
            classifier=data["classifier"],
            direction=data["direction"],
            action=data["action"],
            score=float(data["score"]),
            score_min=_opt_float("score_min"),
            score_max=_opt_float("score_max"),
            threshold=_opt_float("threshold"),
            categories=_str_list("categories"),
            triggered_categories=_str_list("triggered_categories"),
            span_id=data.get("span_id"),
            latency_ms=_opt_float("latency_ms"),
            policy_id=data.get("policy_id"),
            content_hash=data.get("content_hash"),
        )
@@ -0,0 +1,92 @@
1
+ """spanforge.namespaces.hitl — Human-in-the-Loop namespace payload types (RFC-0001 SPANFORGE).
2
+
3
+ Classes
4
+ -------
5
+ HITLPayload hitl.queued / hitl.reviewed / hitl.escalated / hitl.timeout
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Any, Literal
11
+
12
+ __all__ = ["HITLPayload"]
13
+
14
# Closed sets of values accepted for ``HITLPayload.status`` / ``.risk_tier``.
_VALID_STATUSES = frozenset({"queued", "approved", "rejected", "escalated", "timeout"})
_VALID_RISK_TIERS = frozenset({"low", "medium", "high", "critical"})


@dataclass
class HITLPayload:
    """RFC-0001 SPANFORGE — payload for hitl.* events.

    Captures human-in-the-loop review decisions for EU AI Act mandatory
    human oversight on high-risk AI systems (R — Responsibility).

    Raises
    ------
    ValueError
        If ``decision_id``, ``agent_id`` or ``reason`` is empty,
        ``risk_tier`` or ``status`` is not one of the allowed values,
        ``sla_seconds`` is not positive, or ``confidence`` is given but lies
        outside ``[0.0, 1.0]``.
    """

    decision_id: str
    agent_id: str
    risk_tier: Literal["low", "medium", "high", "critical"]
    status: Literal["queued", "approved", "rejected", "escalated", "timeout"]
    reason: str
    reviewer: str | None = None
    sla_seconds: int = 3600
    queued_at: str | None = None  # ISO 8601
    resolved_at: str | None = None  # ISO 8601
    escalation_tier: int = 0
    confidence: float | None = None

    def __post_init__(self) -> None:
        """Validate identifiers, enumerated fields, SLA and confidence range."""
        if not self.decision_id:
            raise ValueError("HITLPayload.decision_id must be non-empty")
        if not self.agent_id:
            raise ValueError("HITLPayload.agent_id must be non-empty")
        if self.risk_tier not in _VALID_RISK_TIERS:
            raise ValueError(
                f"HITLPayload.risk_tier must be one of {sorted(_VALID_RISK_TIERS)}"
            )
        if self.status not in _VALID_STATUSES:
            raise ValueError(
                f"HITLPayload.status must be one of {sorted(_VALID_STATUSES)}"
            )
        if not self.reason:
            raise ValueError("HITLPayload.reason must be non-empty")
        if self.sla_seconds <= 0:
            raise ValueError("HITLPayload.sla_seconds must be > 0")
        if self.confidence is not None and not (0.0 <= self.confidence <= 1.0):
            raise ValueError("HITLPayload.confidence must be in [0.0, 1.0]")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``sla_seconds`` and ``escalation_tier`` are always emitted; the
        remaining optional fields only when they are not ``None``.
        """
        d: dict[str, Any] = {
            "decision_id": self.decision_id,
            "agent_id": self.agent_id,
            "risk_tier": self.risk_tier,
            "status": self.status,
            "reason": self.reason,
            "sla_seconds": self.sla_seconds,
            "escalation_tier": self.escalation_tier,
        }
        if self.reviewer is not None:
            d["reviewer"] = self.reviewer
        if self.queued_at is not None:
            d["queued_at"] = self.queued_at
        if self.resolved_at is not None:
            d["resolved_at"] = self.resolved_at
        if self.confidence is not None:
            d["confidence"] = self.confidence
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> HITLPayload:
        """Deserialise from a plain ``dict``.

        A missing or ``None`` ``sla_seconds`` / ``escalation_tier`` falls back
        to the field default (3600 / 0).  ``confidence`` is coerced to
        ``float`` when present so that the range check compares numbers.
        """
        sla_raw = data.get("sla_seconds")
        tier_raw = data.get("escalation_tier")
        confidence_raw = data.get("confidence")
        return cls(
            decision_id=data["decision_id"],
            agent_id=data["agent_id"],
            risk_tier=data["risk_tier"],
            status=data["status"],
            reason=data["reason"],
            reviewer=data.get("reviewer"),
            sla_seconds=3600 if sla_raw is None else int(sla_raw),
            queued_at=data.get("queued_at"),
            resolved_at=data.get("resolved_at"),
            escalation_tier=0 if tier_raw is None else int(tier_raw),
            confidence=None if confidence_raw is None else float(confidence_raw),
        )