spanforge 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. spanforge/__init__.py +695 -0
  2. spanforge/_batch_exporter.py +322 -0
  3. spanforge/_cli.py +3081 -0
  4. spanforge/_hooks.py +340 -0
  5. spanforge/_server.py +953 -0
  6. spanforge/_span.py +1015 -0
  7. spanforge/_store.py +287 -0
  8. spanforge/_stream.py +654 -0
  9. spanforge/_trace.py +334 -0
  10. spanforge/_tracer.py +253 -0
  11. spanforge/actor.py +141 -0
  12. spanforge/alerts.py +464 -0
  13. spanforge/auto.py +181 -0
  14. spanforge/baseline.py +336 -0
  15. spanforge/config.py +460 -0
  16. spanforge/consent.py +227 -0
  17. spanforge/consumer.py +379 -0
  18. spanforge/core/__init__.py +5 -0
  19. spanforge/core/compliance_mapping.py +1060 -0
  20. spanforge/cost.py +597 -0
  21. spanforge/debug.py +514 -0
  22. spanforge/drift.py +488 -0
  23. spanforge/egress.py +63 -0
  24. spanforge/eval.py +575 -0
  25. spanforge/event.py +1052 -0
  26. spanforge/exceptions.py +246 -0
  27. spanforge/explain.py +181 -0
  28. spanforge/export/__init__.py +50 -0
  29. spanforge/export/append_only.py +342 -0
  30. spanforge/export/cloud.py +349 -0
  31. spanforge/export/datadog.py +495 -0
  32. spanforge/export/grafana.py +331 -0
  33. spanforge/export/jsonl.py +198 -0
  34. spanforge/export/otel_bridge.py +291 -0
  35. spanforge/export/otlp.py +817 -0
  36. spanforge/export/otlp_bridge.py +231 -0
  37. spanforge/export/redis_backend.py +282 -0
  38. spanforge/export/webhook.py +302 -0
  39. spanforge/exporters/__init__.py +29 -0
  40. spanforge/exporters/console.py +271 -0
  41. spanforge/exporters/jsonl.py +144 -0
  42. spanforge/hitl.py +297 -0
  43. spanforge/inspect.py +429 -0
  44. spanforge/integrations/__init__.py +39 -0
  45. spanforge/integrations/_pricing.py +277 -0
  46. spanforge/integrations/anthropic.py +388 -0
  47. spanforge/integrations/bedrock.py +306 -0
  48. spanforge/integrations/crewai.py +251 -0
  49. spanforge/integrations/gemini.py +349 -0
  50. spanforge/integrations/groq.py +444 -0
  51. spanforge/integrations/langchain.py +349 -0
  52. spanforge/integrations/llamaindex.py +370 -0
  53. spanforge/integrations/ollama.py +286 -0
  54. spanforge/integrations/openai.py +370 -0
  55. spanforge/integrations/together.py +485 -0
  56. spanforge/metrics.py +393 -0
  57. spanforge/metrics_export.py +342 -0
  58. spanforge/migrate.py +278 -0
  59. spanforge/model_registry.py +282 -0
  60. spanforge/models.py +407 -0
  61. spanforge/namespaces/__init__.py +215 -0
  62. spanforge/namespaces/audit.py +253 -0
  63. spanforge/namespaces/cache.py +209 -0
  64. spanforge/namespaces/chain.py +74 -0
  65. spanforge/namespaces/confidence.py +69 -0
  66. spanforge/namespaces/consent.py +85 -0
  67. spanforge/namespaces/cost.py +175 -0
  68. spanforge/namespaces/decision.py +135 -0
  69. spanforge/namespaces/diff.py +146 -0
  70. spanforge/namespaces/drift.py +79 -0
  71. spanforge/namespaces/eval_.py +232 -0
  72. spanforge/namespaces/fence.py +180 -0
  73. spanforge/namespaces/guard.py +104 -0
  74. spanforge/namespaces/hitl.py +92 -0
  75. spanforge/namespaces/latency.py +69 -0
  76. spanforge/namespaces/prompt.py +185 -0
  77. spanforge/namespaces/redact.py +172 -0
  78. spanforge/namespaces/template.py +197 -0
  79. spanforge/namespaces/tool_call.py +76 -0
  80. spanforge/namespaces/trace.py +1006 -0
  81. spanforge/normalizer.py +183 -0
  82. spanforge/presidio_backend.py +149 -0
  83. spanforge/processor.py +258 -0
  84. spanforge/prompt_registry.py +415 -0
  85. spanforge/py.typed +0 -0
  86. spanforge/redact.py +780 -0
  87. spanforge/sampling.py +500 -0
  88. spanforge/schemas/v1.0/schema.json +170 -0
  89. spanforge/schemas/v2.0/schema.json +536 -0
  90. spanforge/signing.py +1152 -0
  91. spanforge/stream.py +559 -0
  92. spanforge/testing.py +376 -0
  93. spanforge/trace.py +199 -0
  94. spanforge/types.py +696 -0
  95. spanforge/ulid.py +304 -0
  96. spanforge/validate.py +383 -0
  97. spanforge-2.0.0.dist-info/METADATA +1777 -0
  98. spanforge-2.0.0.dist-info/RECORD +101 -0
  99. spanforge-2.0.0.dist-info/WHEEL +4 -0
  100. spanforge-2.0.0.dist-info/entry_points.txt +5 -0
  101. spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,74 @@
1
+ """spanforge.namespaces.chain \u2014 Chain namespace payload types (RFC-0001 SPANFORGE).
2
+
3
+ Classes
4
+ -------
5
+ ChainPayload chain.started / chain.step_completed / chain.completed / chain.failed
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+ from typing import Any
11
+
12
+ __all__ = ["ChainPayload"]
13
+
14
+
15
@dataclass
class ChainPayload:
    """RFC-0001 SPANFORGE — payload for ``chain.*`` events.

    Captures multi-step prompt chain state: step sequence, inter-step data
    flow references, and cumulative error/cost/latency propagation.

    Attributes:
        chain_id: Identifier of the chain; must be non-empty.
        step_index: Position of the step; must be >= 0.
        step_name: Name of the step; must be non-empty.
        cumulative_latency_ms: Accumulated latency; must be >= 0.
        cumulative_token_cost: Accumulated token cost; must be >= 0.
        error_propagated: Error-propagation flag (not validated).
        total_steps: Optional number of steps; must be >= 1 when set.
        input_refs: Event ULIDs of inputs.
        output_refs: Event ULIDs of outputs.
    """

    chain_id: str
    step_index: int
    step_name: str
    cumulative_latency_ms: float
    cumulative_token_cost: float
    error_propagated: bool
    total_steps: int | None = None
    input_refs: list[str] = field(default_factory=list)  # event ULIDs of inputs
    output_refs: list[str] = field(default_factory=list)  # event ULIDs of outputs

    def __post_init__(self) -> None:
        """Validate field constraints.

        Raises:
            ValueError: If an identifier is empty, a numeric field is
                negative, or ``total_steps`` is set but smaller than 1.
        """
        if not self.chain_id:
            raise ValueError("ChainPayload.chain_id must be non-empty")
        if self.step_index < 0:
            raise ValueError("ChainPayload.step_index must be >= 0")
        if not self.step_name:
            raise ValueError("ChainPayload.step_name must be non-empty")
        if self.cumulative_latency_ms < 0:
            raise ValueError("ChainPayload.cumulative_latency_ms must be >= 0")
        if self.cumulative_token_cost < 0:
            raise ValueError("ChainPayload.cumulative_token_cost must be >= 0")
        if self.total_steps is not None and self.total_steps < 1:
            raise ValueError("ChainPayload.total_steps must be >= 1")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``total_steps`` is omitted when it is ``None``; the reference lists
        are always present and are copied so callers cannot mutate the
        payload through the returned mapping.
        """
        d: dict[str, Any] = {
            "chain_id": self.chain_id,
            "step_index": self.step_index,
            "step_name": self.step_name,
            "cumulative_latency_ms": self.cumulative_latency_ms,
            "cumulative_token_cost": self.cumulative_token_cost,
            "error_propagated": self.error_propagated,
            "input_refs": list(self.input_refs),
            "output_refs": list(self.output_refs),
        }
        if self.total_steps is not None:
            d["total_steps"] = self.total_steps
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ChainPayload:
        """Deserialise from a plain ``dict`` (e.g. decoded JSON).

        Required values are coerced to their declared types.  ``total_steps``
        receives the same ``int`` coercion as ``step_index`` when present,
        and an explicit ``None`` for any optional key is treated exactly
        like a missing key.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If a value fails coercion or validation.
        """
        total_steps = data.get("total_steps")
        return cls(
            chain_id=data["chain_id"],
            step_index=int(data["step_index"]),
            step_name=data["step_name"],
            cumulative_latency_ms=float(data["cumulative_latency_ms"]),
            cumulative_token_cost=float(data["cumulative_token_cost"]),
            error_propagated=bool(data["error_propagated"]),
            # Coerce like step_index so a stringly-typed count does not blow
            # up with TypeError inside __post_init__.
            total_steps=None if total_steps is None else int(total_steps),
            # ``or ()`` maps an explicit null onto the empty default.
            input_refs=list(data.get("input_refs") or ()),
            output_refs=list(data.get("output_refs") or ()),
        )
@@ -0,0 +1,69 @@
1
+ """spanforge.namespaces.confidence \u2014 Confidence namespace payload types (RFC-0001 SPANFORGE).
2
+
3
+ Classes
4
+ -------
5
+ ConfidencePayload confidence.sample / confidence.threshold_breach
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass
10
+ from typing import Any
11
+
12
+ __all__ = ["ConfidencePayload"]
13
+
14
+
15
@dataclass
class ConfidencePayload:
    """RFC-0001 SPANFORGE — payload for ``confidence.*`` events.

    Tracks output confidence score distributions per decision type and model,
    measured against the deployment baseline (T — Traceability).

    Attributes:
        model_id: Model identifier; must be non-empty.
        decision_type: Decision type label; must be non-empty.
        score: Confidence score; must lie in [0.0, 1.0].
        threshold_breached: Threshold-breach flag (not validated).
        sampled_at: ISO 8601 timestamp string; must be non-empty.
        baseline_mean: Optional baseline mean.
        baseline_stddev: Optional baseline standard deviation.
        z_score: Optional z-score.
    """

    model_id: str
    decision_type: str
    score: float  # 0.0-1.0
    threshold_breached: bool
    sampled_at: str  # ISO 8601 timestamp
    baseline_mean: float | None = None
    baseline_stddev: float | None = None
    z_score: float | None = None

    def __post_init__(self) -> None:
        """Validate field constraints.

        Raises:
            ValueError: If a required string is empty or ``score`` is
                outside [0.0, 1.0].
        """
        if not self.model_id:
            raise ValueError("ConfidencePayload.model_id must be non-empty")
        if not self.decision_type:
            raise ValueError("ConfidencePayload.decision_type must be non-empty")
        # Written as ``not (lo <= x <= hi)`` so that NaN is rejected as well.
        if not (0.0 <= self.score <= 1.0):
            raise ValueError("ConfidencePayload.score must be in [0.0, 1.0]")
        if not self.sampled_at:
            raise ValueError("ConfidencePayload.sampled_at must be non-empty")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        The three optional baseline statistics are omitted when ``None``.
        """
        d: dict[str, Any] = {
            "model_id": self.model_id,
            "decision_type": self.decision_type,
            "score": self.score,
            "threshold_breached": self.threshold_breached,
            "sampled_at": self.sampled_at,
        }
        if self.baseline_mean is not None:
            d["baseline_mean"] = self.baseline_mean
        if self.baseline_stddev is not None:
            d["baseline_stddev"] = self.baseline_stddev
        if self.z_score is not None:
            d["z_score"] = self.z_score
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ConfidencePayload:
        """Deserialise from a plain ``dict`` (e.g. decoded JSON).

        ``score`` and, when present, the optional baseline statistics are
        coerced with ``float`` so every numeric field matches its annotation
        regardless of how the producer encoded it.  A missing key and an
        explicit ``None`` both yield ``None``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If a value fails coercion or validation.
        """

        def _opt_float(key: str) -> float | None:
            raw = data.get(key)
            return None if raw is None else float(raw)

        return cls(
            model_id=data["model_id"],
            decision_type=data["decision_type"],
            score=float(data["score"]),
            threshold_breached=bool(data["threshold_breached"]),
            sampled_at=data["sampled_at"],
            baseline_mean=_opt_float("baseline_mean"),
            baseline_stddev=_opt_float("baseline_stddev"),
            z_score=_opt_float("z_score"),
        )
@@ -0,0 +1,85 @@
1
+ """spanforge.namespaces.consent — Consent namespace payload types (RFC-0001 SPANFORGE).
2
+
3
+ Classes
4
+ -------
5
+ ConsentPayload consent.granted / consent.revoked / consent.violation
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+ from typing import Any, Literal
11
+
12
+ __all__ = ["ConsentPayload"]
13
+
14
# Closed vocabularies checked by ConsentPayload.__post_init__.
_VALID_STATUSES = frozenset(("granted", "revoked", "violation"))
_VALID_LEGAL_BASES = frozenset(
    (
        "consent",
        "contract",
        "legal_obligation",
        "vital_interest",
        "public_task",
        "legitimate_interest",
    )
)


@dataclass
class ConsentPayload:
    """RFC-0001 SPANFORGE — payload for ``consent.*`` events.

    Records data-subject consent grants, revocations, and boundary
    violations for GDPR Art. 6/7 and EU AI Act compliance (U — User Rights).

    ``subject_id``, ``scope`` and ``purpose`` must be non-empty; ``status``
    and ``legal_basis`` must belong to the module-level allowed sets.
    """

    subject_id: str
    scope: str
    purpose: str
    status: Literal["granted", "revoked", "violation"]
    legal_basis: str = "consent"
    expiry: str | None = None  # ISO 8601 timestamp
    agent_id: str | None = None
    violation_detail: str | None = None
    data_categories: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Reject empty required strings and unknown status / legal basis."""
        required = (
            (self.subject_id, "ConsentPayload.subject_id must be non-empty"),
            (self.scope, "ConsentPayload.scope must be non-empty"),
            (self.purpose, "ConsentPayload.purpose must be non-empty"),
        )
        for value, message in required:
            if not value:
                raise ValueError(message)

        if self.status not in _VALID_STATUSES:
            raise ValueError(
                f"ConsentPayload.status must be one of {sorted(_VALID_STATUSES)}"
            )
        if self.legal_basis not in _VALID_LEGAL_BASES:
            raise ValueError(
                f"ConsentPayload.legal_basis must be one of {sorted(_VALID_LEGAL_BASES)}"
            )

    def to_dict(self) -> dict[str, Any]:
        """Return a plain ``dict``; unset optionals and empty lists are left out."""
        payload: dict[str, Any] = {
            "subject_id": self.subject_id,
            "scope": self.scope,
            "purpose": self.purpose,
            "status": self.status,
            "legal_basis": self.legal_basis,
        }
        # Optional scalars are emitted in declaration order, only when set.
        for key in ("expiry", "agent_id", "violation_detail"):
            value = getattr(self, key)
            if value is not None:
                payload[key] = value
        if self.data_categories:
            payload["data_categories"] = list(self.data_categories)
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ConsentPayload:
        """Build a payload from a plain ``dict``; the first four keys are required."""
        subject_id = data["subject_id"]
        scope = data["scope"]
        purpose = data["purpose"]
        status = data["status"]
        categories = data.get("data_categories", [])
        return cls(
            subject_id,
            scope,
            purpose,
            status,
            legal_basis=data.get("legal_basis", "consent"),
            expiry=data.get("expiry"),
            agent_id=data.get("agent_id"),
            violation_detail=data.get("violation_detail"),
            data_categories=list(categories),
        )
@@ -0,0 +1,175 @@
1
+ """spanforge.namespaces.cost — Cost payload types (RFC-0001 §9).
2
+
3
+ Classes
4
+ -------
5
+ CostTokenRecordedPayload
6
+ RFC §9.1 — cost recorded for a single model call.
7
+ CostSessionRecordedPayload
8
+ RFC §9.2 — aggregate cost across a session.
9
+ CostAttributedPayload
10
+ RFC §9.3 — cost attributed to a specific target.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass, field
16
+ from typing import Any
17
+
18
+ from spanforge.namespaces.trace import (
19
+ CostBreakdown,
20
+ ModelInfo,
21
+ PricingTier,
22
+ TokenUsage,
23
+ )
24
+
25
+ __all__ = [
26
+ "CostAttributedPayload",
27
+ "CostSessionRecordedPayload",
28
+ "CostTokenRecordedPayload",
29
+ ]
30
+
31
# Closed set of values accepted for ``CostAttributedPayload.attribution_type``;
# membership is checked in that class's ``__post_init__``.
_VALID_ATTRIBUTION_TYPES = frozenset({"direct", "proportional", "estimated", "manual"})
32
+
33
+
34
@dataclass
class CostTokenRecordedPayload:
    """RFC-0001 §9.1 — Cost recorded for a single model call (one span).

    Used with event type: ``llm.cost.token.recorded``.

    Attributes:
        cost: Cost breakdown for the call (must be a ``CostBreakdown``).
        token_usage: Token counts for the call (must be a ``TokenUsage``).
        model: Model descriptor (must be a ``ModelInfo``).
        pricing_tier: Optional pricing tier.
        span_id: Optional span identifier.
        agent_run_id: Optional agent-run identifier.
    """

    cost: CostBreakdown
    token_usage: TokenUsage
    model: ModelInfo
    pricing_tier: PricingTier | None = None
    span_id: str | None = None
    agent_run_id: str | None = None

    def __post_init__(self) -> None:
        """Type-check the three required value objects.

        Raises:
            TypeError: If ``cost``, ``token_usage`` or ``model`` is not an
                instance of its declared class.
        """
        if not isinstance(self.cost, CostBreakdown):
            raise TypeError("CostTokenRecordedPayload.cost must be a CostBreakdown")
        if not isinstance(self.token_usage, TokenUsage):
            raise TypeError("CostTokenRecordedPayload.token_usage must be a TokenUsage")
        if not isinstance(self.model, ModelInfo):
            raise TypeError("CostTokenRecordedPayload.model must be a ModelInfo")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Nested value objects are serialised through their own ``to_dict``;
        optional fields are omitted when ``None``.
        """
        d: dict[str, Any] = {
            "cost": self.cost.to_dict(),
            "token_usage": self.token_usage.to_dict(),
            "model": self.model.to_dict(),
        }
        if self.pricing_tier is not None:
            d["pricing_tier"] = self.pricing_tier.to_dict()
        if self.span_id is not None:
            d["span_id"] = self.span_id
        if self.agent_run_id is not None:
            d["agent_run_id"] = self.agent_run_id
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> CostTokenRecordedPayload:
        """Deserialise from a plain ``dict``.

        ``pricing_tier`` is only decoded when the key carries a value; an
        explicit ``None`` (JSON ``null``) is treated like a missing key
        instead of being handed to ``PricingTier.from_dict``.

        Raises:
            KeyError: If ``cost``, ``token_usage`` or ``model`` is missing.
        """
        tier = data.get("pricing_tier")
        return cls(
            cost=CostBreakdown.from_dict(data["cost"]),
            token_usage=TokenUsage.from_dict(data["token_usage"]),
            model=ModelInfo.from_dict(data["model"]),
            pricing_tier=None if tier is None else PricingTier.from_dict(tier),
            span_id=data.get("span_id"),
            agent_run_id=data.get("agent_run_id"),
        )
82
+
83
+
84
@dataclass
class CostSessionRecordedPayload:
    """RFC-0001 §9.2 — Aggregate cost across a session.

    Used with event type: ``llm.cost.session.recorded``.
    A session is any arbitrary grouping (user session, request batch,
    experiment run).

    Attributes:
        total_cost: Aggregate cost (must be a ``CostBreakdown``).
        total_token_usage: Aggregate token counts (must be a ``TokenUsage``).
        call_count: Number of calls; a non-negative ``int``.
        session_duration_ms: Optional duration; non-negative when set.
        models_used: Model names seen in the session.
    """

    total_cost: CostBreakdown
    total_token_usage: TokenUsage
    call_count: int
    session_duration_ms: float | None = None
    models_used: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Validate types and ranges.

        Raises:
            TypeError: If ``total_cost`` or ``total_token_usage`` has the
                wrong type.
            ValueError: If ``call_count`` is not a non-negative ``int`` or
                ``session_duration_ms`` is negative.
        """
        if not isinstance(self.total_cost, CostBreakdown):
            raise TypeError("CostSessionRecordedPayload.total_cost must be a CostBreakdown")
        if not isinstance(self.total_token_usage, TokenUsage):
            raise TypeError("CostSessionRecordedPayload.total_token_usage must be a TokenUsage")
        if not isinstance(self.call_count, int) or self.call_count < 0:
            raise ValueError("CostSessionRecordedPayload.call_count must be a non-negative int")
        if self.session_duration_ms is not None and self.session_duration_ms < 0:
            raise ValueError("CostSessionRecordedPayload.session_duration_ms must be non-negative")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``session_duration_ms`` is omitted when ``None`` and ``models_used``
        is omitted when empty.
        """
        d: dict[str, Any] = {
            "total_cost": self.total_cost.to_dict(),
            "total_token_usage": self.total_token_usage.to_dict(),
            "call_count": self.call_count,
        }
        if self.session_duration_ms is not None:
            d["session_duration_ms"] = self.session_duration_ms
        if self.models_used:
            d["models_used"] = list(self.models_used)
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> CostSessionRecordedPayload:
        """Deserialise from a plain ``dict``.

        An explicit ``None`` (JSON ``null``) for ``session_duration_ms`` or
        ``models_used`` is treated like a missing key rather than being
        passed to ``float`` / ``list``, which would raise ``TypeError``.

        Raises:
            KeyError: If a required key is missing.
        """
        duration = data.get("session_duration_ms")
        return cls(
            total_cost=CostBreakdown.from_dict(data["total_cost"]),
            total_token_usage=TokenUsage.from_dict(data["total_token_usage"]),
            call_count=int(data["call_count"]),
            session_duration_ms=None if duration is None else float(duration),
            models_used=list(data.get("models_used") or ()),
        )
131
+
132
+
133
@dataclass
class CostAttributedPayload:
    """RFC-0001 §9.3 — Cost attributed to a specific target.

    Used with event type: ``llm.cost.attributed``.
    ``attribution_type`` describes how the cost share was computed and must
    be one of the values in ``_VALID_ATTRIBUTION_TYPES``.
    """

    cost: CostBreakdown
    attribution_target: str
    attribution_type: str  # "direct"|"proportional"|"estimated"|"manual"
    source_event_ids: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Check the cost type, the target string and the attribution type."""
        if not isinstance(self.cost, CostBreakdown):
            raise TypeError("CostAttributedPayload.cost must be a CostBreakdown")

        target = self.attribution_target
        if not (isinstance(target, str) and target):
            raise ValueError("CostAttributedPayload.attribution_target must be a non-empty string")

        if self.attribution_type not in _VALID_ATTRIBUTION_TYPES:
            raise ValueError(
                f"CostAttributedPayload.attribution_type must be one of {sorted(_VALID_ATTRIBUTION_TYPES)}"  # noqa: E501
            )

    def to_dict(self) -> dict[str, Any]:
        """Return a plain ``dict``; ``source_event_ids`` appears only when non-empty."""
        payload: dict[str, Any] = {
            "cost": self.cost.to_dict(),
            "attribution_target": self.attribution_target,
            "attribution_type": self.attribution_type,
        }
        if not self.source_event_ids:
            return payload
        payload["source_event_ids"] = list(self.source_event_ids)
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> CostAttributedPayload:
        """Build a payload from a plain ``dict``; the first three keys are required."""
        cost = CostBreakdown.from_dict(data["cost"])
        target = data["attribution_target"]
        kind = data["attribution_type"]
        event_ids = list(data.get("source_event_ids", []))
        return cls(cost, target, kind, event_ids)
@@ -0,0 +1,135 @@
1
+ """spanforge.namespaces.decision — Decision namespace payload types (RFC-0001 SPANFORGE).
2
+
3
+ Classes
4
+ -------
5
+ DecisionDriver Factor contributing to a decision (T \u2014 Transparency)
6
+ DecisionPayload decision.made / decision.revised / decision.rejected
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import Any
12
+
13
+ __all__ = [
14
+ "DecisionDriver",
15
+ "DecisionPayload",
16
+ ]
17
+
18
# Closed set of values accepted for ``DecisionPayload.decision_type``;
# membership is checked in that class's ``__post_init__``.
_VALID_DECISION_TYPES = frozenset({
    "classification", "routing", "generation", "tool_selection", "other",
})
21
+
22
+
23
@dataclass
class DecisionDriver:
    """One factor that contributed to an agent decision (T — Transparency).

    Both ``weight`` and ``confidence`` are required to lie in [0.0, 1.0].
    The weights of all drivers in a list should sum to 1.0, but that is the
    caller's responsibility and is not enforced here.
    """

    factor_name: str
    weight: float  # 0.0-1.0; fractional contribution to the overall decision
    contribution: float  # signed contribution to the final decision score
    evidence: str  # human-readable evidence string
    confidence: float  # 0.0-1.0

    def __post_init__(self) -> None:
        """Reject an empty factor name and out-of-range weight/confidence."""
        if not self.factor_name:
            raise ValueError("DecisionDriver.factor_name must be non-empty")
        bounded = (
            (self.weight, "DecisionDriver.weight must be in [0.0, 1.0]"),
            (self.confidence, "DecisionDriver.confidence must be in [0.0, 1.0]"),
        )
        for value, message in bounded:
            # Chained comparison inside ``not`` also rejects NaN.
            if not (0.0 <= value <= 1.0):
                raise ValueError(message)

    def to_dict(self) -> dict[str, Any]:
        """Return the five fields as a plain ``dict`` in declaration order."""
        names = ("factor_name", "weight", "contribution", "evidence", "confidence")
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> DecisionDriver:
        """Build a driver from a plain ``dict``, coercing numeric fields to ``float``."""
        # Keys are read in declaration order so the first missing key is the
        # one reported.
        name = data["factor_name"]
        weight = float(data["weight"])
        contribution = float(data["contribution"])
        evidence = data["evidence"]
        confidence = float(data["confidence"])
        return cls(name, weight, contribution, evidence, confidence)
64
+
65
+
66
@dataclass
class DecisionPayload:
    """RFC-0001 SPANFORGE — payload for ``decision.*`` events.

    Records a single agent decision made at inference time together with the
    full list of decision drivers behind it (T — Transparency).

    ``actor`` holds an optional dict form of an ActorContext; it is typed as
    ``dict | None`` on purpose so this module avoids a circular import.
    """

    decision_id: str  # ULID
    agent_id: str
    decision_type: str  # classification | routing | generation | tool_selection | other
    input_summary: str
    output_summary: str
    confidence: float  # 0.0-1.0
    latency_ms: float
    rationale_hash: str  # SHA-256 of the full rationale text
    decision_drivers: list[DecisionDriver] = field(default_factory=list)
    actor: dict[str, Any] | None = None

    def __post_init__(self) -> None:
        """Validate identifiers, decision type, confidence range and latency."""
        identifiers = (
            (self.decision_id, "DecisionPayload.decision_id must be non-empty"),
            (self.agent_id, "DecisionPayload.agent_id must be non-empty"),
        )
        for value, message in identifiers:
            if not value:
                raise ValueError(message)

        if self.decision_type not in _VALID_DECISION_TYPES:
            raise ValueError(
                f"DecisionPayload.decision_type must be one of {sorted(_VALID_DECISION_TYPES)}"
            )
        # Chained comparison inside ``not`` also rejects NaN.
        if not (0.0 <= self.confidence <= 1.0):
            raise ValueError("DecisionPayload.confidence must be in [0.0, 1.0]")
        if self.latency_ms < 0:
            raise ValueError("DecisionPayload.latency_ms must be >= 0")

    def to_dict(self) -> dict[str, Any]:
        """Return a plain ``dict``; drivers are serialised, ``actor`` is added only when set."""
        serialised_drivers = [driver.to_dict() for driver in self.decision_drivers]
        payload: dict[str, Any] = {
            "decision_id": self.decision_id,
            "agent_id": self.agent_id,
            "decision_type": self.decision_type,
            "input_summary": self.input_summary,
            "output_summary": self.output_summary,
            "confidence": self.confidence,
            "latency_ms": self.latency_ms,
            "rationale_hash": self.rationale_hash,
            "decision_drivers": serialised_drivers,
        }
        if self.actor is None:
            return payload
        payload["actor"] = self.actor
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> DecisionPayload:
        """Build a payload from a plain ``dict``, decoding nested drivers first."""
        parsed_drivers = list(
            map(DecisionDriver.from_dict, data.get("decision_drivers", []))
        )
        return cls(
            decision_id=data["decision_id"],
            agent_id=data["agent_id"],
            decision_type=data["decision_type"],
            input_summary=data["input_summary"],
            output_summary=data["output_summary"],
            confidence=float(data["confidence"]),
            latency_ms=float(data["latency_ms"]),
            rationale_hash=data["rationale_hash"],
            decision_drivers=parsed_drivers,
            actor=data.get("actor"),
        )
@@ -0,0 +1,146 @@
1
+ """spanforge.namespaces.diff — Diff payload types (RFC-0001).
2
+
3
+ Classes
4
+ -------
5
+ DiffComputedPayload llm.diff.computed
6
+ DiffRegressionFlaggedPayload llm.diff.regression.flagged
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+ from typing import Any
12
+
13
+ __all__ = [
14
+ "DiffComputedPayload",
15
+ "DiffRegressionFlaggedPayload",
16
+ ]
17
+
18
# Closed vocabularies used by the diff payload validators in this module.
_VALID_DIFF_TYPES = frozenset({"prompt", "response", "template", "token_usage", "cost"})
_VALID_ALGORITHMS = frozenset({
    "embedding_cosine", "levenshtein", "token_edit", "lcs", "semantic_embedding"
})
_VALID_SEVERITIES = frozenset({"low", "medium", "high", "critical"})


@dataclass
class DiffComputedPayload:
    """RFC-0001 — A diff was computed between two events.

    Attributes:
        ref_event_id: Reference event id; must be non-empty.
        target_event_id: Target event id; must be non-empty.
        diff_type: One of ``_VALID_DIFF_TYPES``.
        similarity_score: Similarity in [0, 1].
        added_tokens: Optional count of added tokens.
        removed_tokens: Optional count of removed tokens.
        diff_algorithm: Optional; one of ``_VALID_ALGORITHMS`` when set.
        ref_content_hash: Optional hash string (64 hex chars per the
            original field comment; not validated here).
        target_content_hash: Optional hash string (same note as above).
        computation_duration_ms: Optional duration of the computation.
    """

    ref_event_id: str
    target_event_id: str
    diff_type: str  # "prompt"|"response"|"template"|"token_usage"|"cost"
    similarity_score: float
    added_tokens: int | None = None
    removed_tokens: int | None = None
    diff_algorithm: str | None = None
    ref_content_hash: str | None = None  # 64 hex chars
    target_content_hash: str | None = None  # 64 hex chars
    computation_duration_ms: float | None = None

    def __post_init__(self) -> None:
        """Validate ids, diff type, score range and (if set) the algorithm.

        Raises:
            ValueError: If any constraint is violated.
        """
        if not self.ref_event_id:
            raise ValueError("DiffComputedPayload.ref_event_id must be non-empty")
        if not self.target_event_id:
            raise ValueError("DiffComputedPayload.target_event_id must be non-empty")
        if self.diff_type not in _VALID_DIFF_TYPES:
            raise ValueError(f"DiffComputedPayload.diff_type must be one of {sorted(_VALID_DIFF_TYPES)}")  # noqa: E501
        # Chained comparison inside ``not`` also rejects NaN.
        if not (0.0 <= self.similarity_score <= 1.0):
            raise ValueError("DiffComputedPayload.similarity_score must be in [0,1]")
        if self.diff_algorithm is not None and self.diff_algorithm not in _VALID_ALGORITHMS:
            raise ValueError(f"DiffComputedPayload.diff_algorithm must be one of {sorted(_VALID_ALGORITHMS)}")  # noqa: E501

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        The four required fields are always present; each optional field is
        emitted only when it is not ``None``.
        """
        d: dict[str, Any] = {
            "ref_event_id": self.ref_event_id,
            "target_event_id": self.target_event_id,
            "diff_type": self.diff_type,
            "similarity_score": self.similarity_score,
        }
        if self.added_tokens is not None:
            d["added_tokens"] = self.added_tokens
        if self.removed_tokens is not None:
            d["removed_tokens"] = self.removed_tokens
        if self.diff_algorithm is not None:
            d["diff_algorithm"] = self.diff_algorithm
        if self.ref_content_hash is not None:
            d["ref_content_hash"] = self.ref_content_hash
        if self.target_content_hash is not None:
            d["target_content_hash"] = self.target_content_hash
        if self.computation_duration_ms is not None:
            d["computation_duration_ms"] = self.computation_duration_ms
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> DiffComputedPayload:
        """Deserialise from a plain ``dict``.

        Optional numeric fields are coerced only when they carry a value.
        A key that is present but ``None`` (JSON ``null``) is treated like a
        missing key instead of being passed to ``int``/``float``, which
        would raise ``TypeError``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If a value fails coercion or validation.
        """

        def _optional(key: str, convert: Any) -> Any:
            raw = data.get(key)
            return None if raw is None else convert(raw)

        return cls(
            ref_event_id=data["ref_event_id"],
            target_event_id=data["target_event_id"],
            diff_type=data["diff_type"],
            similarity_score=float(data["similarity_score"]),
            added_tokens=_optional("added_tokens", int),
            removed_tokens=_optional("removed_tokens", int),
            diff_algorithm=data.get("diff_algorithm"),
            ref_content_hash=data.get("ref_content_hash"),
            target_content_hash=data.get("target_content_hash"),
            computation_duration_ms=_optional("computation_duration_ms", float),
        )
89
+
90
+
91
@dataclass
class DiffRegressionFlaggedPayload:
    """RFC-0001 — A diff score fell below the similarity threshold.

    Both event ids must be non-empty, ``diff_type`` and ``severity`` must
    belong to the module-level allowed sets, and ``similarity_score`` and
    ``threshold`` must each lie in [0, 1].
    """

    ref_event_id: str
    target_event_id: str
    diff_type: str
    similarity_score: float
    threshold: float
    severity: str  # "low"|"medium"|"high"|"critical"
    diff_event_id: str | None = None
    alert_target: str | None = None

    def __post_init__(self) -> None:
        """Run the field checks in declaration order, raising ``ValueError`` on the first failure."""
        event_ids = (
            (self.ref_event_id, "DiffRegressionFlaggedPayload.ref_event_id must be non-empty"),
            (self.target_event_id, "DiffRegressionFlaggedPayload.target_event_id must be non-empty"),
        )
        for value, message in event_ids:
            if not value:
                raise ValueError(message)

        if self.diff_type not in _VALID_DIFF_TYPES:
            raise ValueError(f"DiffRegressionFlaggedPayload.diff_type must be one of {sorted(_VALID_DIFF_TYPES)}")  # noqa: E501

        unit_interval = (
            (self.similarity_score, "DiffRegressionFlaggedPayload.similarity_score must be in [0,1]"),
            (self.threshold, "DiffRegressionFlaggedPayload.threshold must be in [0,1]"),
        )
        for value, message in unit_interval:
            # Chained comparison inside ``not`` also rejects NaN.
            if not (0.0 <= value <= 1.0):
                raise ValueError(message)

        if self.severity not in _VALID_SEVERITIES:
            raise ValueError(f"DiffRegressionFlaggedPayload.severity must be one of {sorted(_VALID_SEVERITIES)}")  # noqa: E501

    def to_dict(self) -> dict[str, Any]:
        """Return a plain ``dict``; the two optional fields appear only when set."""
        payload: dict[str, Any] = {
            "ref_event_id": self.ref_event_id,
            "target_event_id": self.target_event_id,
            "diff_type": self.diff_type,
            "similarity_score": self.similarity_score,
            "threshold": self.threshold,
            "severity": self.severity,
        }
        for key in ("diff_event_id", "alert_target"):
            value = getattr(self, key)
            if value is not None:
                payload[key] = value
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> DiffRegressionFlaggedPayload:
        """Build a payload from a plain ``dict``, coercing the two scores to ``float``."""
        # Required keys are read in declaration order so the first missing
        # key is the one reported.
        ref_id = data["ref_event_id"]
        target_id = data["target_event_id"]
        kind = data["diff_type"]
        score = float(data["similarity_score"])
        limit = float(data["threshold"])
        level = data["severity"]
        return cls(
            ref_id,
            target_id,
            kind,
            score,
            limit,
            level,
            diff_event_id=data.get("diff_event_id"),
            alert_target=data.get("alert_target"),
        )