spanforge 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +695 -0
- spanforge/_batch_exporter.py +322 -0
- spanforge/_cli.py +3081 -0
- spanforge/_hooks.py +340 -0
- spanforge/_server.py +953 -0
- spanforge/_span.py +1015 -0
- spanforge/_store.py +287 -0
- spanforge/_stream.py +654 -0
- spanforge/_trace.py +334 -0
- spanforge/_tracer.py +253 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +464 -0
- spanforge/auto.py +181 -0
- spanforge/baseline.py +336 -0
- spanforge/config.py +460 -0
- spanforge/consent.py +227 -0
- spanforge/consumer.py +379 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1060 -0
- spanforge/cost.py +597 -0
- spanforge/debug.py +514 -0
- spanforge/drift.py +488 -0
- spanforge/egress.py +63 -0
- spanforge/eval.py +575 -0
- spanforge/event.py +1052 -0
- spanforge/exceptions.py +246 -0
- spanforge/explain.py +181 -0
- spanforge/export/__init__.py +50 -0
- spanforge/export/append_only.py +342 -0
- spanforge/export/cloud.py +349 -0
- spanforge/export/datadog.py +495 -0
- spanforge/export/grafana.py +331 -0
- spanforge/export/jsonl.py +198 -0
- spanforge/export/otel_bridge.py +291 -0
- spanforge/export/otlp.py +817 -0
- spanforge/export/otlp_bridge.py +231 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/webhook.py +302 -0
- spanforge/exporters/__init__.py +29 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/hitl.py +297 -0
- spanforge/inspect.py +429 -0
- spanforge/integrations/__init__.py +39 -0
- spanforge/integrations/_pricing.py +277 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/bedrock.py +306 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +349 -0
- spanforge/integrations/groq.py +444 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/llamaindex.py +370 -0
- spanforge/integrations/ollama.py +286 -0
- spanforge/integrations/openai.py +370 -0
- spanforge/integrations/together.py +485 -0
- spanforge/metrics.py +393 -0
- spanforge/metrics_export.py +342 -0
- spanforge/migrate.py +278 -0
- spanforge/model_registry.py +282 -0
- spanforge/models.py +407 -0
- spanforge/namespaces/__init__.py +215 -0
- spanforge/namespaces/audit.py +253 -0
- spanforge/namespaces/cache.py +209 -0
- spanforge/namespaces/chain.py +74 -0
- spanforge/namespaces/confidence.py +69 -0
- spanforge/namespaces/consent.py +85 -0
- spanforge/namespaces/cost.py +175 -0
- spanforge/namespaces/decision.py +135 -0
- spanforge/namespaces/diff.py +146 -0
- spanforge/namespaces/drift.py +79 -0
- spanforge/namespaces/eval_.py +232 -0
- spanforge/namespaces/fence.py +180 -0
- spanforge/namespaces/guard.py +104 -0
- spanforge/namespaces/hitl.py +92 -0
- spanforge/namespaces/latency.py +69 -0
- spanforge/namespaces/prompt.py +185 -0
- spanforge/namespaces/redact.py +172 -0
- spanforge/namespaces/template.py +197 -0
- spanforge/namespaces/tool_call.py +76 -0
- spanforge/namespaces/trace.py +1006 -0
- spanforge/normalizer.py +183 -0
- spanforge/presidio_backend.py +149 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +415 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +780 -0
- spanforge/sampling.py +500 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/signing.py +1152 -0
- spanforge/stream.py +559 -0
- spanforge/testing.py +376 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +304 -0
- spanforge/validate.py +383 -0
- spanforge-2.0.0.dist-info/METADATA +1777 -0
- spanforge-2.0.0.dist-info/RECORD +101 -0
- spanforge-2.0.0.dist-info/WHEEL +4 -0
- spanforge-2.0.0.dist-info/entry_points.txt +5 -0
- spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""spanforge.namespaces.chain \u2014 Chain namespace payload types (RFC-0001 SPANFORGE).
|
|
2
|
+
|
|
3
|
+
Classes
|
|
4
|
+
-------
|
|
5
|
+
ChainPayload chain.started / chain.step_completed / chain.completed / chain.failed
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
__all__ = ["ChainPayload"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class ChainPayload:
    """RFC-0001 SPANFORGE — payload for ``chain.*`` events.

    Captures multi-step prompt chain state: step sequence, inter-step data
    flow references, and cumulative error/cost/latency propagation.

    Raises:
        ValueError: from ``__post_init__`` when ``chain_id`` or ``step_name``
            is empty, when ``step_index``, ``cumulative_latency_ms`` or
            ``cumulative_token_cost`` is negative, or when ``total_steps`` is
            given but smaller than 1.
    """

    chain_id: str
    step_index: int
    step_name: str
    cumulative_latency_ms: float
    cumulative_token_cost: float
    error_propagated: bool
    total_steps: int | None = None
    input_refs: list[str] = field(default_factory=list)  # event ULIDs of inputs
    output_refs: list[str] = field(default_factory=list)  # event ULIDs of outputs

    def __post_init__(self) -> None:
        """Validate field values right after dataclass initialisation."""
        if not self.chain_id:
            raise ValueError("ChainPayload.chain_id must be non-empty")
        if self.step_index < 0:
            raise ValueError("ChainPayload.step_index must be >= 0")
        if not self.step_name:
            raise ValueError("ChainPayload.step_name must be non-empty")
        if self.cumulative_latency_ms < 0:
            raise ValueError("ChainPayload.cumulative_latency_ms must be >= 0")
        if self.cumulative_token_cost < 0:
            raise ValueError("ChainPayload.cumulative_token_cost must be >= 0")
        if self.total_steps is not None and self.total_steps < 1:
            raise ValueError("ChainPayload.total_steps must be >= 1")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``total_steps`` is emitted only when it is set; the reference lists
        are copied so the caller cannot mutate the payload through the result.
        """
        d: dict[str, Any] = {
            "chain_id": self.chain_id,
            "step_index": self.step_index,
            "step_name": self.step_name,
            "cumulative_latency_ms": self.cumulative_latency_ms,
            "cumulative_token_cost": self.cumulative_token_cost,
            "error_propagated": self.error_propagated,
            "input_refs": list(self.input_refs),
            "output_refs": list(self.output_refs),
        }
        if self.total_steps is not None:
            d["total_steps"] = self.total_steps
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ChainPayload:
        """Deserialise from a plain ``dict``.

        Numeric fields are coerced with ``int``/``float``.  ``total_steps`` is
        coerced the same way as ``step_index`` so that both integer fields
        accept the same inputs, and an explicit ``None`` for ``total_steps``,
        ``input_refs`` or ``output_refs`` is treated like a missing key.

        Raises:
            KeyError: when a required key is missing.
            ValueError: when a value fails coercion or validation.
        """
        total_steps = data.get("total_steps")
        input_refs = data.get("input_refs")
        output_refs = data.get("output_refs")
        return cls(
            chain_id=data["chain_id"],
            step_index=int(data["step_index"]),
            step_name=data["step_name"],
            cumulative_latency_ms=float(data["cumulative_latency_ms"]),
            cumulative_token_cost=float(data["cumulative_token_cost"]),
            error_propagated=bool(data["error_propagated"]),
            total_steps=None if total_steps is None else int(total_steps),
            input_refs=[] if input_refs is None else list(input_refs),
            output_refs=[] if output_refs is None else list(output_refs),
        )
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""spanforge.namespaces.confidence \u2014 Confidence namespace payload types (RFC-0001 SPANFORGE).
|
|
2
|
+
|
|
3
|
+
Classes
|
|
4
|
+
-------
|
|
5
|
+
ConfidencePayload confidence.sample / confidence.threshold_breach
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
__all__ = ["ConfidencePayload"]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class ConfidencePayload:
    """RFC-0001 SPANFORGE — payload for ``confidence.*`` events.

    Tracks output confidence score distributions per decision type and model,
    measured against the deployment baseline (T — Traceability).

    Raises:
        ValueError: from ``__post_init__`` when ``model_id``,
            ``decision_type`` or ``sampled_at`` is empty, or when ``score``
            lies outside ``[0.0, 1.0]``.
    """

    model_id: str
    decision_type: str
    score: float  # 0.0–1.0
    threshold_breached: bool
    sampled_at: str  # ISO 8601 timestamp
    baseline_mean: float | None = None
    baseline_stddev: float | None = None
    z_score: float | None = None

    def __post_init__(self) -> None:
        """Validate field values right after dataclass initialisation."""
        if not self.model_id:
            raise ValueError("ConfidencePayload.model_id must be non-empty")
        if not self.decision_type:
            raise ValueError("ConfidencePayload.decision_type must be non-empty")
        # Written as a negated chained comparison so that NaN is rejected too.
        if not (0.0 <= self.score <= 1.0):
            raise ValueError("ConfidencePayload.score must be in [0.0, 1.0]")
        if not self.sampled_at:
            raise ValueError("ConfidencePayload.sampled_at must be non-empty")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        The three optional baseline statistics are emitted only when set.
        """
        d: dict[str, Any] = {
            "model_id": self.model_id,
            "decision_type": self.decision_type,
            "score": self.score,
            "threshold_breached": self.threshold_breached,
            "sampled_at": self.sampled_at,
        }
        if self.baseline_mean is not None:
            d["baseline_mean"] = self.baseline_mean
        if self.baseline_stddev is not None:
            d["baseline_stddev"] = self.baseline_stddev
        if self.z_score is not None:
            d["z_score"] = self.z_score
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ConfidencePayload:
        """Deserialise from a plain ``dict``.

        ``score`` and the optional baseline statistics are all coerced with
        ``float`` so every float-typed field accepts the same inputs; a
        missing key or an explicit ``None`` leaves the optional field unset.

        Raises:
            KeyError: when a required key is missing.
            ValueError: when a value fails coercion or validation.
        """

        def _optional_float(key: str) -> float | None:
            # Coerce only real values; None means "not provided".
            value = data.get(key)
            return None if value is None else float(value)

        return cls(
            model_id=data["model_id"],
            decision_type=data["decision_type"],
            score=float(data["score"]),
            threshold_breached=bool(data["threshold_breached"]),
            sampled_at=data["sampled_at"],
            baseline_mean=_optional_float("baseline_mean"),
            baseline_stddev=_optional_float("baseline_stddev"),
            z_score=_optional_float("z_score"),
        )
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""spanforge.namespaces.consent — Consent namespace payload types (RFC-0001 SPANFORGE).
|
|
2
|
+
|
|
3
|
+
Classes
|
|
4
|
+
-------
|
|
5
|
+
ConsentPayload consent.granted / consent.revoked / consent.violation
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any, Literal
|
|
11
|
+
|
|
12
|
+
__all__ = ["ConsentPayload"]
|
|
13
|
+
|
|
14
|
+
# Closed sets of values accepted by ConsentPayload validation.
_VALID_STATUSES = frozenset(("granted", "revoked", "violation"))
_VALID_LEGAL_BASES = frozenset(
    (
        "consent",
        "contract",
        "legal_obligation",
        "vital_interest",
        "public_task",
        "legitimate_interest",
    )
)


@dataclass
class ConsentPayload:
    """RFC-0001 SPANFORGE — payload for ``consent.*`` events.

    Records data-subject consent grants, revocations, and boundary violations
    for GDPR Art. 6/7 and EU AI Act compliance (U — User Rights).

    Raises:
        ValueError: from ``__post_init__`` when ``subject_id``, ``scope`` or
            ``purpose`` is empty, or when ``status`` / ``legal_basis`` is not
            one of the accepted values.
    """

    subject_id: str
    scope: str
    purpose: str
    status: Literal["granted", "revoked", "violation"]
    legal_basis: str = "consent"
    expiry: str | None = None  # ISO 8601 timestamp
    agent_id: str | None = None
    violation_detail: str | None = None
    data_categories: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Reject empty identifiers and out-of-set enum-like values."""
        # Checked in declaration order so the first offending field is reported.
        for attr in ("subject_id", "scope", "purpose"):
            if not getattr(self, attr):
                raise ValueError(f"ConsentPayload.{attr} must be non-empty")
        if self.status not in _VALID_STATUSES:
            allowed_statuses = sorted(_VALID_STATUSES)
            raise ValueError(
                f"ConsentPayload.status must be one of {allowed_statuses}"
            )
        if self.legal_basis not in _VALID_LEGAL_BASES:
            allowed_bases = sorted(_VALID_LEGAL_BASES)
            raise ValueError(
                f"ConsentPayload.legal_basis must be one of {allowed_bases}"
            )

    def to_dict(self) -> dict[str, Any]:
        """Return the payload as a plain ``dict``.

        Optional scalar fields appear only when they are not ``None``;
        ``data_categories`` appears only when it is non-empty.
        """
        payload: dict[str, Any] = {
            "subject_id": self.subject_id,
            "scope": self.scope,
            "purpose": self.purpose,
            "status": self.status,
            "legal_basis": self.legal_basis,
        }
        optional_scalars = (
            ("expiry", self.expiry),
            ("agent_id", self.agent_id),
            ("violation_detail", self.violation_detail),
        )
        for key, value in optional_scalars:
            if value is not None:
                payload[key] = value
        if self.data_categories:
            payload["data_categories"] = list(self.data_categories)
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ConsentPayload:
        """Build a payload from a plain ``dict``.

        ``subject_id``, ``scope``, ``purpose`` and ``status`` are required;
        ``legal_basis`` falls back to ``"consent"`` when absent.
        """
        required = {
            name: data[name]
            for name in ("subject_id", "scope", "purpose", "status")
        }
        categories = list(data.get("data_categories", []))
        return cls(
            **required,
            legal_basis=data.get("legal_basis", "consent"),
            expiry=data.get("expiry"),
            agent_id=data.get("agent_id"),
            violation_detail=data.get("violation_detail"),
            data_categories=categories,
        )
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""spanforge.namespaces.cost — Cost payload types (RFC-0001 §9).
|
|
2
|
+
|
|
3
|
+
Classes
|
|
4
|
+
-------
|
|
5
|
+
CostTokenRecordedPayload
|
|
6
|
+
RFC §9.1 — cost recorded for a single model call.
|
|
7
|
+
CostSessionRecordedPayload
|
|
8
|
+
RFC §9.2 — aggregate cost across a session.
|
|
9
|
+
CostAttributedPayload
|
|
10
|
+
RFC §9.3 — cost attributed to a specific target.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from spanforge.namespaces.trace import (
|
|
19
|
+
CostBreakdown,
|
|
20
|
+
ModelInfo,
|
|
21
|
+
PricingTier,
|
|
22
|
+
TokenUsage,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"CostAttributedPayload",
|
|
27
|
+
"CostSessionRecordedPayload",
|
|
28
|
+
"CostTokenRecordedPayload",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
# Accepted values for CostAttributedPayload.attribution_type.
_VALID_ATTRIBUTION_TYPES = frozenset(
    ("direct", "proportional", "estimated", "manual")
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class CostTokenRecordedPayload:
    """RFC-0001 §9.1 — Cost recorded for a single model call (one span).

    Used with event type: ``llm.cost.token.recorded``.

    Raises:
        TypeError: from ``__post_init__`` when ``cost``, ``token_usage`` or
            ``model`` is not an instance of its declared type.
    """

    cost: CostBreakdown
    token_usage: TokenUsage
    model: ModelInfo
    pricing_tier: PricingTier | None = None
    span_id: str | None = None
    agent_run_id: str | None = None

    def __post_init__(self) -> None:
        """Type-check the three mandatory structured fields, in order."""
        expectations = (
            ("cost", CostBreakdown, "CostTokenRecordedPayload.cost must be a CostBreakdown"),
            ("token_usage", TokenUsage, "CostTokenRecordedPayload.token_usage must be a TokenUsage"),
            ("model", ModelInfo, "CostTokenRecordedPayload.model must be a ModelInfo"),
        )
        for attr, expected_type, message in expectations:
            if not isinstance(getattr(self, attr), expected_type):
                raise TypeError(message)

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Nested objects are serialised through their own ``to_dict``; optional
        fields are included only when they are not ``None``.
        """
        payload: dict[str, Any] = {
            "cost": self.cost.to_dict(),
            "token_usage": self.token_usage.to_dict(),
            "model": self.model.to_dict(),
        }
        if self.pricing_tier is not None:
            payload["pricing_tier"] = self.pricing_tier.to_dict()
        for key in ("span_id", "agent_run_id"):
            value = getattr(self, key)
            if value is not None:
                payload[key] = value
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> CostTokenRecordedPayload:
        """Deserialise from a plain ``dict``.

        ``cost``, ``token_usage`` and ``model`` are required keys;
        ``pricing_tier`` is parsed only when the key is present.
        """
        cost = CostBreakdown.from_dict(data["cost"])
        usage = TokenUsage.from_dict(data["token_usage"])
        model = ModelInfo.from_dict(data["model"])
        if "pricing_tier" in data:
            tier = PricingTier.from_dict(data["pricing_tier"])
        else:
            tier = None
        return cls(
            cost=cost,
            token_usage=usage,
            model=model,
            pricing_tier=tier,
            span_id=data.get("span_id"),
            agent_run_id=data.get("agent_run_id"),
        )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class CostSessionRecordedPayload:
    """RFC-0001 §9.2 — Aggregate cost across a session.

    Used with event type: ``llm.cost.session.recorded``.
    A session is any arbitrary grouping (user session, request batch,
    experiment run).

    Raises:
        TypeError: from ``__post_init__`` when ``total_cost`` or
            ``total_token_usage`` is not an instance of its declared type.
        ValueError: from ``__post_init__`` when ``call_count`` is not a
            non-negative ``int`` or ``session_duration_ms`` is negative.
    """

    total_cost: CostBreakdown
    total_token_usage: TokenUsage
    call_count: int
    session_duration_ms: float | None = None
    models_used: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Validate field types and ranges after dataclass initialisation."""
        if not isinstance(self.total_cost, CostBreakdown):
            raise TypeError("CostSessionRecordedPayload.total_cost must be a CostBreakdown")
        if not isinstance(self.total_token_usage, TokenUsage):
            raise TypeError("CostSessionRecordedPayload.total_token_usage must be a TokenUsage")
        if not isinstance(self.call_count, int) or self.call_count < 0:
            raise ValueError("CostSessionRecordedPayload.call_count must be a non-negative int")
        if self.session_duration_ms is not None and self.session_duration_ms < 0:
            raise ValueError("CostSessionRecordedPayload.session_duration_ms must be non-negative")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``session_duration_ms`` is emitted only when set and ``models_used``
        only when non-empty.
        """
        d: dict[str, Any] = {
            "total_cost": self.total_cost.to_dict(),
            "total_token_usage": self.total_token_usage.to_dict(),
            "call_count": self.call_count,
        }
        if self.session_duration_ms is not None:
            d["session_duration_ms"] = self.session_duration_ms
        if self.models_used:
            d["models_used"] = list(self.models_used)
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> CostSessionRecordedPayload:
        """Deserialise from a plain ``dict``.

        An explicit ``None`` for ``session_duration_ms`` or ``models_used``
        is treated like a missing key, so a document that spells optional
        fields as ``null`` no longer fails inside ``float()`` / ``list()``.

        Raises:
            KeyError: when a required key is missing.
        """
        duration = data.get("session_duration_ms")
        models = data.get("models_used")
        return cls(
            total_cost=CostBreakdown.from_dict(data["total_cost"]),
            total_token_usage=TokenUsage.from_dict(data["total_token_usage"]),
            call_count=int(data["call_count"]),
            session_duration_ms=None if duration is None else float(duration),
            models_used=[] if models is None else list(models),
        )
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@dataclass
class CostAttributedPayload:
    """RFC-0001 §9.3 — Cost attributed to a specific target.

    Used with event type: ``llm.cost.attributed``.
    ``attribution_type`` describes how the cost share was computed.

    Raises:
        TypeError: from ``__post_init__`` when ``cost`` is not a
            ``CostBreakdown``.
        ValueError: from ``__post_init__`` when ``attribution_target`` is not
            a non-empty string or ``attribution_type`` is not an accepted
            value.
    """

    cost: CostBreakdown
    attribution_target: str
    attribution_type: str  # "direct"|"proportional"|"estimated"|"manual"
    source_event_ids: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Check the cost type, then the target, then the attribution type."""
        if not isinstance(self.cost, CostBreakdown):
            raise TypeError("CostAttributedPayload.cost must be a CostBreakdown")
        target = self.attribution_target
        if not (isinstance(target, str) and target):
            raise ValueError("CostAttributedPayload.attribution_target must be a non-empty string")
        if self.attribution_type not in _VALID_ATTRIBUTION_TYPES:
            allowed = sorted(_VALID_ATTRIBUTION_TYPES)
            raise ValueError(
                f"CostAttributedPayload.attribution_type must be one of {allowed}"
            )

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        ``source_event_ids`` is emitted (as a copy) only when non-empty.
        """
        payload: dict[str, Any] = {
            "cost": self.cost.to_dict(),
            "attribution_target": self.attribution_target,
            "attribution_type": self.attribution_type,
        }
        if self.source_event_ids:
            payload["source_event_ids"] = list(self.source_event_ids)
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> CostAttributedPayload:
        """Deserialise from a plain ``dict``.

        ``cost``, ``attribution_target`` and ``attribution_type`` are
        required keys; ``source_event_ids`` defaults to an empty list.
        """
        cost = CostBreakdown.from_dict(data["cost"])
        target = data["attribution_target"]
        kind = data["attribution_type"]
        event_ids = list(data.get("source_event_ids", []))
        return cls(
            cost=cost,
            attribution_target=target,
            attribution_type=kind,
            source_event_ids=event_ids,
        )
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""spanforge.namespaces.decision — Decision namespace payload types (RFC-0001 SPANFORGE).
|
|
2
|
+
|
|
3
|
+
Classes
|
|
4
|
+
-------
|
|
5
|
+
DecisionDriver Factor contributing to a decision (T \u2014 Transparency)
|
|
6
|
+
DecisionPayload decision.made / decision.revised / decision.rejected
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"DecisionDriver",
|
|
15
|
+
"DecisionPayload",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
# Accepted values for DecisionPayload.decision_type.
_VALID_DECISION_TYPES = frozenset(
    (
        "classification",
        "routing",
        "generation",
        "tool_selection",
        "other",
    )
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class DecisionDriver:
    """One factor that contributed to an agent decision (T — Transparency).

    ``weight`` and ``confidence`` must lie in the range [0.0, 1.0].
    The weights of all drivers in a list should add up to 1.0, but that is
    left to the caller and is not checked here.

    Raises:
        ValueError: from ``__post_init__`` when ``factor_name`` is empty or
            ``weight`` / ``confidence`` is outside [0.0, 1.0].
    """

    factor_name: str
    weight: float  # 0.0–1.0; fractional contribution to the overall decision
    contribution: float  # signed contribution to the final decision score
    evidence: str  # human-readable evidence string
    confidence: float  # 0.0–1.0

    def __post_init__(self) -> None:
        """Reject an empty name and out-of-range unit-interval fields."""
        if not self.factor_name:
            raise ValueError("DecisionDriver.factor_name must be non-empty")
        for attr in ("weight", "confidence"):
            value = getattr(self, attr)
            # Negated chained comparison on purpose: it also rejects NaN.
            if not (0.0 <= value <= 1.0):
                raise ValueError(f"DecisionDriver.{attr} must be in [0.0, 1.0]")

    def to_dict(self) -> dict[str, Any]:
        """Return all five fields as a plain ``dict`` in declaration order."""
        field_names = ("factor_name", "weight", "contribution", "evidence", "confidence")
        return {name: getattr(self, name) for name in field_names}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> DecisionDriver:
        """Build a driver from a plain ``dict``; numeric fields go through ``float``."""
        name = data["factor_name"]
        weight = float(data["weight"])
        contribution = float(data["contribution"])
        evidence = data["evidence"]
        confidence = float(data["confidence"])
        return cls(
            factor_name=name,
            weight=weight,
            contribution=contribution,
            evidence=evidence,
            confidence=confidence,
        )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
class DecisionPayload:
    """RFC-0001 SPANFORGE — payload for ``decision.*`` events.

    Captures every individual agent decision at inference time, including
    the full set of contributing decision drivers for T — Transparency.

    ``actor`` is an optional dict representation of an ActorContext and is
    intentionally typed as ``dict | None`` to avoid a circular import.

    Raises:
        ValueError: from ``__post_init__`` when ``decision_id`` or
            ``agent_id`` is empty, ``decision_type`` is not an accepted
            value, ``confidence`` is outside [0.0, 1.0], or ``latency_ms``
            is negative.
    """

    decision_id: str  # ULID
    agent_id: str
    decision_type: str  # classification | routing | generation | tool_selection | other
    input_summary: str
    output_summary: str
    confidence: float  # 0.0–1.0
    latency_ms: float
    rationale_hash: str  # SHA-256 of the full rationale text
    decision_drivers: list[DecisionDriver] = field(default_factory=list)
    actor: dict[str, Any] | None = None

    def __post_init__(self) -> None:
        """Validate field values right after dataclass initialisation."""
        if not self.decision_id:
            raise ValueError("DecisionPayload.decision_id must be non-empty")
        if not self.agent_id:
            raise ValueError("DecisionPayload.agent_id must be non-empty")
        if self.decision_type not in _VALID_DECISION_TYPES:
            raise ValueError(
                f"DecisionPayload.decision_type must be one of {sorted(_VALID_DECISION_TYPES)}"
            )
        if not (0.0 <= self.confidence <= 1.0):
            raise ValueError("DecisionPayload.confidence must be in [0.0, 1.0]")
        if self.latency_ms < 0:
            raise ValueError("DecisionPayload.latency_ms must be >= 0")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        Drivers are serialised through ``DecisionDriver.to_dict``.  ``actor``
        is emitted only when set, and a dict ``actor`` is shallow-copied so
        that mutating the result does not mutate the payload.
        """
        payload: dict[str, Any] = {
            "decision_id": self.decision_id,
            "agent_id": self.agent_id,
            "decision_type": self.decision_type,
            "input_summary": self.input_summary,
            "output_summary": self.output_summary,
            "confidence": self.confidence,
            "latency_ms": self.latency_ms,
            "rationale_hash": self.rationale_hash,
            "decision_drivers": [driver.to_dict() for driver in self.decision_drivers],
        }
        if self.actor is not None:
            # Non-dict values are passed through untouched to stay compatible
            # with callers that stored something else in ``actor``.
            payload["actor"] = dict(self.actor) if isinstance(self.actor, dict) else self.actor
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> DecisionPayload:
        """Deserialise from a plain ``dict``.

        A missing or ``None`` ``decision_drivers`` yields an empty list, and
        a dict ``actor`` is shallow-copied so the payload does not share
        state with ``data``.

        Raises:
            KeyError: when a required key is missing.
            ValueError: when a value fails coercion or validation.
        """
        drivers = [
            DecisionDriver.from_dict(item)
            for item in data.get("decision_drivers") or ()
        ]
        actor = data.get("actor")
        return cls(
            decision_id=data["decision_id"],
            agent_id=data["agent_id"],
            decision_type=data["decision_type"],
            input_summary=data["input_summary"],
            output_summary=data["output_summary"],
            confidence=float(data["confidence"]),
            latency_ms=float(data["latency_ms"]),
            rationale_hash=data["rationale_hash"],
            decision_drivers=drivers,
            actor=dict(actor) if isinstance(actor, dict) else actor,
        )
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""spanforge.namespaces.diff — Diff payload types (RFC-0001).
|
|
2
|
+
|
|
3
|
+
Classes
|
|
4
|
+
-------
|
|
5
|
+
DiffComputedPayload llm.diff.computed
|
|
6
|
+
DiffRegressionFlaggedPayload llm.diff.regression.flagged
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"DiffComputedPayload",
|
|
15
|
+
"DiffRegressionFlaggedPayload",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
# Closed sets of values accepted by the diff payload validators below.
_VALID_DIFF_TYPES = frozenset({"prompt", "response", "template", "token_usage", "cost"})
_VALID_ALGORITHMS = frozenset({
    "embedding_cosine", "levenshtein", "token_edit", "lcs", "semantic_embedding"
})
_VALID_SEVERITIES = frozenset({"low", "medium", "high", "critical"})


@dataclass
class DiffComputedPayload:
    """RFC-0001 — A diff was computed between two events.

    Raises:
        ValueError: from ``__post_init__`` when an event id is empty,
            ``diff_type`` or ``diff_algorithm`` is not an accepted value, or
            ``similarity_score`` is outside [0, 1].
    """

    ref_event_id: str
    target_event_id: str
    diff_type: str  # "prompt"|"response"|"template"|"token_usage"|"cost"
    similarity_score: float
    added_tokens: int | None = None
    removed_tokens: int | None = None
    diff_algorithm: str | None = None
    ref_content_hash: str | None = None  # 64 hex chars
    target_content_hash: str | None = None  # 64 hex chars
    computation_duration_ms: float | None = None

    def __post_init__(self) -> None:
        """Validate field values right after dataclass initialisation."""
        if not self.ref_event_id:
            raise ValueError("DiffComputedPayload.ref_event_id must be non-empty")
        if not self.target_event_id:
            raise ValueError("DiffComputedPayload.target_event_id must be non-empty")
        if self.diff_type not in _VALID_DIFF_TYPES:
            raise ValueError(
                f"DiffComputedPayload.diff_type must be one of {sorted(_VALID_DIFF_TYPES)}"
            )
        if not (0.0 <= self.similarity_score <= 1.0):
            raise ValueError("DiffComputedPayload.similarity_score must be in [0,1]")
        if self.diff_algorithm is not None and self.diff_algorithm not in _VALID_ALGORITHMS:
            raise ValueError(
                f"DiffComputedPayload.diff_algorithm must be one of {sorted(_VALID_ALGORITHMS)}"
            )

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        The four required fields are always present; each optional field is
        emitted only when it is not ``None``.
        """
        d: dict[str, Any] = {
            "ref_event_id": self.ref_event_id,
            "target_event_id": self.target_event_id,
            "diff_type": self.diff_type,
            "similarity_score": self.similarity_score,
        }
        if self.added_tokens is not None:
            d["added_tokens"] = self.added_tokens
        if self.removed_tokens is not None:
            d["removed_tokens"] = self.removed_tokens
        if self.diff_algorithm is not None:
            d["diff_algorithm"] = self.diff_algorithm
        if self.ref_content_hash is not None:
            d["ref_content_hash"] = self.ref_content_hash
        if self.target_content_hash is not None:
            d["target_content_hash"] = self.target_content_hash
        if self.computation_duration_ms is not None:
            d["computation_duration_ms"] = self.computation_duration_ms
        return d

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> DiffComputedPayload:
        """Deserialise from a plain ``dict``.

        Optional numeric keys that are missing *or* explicitly ``None`` leave
        the field unset; previously a present-but-``None`` value was passed
        to ``int()`` / ``float()`` and raised ``TypeError``.

        Raises:
            KeyError: when a required key is missing.
            ValueError: when a value fails coercion or validation.
        """
        added = data.get("added_tokens")
        removed = data.get("removed_tokens")
        duration = data.get("computation_duration_ms")
        return cls(
            ref_event_id=data["ref_event_id"],
            target_event_id=data["target_event_id"],
            diff_type=data["diff_type"],
            similarity_score=float(data["similarity_score"]),
            added_tokens=None if added is None else int(added),
            removed_tokens=None if removed is None else int(removed),
            diff_algorithm=data.get("diff_algorithm"),
            ref_content_hash=data.get("ref_content_hash"),
            target_content_hash=data.get("target_content_hash"),
            computation_duration_ms=None if duration is None else float(duration),
        )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass
class DiffRegressionFlaggedPayload:
    """RFC-0001 — A diff score fell below the similarity threshold.

    Raises:
        ValueError: from ``__post_init__`` when an event id is empty,
            ``diff_type`` or ``severity`` is not an accepted value, or
            ``similarity_score`` / ``threshold`` is outside [0, 1].
    """

    ref_event_id: str
    target_event_id: str
    diff_type: str
    similarity_score: float
    threshold: float
    severity: str  # "low"|"medium"|"high"|"critical"
    diff_event_id: str | None = None
    alert_target: str | None = None

    def __post_init__(self) -> None:
        """Run the field checks in declaration order; the first failure wins."""
        for attr in ("ref_event_id", "target_event_id"):
            if not getattr(self, attr):
                raise ValueError(f"DiffRegressionFlaggedPayload.{attr} must be non-empty")
        if self.diff_type not in _VALID_DIFF_TYPES:
            allowed_types = sorted(_VALID_DIFF_TYPES)
            raise ValueError(
                f"DiffRegressionFlaggedPayload.diff_type must be one of {allowed_types}"
            )
        for attr in ("similarity_score", "threshold"):
            value = getattr(self, attr)
            # Negated chained comparison on purpose: it also rejects NaN.
            if not (0.0 <= value <= 1.0):
                raise ValueError(f"DiffRegressionFlaggedPayload.{attr} must be in [0,1]")
        if self.severity not in _VALID_SEVERITIES:
            allowed_severities = sorted(_VALID_SEVERITIES)
            raise ValueError(
                f"DiffRegressionFlaggedPayload.severity must be one of {allowed_severities}"
            )

    def to_dict(self) -> dict[str, Any]:
        """Serialise the payload to a plain ``dict``.

        The six required fields are always present; ``diff_event_id`` and
        ``alert_target`` are added only when they are not ``None``.
        """
        required = (
            "ref_event_id",
            "target_event_id",
            "diff_type",
            "similarity_score",
            "threshold",
            "severity",
        )
        payload: dict[str, Any] = {name: getattr(self, name) for name in required}
        for name in ("diff_event_id", "alert_target"):
            value = getattr(self, name)
            if value is not None:
                payload[name] = value
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> DiffRegressionFlaggedPayload:
        """Deserialise from a plain ``dict``; the two scores go through ``float``."""
        ref_id = data["ref_event_id"]
        target_id = data["target_event_id"]
        diff_type = data["diff_type"]
        score = float(data["similarity_score"])
        threshold = float(data["threshold"])
        severity = data["severity"]
        return cls(
            ref_event_id=ref_id,
            target_event_id=target_id,
            diff_type=diff_type,
            similarity_score=score,
            threshold=threshold,
            severity=severity,
            diff_event_id=data.get("diff_event_id"),
            alert_target=data.get("alert_target"),
        )
|