spanforge 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +815 -0
- spanforge/_ansi.py +93 -0
- spanforge/_batch_exporter.py +409 -0
- spanforge/_cli.py +2094 -0
- spanforge/_cli_audit.py +639 -0
- spanforge/_cli_compliance.py +711 -0
- spanforge/_cli_cost.py +243 -0
- spanforge/_cli_ops.py +791 -0
- spanforge/_cli_phase11.py +356 -0
- spanforge/_hooks.py +337 -0
- spanforge/_server.py +1708 -0
- spanforge/_span.py +1036 -0
- spanforge/_store.py +288 -0
- spanforge/_stream.py +664 -0
- spanforge/_trace.py +335 -0
- spanforge/_tracer.py +254 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +469 -0
- spanforge/auto.py +464 -0
- spanforge/baseline.py +335 -0
- spanforge/cache.py +635 -0
- spanforge/compliance.py +325 -0
- spanforge/config.py +532 -0
- spanforge/consent.py +228 -0
- spanforge/consumer.py +377 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1254 -0
- spanforge/cost.py +600 -0
- spanforge/debug.py +548 -0
- spanforge/deprecations.py +205 -0
- spanforge/drift.py +482 -0
- spanforge/egress.py +58 -0
- spanforge/eval.py +648 -0
- spanforge/event.py +1064 -0
- spanforge/exceptions.py +240 -0
- spanforge/explain.py +178 -0
- spanforge/export/__init__.py +69 -0
- spanforge/export/append_only.py +337 -0
- spanforge/export/cloud.py +357 -0
- spanforge/export/datadog.py +497 -0
- spanforge/export/grafana.py +320 -0
- spanforge/export/jsonl.py +195 -0
- spanforge/export/openinference.py +158 -0
- spanforge/export/otel_bridge.py +294 -0
- spanforge/export/otlp.py +811 -0
- spanforge/export/otlp_bridge.py +233 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/siem_schema.py +98 -0
- spanforge/export/siem_splunk.py +264 -0
- spanforge/export/siem_syslog.py +212 -0
- spanforge/export/webhook.py +299 -0
- spanforge/exporters/__init__.py +30 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/exporters/sqlite.py +142 -0
- spanforge/gate.py +1150 -0
- spanforge/governance.py +181 -0
- spanforge/hitl.py +295 -0
- spanforge/http.py +187 -0
- spanforge/inspect.py +427 -0
- spanforge/integrations/__init__.py +45 -0
- spanforge/integrations/_pricing.py +280 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/azure_openai.py +133 -0
- spanforge/integrations/bedrock.py +292 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +351 -0
- spanforge/integrations/groq.py +442 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/langgraph.py +306 -0
- spanforge/integrations/llamaindex.py +373 -0
- spanforge/integrations/ollama.py +287 -0
- spanforge/integrations/openai.py +368 -0
- spanforge/integrations/together.py +483 -0
- spanforge/io.py +214 -0
- spanforge/lint.py +322 -0
- spanforge/metrics.py +417 -0
- spanforge/metrics_export.py +343 -0
- spanforge/migrate.py +402 -0
- spanforge/model_registry.py +278 -0
- spanforge/models.py +389 -0
- spanforge/namespaces/__init__.py +254 -0
- spanforge/namespaces/audit.py +256 -0
- spanforge/namespaces/cache.py +237 -0
- spanforge/namespaces/chain.py +77 -0
- spanforge/namespaces/confidence.py +72 -0
- spanforge/namespaces/consent.py +92 -0
- spanforge/namespaces/cost.py +179 -0
- spanforge/namespaces/decision.py +143 -0
- spanforge/namespaces/diff.py +157 -0
- spanforge/namespaces/drift.py +80 -0
- spanforge/namespaces/eval_.py +251 -0
- spanforge/namespaces/feedback.py +241 -0
- spanforge/namespaces/fence.py +193 -0
- spanforge/namespaces/guard.py +105 -0
- spanforge/namespaces/hitl.py +91 -0
- spanforge/namespaces/latency.py +72 -0
- spanforge/namespaces/prompt.py +190 -0
- spanforge/namespaces/redact.py +173 -0
- spanforge/namespaces/retrieval.py +379 -0
- spanforge/namespaces/runtime_governance.py +494 -0
- spanforge/namespaces/template.py +208 -0
- spanforge/namespaces/tool_call.py +77 -0
- spanforge/namespaces/trace.py +1029 -0
- spanforge/normalizer.py +171 -0
- spanforge/plugins.py +82 -0
- spanforge/presidio_backend.py +349 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +418 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +914 -0
- spanforge/regression.py +192 -0
- spanforge/runtime_policy.py +159 -0
- spanforge/sampling.py +511 -0
- spanforge/schema.py +183 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/sdk/__init__.py +625 -0
- spanforge/sdk/_base.py +584 -0
- spanforge/sdk/_base.pyi +71 -0
- spanforge/sdk/_exceptions.py +1096 -0
- spanforge/sdk/_types.py +2184 -0
- spanforge/sdk/alert.py +1514 -0
- spanforge/sdk/alert.pyi +56 -0
- spanforge/sdk/audit.py +1196 -0
- spanforge/sdk/audit.pyi +67 -0
- spanforge/sdk/cec.py +1215 -0
- spanforge/sdk/cec.pyi +37 -0
- spanforge/sdk/config.py +641 -0
- spanforge/sdk/config.pyi +55 -0
- spanforge/sdk/enterprise.py +714 -0
- spanforge/sdk/enterprise.pyi +79 -0
- spanforge/sdk/explain.py +170 -0
- spanforge/sdk/fallback.py +432 -0
- spanforge/sdk/feedback.py +351 -0
- spanforge/sdk/gate.py +874 -0
- spanforge/sdk/gate.pyi +51 -0
- spanforge/sdk/identity.py +2114 -0
- spanforge/sdk/identity.pyi +47 -0
- spanforge/sdk/lineage.py +175 -0
- spanforge/sdk/observe.py +1065 -0
- spanforge/sdk/observe.pyi +50 -0
- spanforge/sdk/operator.py +338 -0
- spanforge/sdk/pii.py +1473 -0
- spanforge/sdk/pii.pyi +119 -0
- spanforge/sdk/pipelines.py +458 -0
- spanforge/sdk/pipelines.pyi +39 -0
- spanforge/sdk/policy.py +930 -0
- spanforge/sdk/rag.py +594 -0
- spanforge/sdk/rbac.py +280 -0
- spanforge/sdk/registry.py +430 -0
- spanforge/sdk/registry.pyi +46 -0
- spanforge/sdk/scope.py +279 -0
- spanforge/sdk/secrets.py +293 -0
- spanforge/sdk/secrets.pyi +25 -0
- spanforge/sdk/security.py +560 -0
- spanforge/sdk/security.pyi +57 -0
- spanforge/sdk/trust.py +472 -0
- spanforge/sdk/trust.pyi +41 -0
- spanforge/secrets.py +799 -0
- spanforge/signing.py +1179 -0
- spanforge/stats.py +100 -0
- spanforge/stream.py +560 -0
- spanforge/testing.py +378 -0
- spanforge/testing_mocks.py +1052 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +300 -0
- spanforge/validate.py +379 -0
- spanforge-1.0.0.dist-info/METADATA +1509 -0
- spanforge-1.0.0.dist-info/RECORD +174 -0
- spanforge-1.0.0.dist-info/WHEEL +4 -0
- spanforge-1.0.0.dist-info/entry_points.txt +5 -0
- spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
spanforge/governance.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""spanforge.governance — Policy-based event governance.
|
|
2
|
+
|
|
3
|
+
Block prohibited event types, warn on deprecated usage, and enforce custom
|
|
4
|
+
domain rules before events are emitted.
|
|
5
|
+
|
|
6
|
+
Public API
|
|
7
|
+
----------
|
|
8
|
+
EventGovernancePolicy Mutable policy dataclass.
|
|
9
|
+
GovernanceViolationError Raised when a policy blocks an event.
|
|
10
|
+
GovernanceWarning Warning issued for deprecated event types.
|
|
11
|
+
get_global_policy() Return the global policy singleton.
|
|
12
|
+
set_global_policy() Replace (or reset) the global policy.
|
|
13
|
+
check_event() Apply the global policy to an event.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import warnings
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from typing import TYPE_CHECKING
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from collections.abc import Callable
|
|
24
|
+
|
|
25
|
+
from spanforge.event import Event
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"EventGovernancePolicy",
|
|
29
|
+
"GovernanceViolationError",
|
|
30
|
+
"GovernanceWarning",
|
|
31
|
+
"check_event",
|
|
32
|
+
"get_global_policy",
|
|
33
|
+
"set_global_policy",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Exceptions / Warnings
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class GovernanceViolationError(Exception):
    """Raised when an event is blocked by a governance policy.

    The exception message has the form
    ``"Event '<event_type>' blocked: <reason>"``.

    Attributes:
        event_type: The ``event_type`` string of the blocked event.
        reason: Human-readable description of why the event was blocked.
    """

    def __init__(self, event_type: str, reason: str) -> None:
        super().__init__(f"Event '{event_type}' blocked: {reason}")
        self.event_type = event_type
        self.reason = reason

    def __reduce__(self) -> tuple[type, tuple[str, str]]:
        """Rebuild from the two constructor arguments when copied or pickled.

        The default ``Exception.__reduce__`` re-creates the instance from
        ``self.args``, which here holds only the single formatted message.
        That call would fail with ``TypeError`` because ``__init__`` requires
        both ``event_type`` and ``reason``.
        """
        return (type(self), (self.event_type, self.reason))
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class GovernanceWarning(UserWarning):
    """Warning category emitted through :func:`warnings.warn` for deprecated event types.

    Because this is an ordinary warning category, a pytest configuration of
    ``filterwarnings = ["error"]`` turns it into a raised exception.  Assert on
    it with ``pytest.warns(GovernanceWarning)``.
    """
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ---------------------------------------------------------------------------
|
|
65
|
+
# Policy dataclass
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class EventGovernancePolicy:
    """Mutable rule set deciding which events are rejected or flagged.

    Attributes:
        blocked_types: Event type strings that are always rejected.
        warn_deprecated: Event type strings that trigger a
            :class:`GovernanceWarning` but are otherwise let through.
        custom_rules: Callables ``rule(event) -> str | None``.  A truthy
            (non-empty) return value blocks the event, using that string as
            the reason.
        strict_unknown: When ``True``, event types that are not values of
            ``spanforge.types.EventType`` are blocked.
    """

    blocked_types: set[str] = field(default_factory=set)
    warn_deprecated: set[str] = field(default_factory=set)
    custom_rules: list[Callable[[Event], str | None]] = field(default_factory=list)
    strict_unknown: bool = False

    def check_event(self, event: Event) -> None:
        """Run every rule of this policy against *event*.

        The checks run in a fixed order:

        1. **blocked_types** - raises :class:`GovernanceViolationError`.
        2. **warn_deprecated** - issues a :class:`GovernanceWarning` and
           continues.
        3. **custom_rules** - the first rule returning a non-empty string
           raises :class:`GovernanceViolationError` with that reason.
        4. **strict_unknown** - raises :class:`GovernanceViolationError` for
           types missing from the ``EventType`` registry.

        Args:
            event: The event to evaluate.  Its ``event_type`` attribute is
                read; a missing attribute is treated as ``""``.

        Raises:
            GovernanceViolationError: If the event is blocked.
        """
        event_type: str = getattr(event, "event_type", "")

        # 1) The explicit block list is checked before anything else.
        if event_type in self.blocked_types:
            block_reason = f"event type '{event_type}' is in the blocked_types list"
            raise GovernanceViolationError(event_type, block_reason)

        # 2) Deprecated types are only flagged; evaluation carries on.
        if event_type in self.warn_deprecated:
            notice = f"Event type '{event_type}' is deprecated. Update your instrumentation."
            warnings.warn(notice, GovernanceWarning, stacklevel=3)

        # 3) Custom rules run in list order; the first truthy verdict blocks.
        for custom_rule in self.custom_rules:
            verdict = custom_rule(event)
            if verdict:
                raise GovernanceViolationError(event_type, verdict)

        # 4) Optional registry check.
        if not self.strict_unknown:
            return
        try:
            from spanforge.types import EventType as _EventType
        except ImportError:
            # Registry module unavailable: the strict check is skipped.
            return

        # EventType members carry the event type string as their value.
        registered = {member.value for member in _EventType}
        if event_type in registered:
            return
        raise GovernanceViolationError(
            event_type,
            f"strict_unknown=True and '{event_type}' is not a registered EventType",
        )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
# ---------------------------------------------------------------------------
|
|
144
|
+
# Global singleton
|
|
145
|
+
# ---------------------------------------------------------------------------
|
|
146
|
+
|
|
147
|
+
# Process-wide policy read by get_global_policy() and check_event(); it starts
# as a default-constructed policy and is rebound by set_global_policy().
_global_policy: EventGovernancePolicy = EventGovernancePolicy()
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def get_global_policy() -> EventGovernancePolicy:
    """Fetch the process-wide :class:`EventGovernancePolicy` instance.

    Unless a different policy has been installed, this is a default-constructed
    policy: empty ``blocked_types``, empty ``warn_deprecated``, no custom rules,
    and ``strict_unknown=False``.

    Returns:
        The policy object currently bound to the module-level singleton (the
        object itself, not a copy).
    """
    current = _global_policy
    return current
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def set_global_policy(policy: EventGovernancePolicy | None) -> None:
    """Install *policy* as the global policy, or reset it to the default.

    Args:
        policy: The policy to install.  ``None`` installs a freshly
            constructed default :class:`EventGovernancePolicy`.
    """
    global _global_policy
    if policy is None:
        _global_policy = EventGovernancePolicy()
    else:
        _global_policy = policy
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def check_event(event: Event) -> None:
    """Evaluate *event* against the global governance policy.

    This delegates to the ``check_event`` method of the policy currently
    installed as the module-level singleton.

    Args:
        event: The event to check against the global policy.

    Raises:
        GovernanceViolationError: If the event is blocked.
        GovernanceWarning: (via warnings) if the event type is deprecated.
    """
    active_policy = _global_policy
    active_policy.check_event(event)
|
spanforge/hitl.py
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
"""Human-in-the-Loop (HITL) review queue for SpanForge compliance pipeline.
|
|
2
|
+
|
|
3
|
+
Provides a runtime mechanism to intercept low-confidence or high-risk
|
|
4
|
+
agent decisions, queue them for human review, and track approval/rejection
|
|
5
|
+
outcomes in the HMAC audit chain.
|
|
6
|
+
|
|
7
|
+
Required for EU AI Act high-risk mandatory human oversight (Art. 14).
|
|
8
|
+
|
|
9
|
+
Configuration
|
|
10
|
+
-------------
|
|
11
|
+
* ``hitl_enabled=True`` activates the HITL queue.
|
|
12
|
+
* ``hitl_confidence_threshold`` — decisions below this confidence are auto-queued.
|
|
13
|
+
* ``hitl_risk_tiers`` — set of risk tiers that always require review.
|
|
14
|
+
* ``hitl_sla_seconds`` — SLA timeout for pending reviews.
|
|
15
|
+
|
|
16
|
+
Emits ``hitl.queued``, ``hitl.reviewed``, ``hitl.escalated``, ``hitl.timeout``
|
|
17
|
+
events into the HMAC audit chain via :func:`emit_rfc_event`.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import contextlib
|
|
23
|
+
import threading
|
|
24
|
+
from dataclasses import dataclass, field
|
|
25
|
+
from typing import Any, Literal
|
|
26
|
+
|
|
27
|
+
from spanforge.namespaces.hitl import HITLPayload
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"HITLItem",
|
|
31
|
+
"HITLQueue",
|
|
32
|
+
"list_pending",
|
|
33
|
+
"queue_for_review",
|
|
34
|
+
"review_item",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class HITLItem:
    """One decision held in the review queue, together with its review state."""

    # --- identity and the reason the decision was queued ---
    decision_id: str
    agent_id: str
    risk_tier: Literal["low", "medium", "high", "critical"]
    reason: str

    # --- details captured when the item is queued ---
    confidence: float | None = None
    sla_seconds: int = 3600
    queued_at: str | None = None
    payload: dict[str, Any] = field(default_factory=dict)

    # --- review lifecycle ---
    status: Literal["queued", "approved", "rejected", "escalated", "timeout"] = "queued"
    reviewer: str | None = None
    resolved_at: str | None = None
    escalation_tier: int = 0
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class HITLQueue:
    """Thread-safe human-in-the-loop review queue.

    Holds agent decisions that match the configured risk criteria (a risk
    tier that always requires review, or a confidence below the threshold)
    until a named reviewer approves or rejects them.  Items are kept in an
    in-memory dict keyed by ``decision_id``; access to that dict is guarded
    by a lock.

    Args:
        confidence_threshold: Decisions with a confidence strictly below this
            value are flagged by :meth:`should_review`.
        risk_tiers: Risk tiers that always require review.  ``None`` selects
            the default ``{"high", "critical"}``; an explicitly empty set
            disables tier-based review.
        sla_seconds: SLA value recorded on every enqueued item.
        auto_emit: When ``True``, each state change also emits an audit
            event (best-effort; emission failures are suppressed).
    """

    def __init__(
        self,
        *,
        confidence_threshold: float = 0.7,
        risk_tiers: frozenset[str] | None = None,
        sla_seconds: int = 3600,
        auto_emit: bool = True,
    ) -> None:
        self._lock = threading.Lock()
        self._items: dict[str, HITLItem] = {}
        self._confidence_threshold = confidence_threshold
        # Only ``None`` means "use the defaults".  The previous
        # ``risk_tiers or default`` also replaced an explicitly empty set,
        # making it impossible to switch tier-based review off.
        self._risk_tiers: frozenset[str] = (
            frozenset({"high", "critical"}) if risk_tiers is None else frozenset(risk_tiers)
        )
        self._sla_seconds = sla_seconds
        self._auto_emit = auto_emit

    @property
    def confidence_threshold(self) -> float:
        """Minimum confidence below which a decision triggers review."""
        return self._confidence_threshold

    @property
    def sla_seconds(self) -> int:
        """Maximum seconds allowed for a review decision."""
        return self._sla_seconds

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SS.ffffffZ``."""
        import datetime

        return datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ")

    def should_review(
        self,
        *,
        confidence: float | None = None,
        risk_tier: str = "low",
    ) -> bool:
        """Determine if a decision should be queued for human review.

        Args:
            confidence: Confidence of the decision, or ``None`` when unknown.
            risk_tier: Risk tier of the decision.

        Returns:
            ``True`` when *risk_tier* is one of the configured tiers, or when
            *confidence* is given and is strictly below the threshold.
        """
        if risk_tier in self._risk_tiers:
            return True
        return bool(confidence is not None and confidence < self._confidence_threshold)

    def enqueue(
        self,
        decision_id: str,
        agent_id: str,
        risk_tier: Literal["low", "medium", "high", "critical"],
        reason: str,
        *,
        confidence: float | None = None,
        queued_at: str | None = None,
        payload: dict[str, Any] | None = None,
    ) -> HITLItem:
        """Add a decision to the review queue and emit ``hitl.queued``.

        An item already stored under *decision_id* is replaced.

        Args:
            decision_id: Key of the decision; must be non-empty.
            agent_id: Agent that produced the decision; must be non-empty.
            risk_tier: Risk tier recorded on the item.
            reason: Why the decision needs review; must be non-empty.
            confidence: Optional confidence recorded on the item.
            queued_at: Timestamp string; defaults to the current UTC time.
            payload: Optional extra data stored on the item.

        Returns:
            The newly stored :class:`HITLItem`.

        Raises:
            ValueError: If *decision_id*, *agent_id* or *reason* is empty.
        """
        if not decision_id:
            raise ValueError("decision_id must be non-empty")
        if not agent_id:
            raise ValueError("agent_id must be non-empty")
        if not reason:
            raise ValueError("reason must be non-empty")

        if queued_at is None:
            queued_at = self._utc_timestamp()

        item = HITLItem(
            decision_id=decision_id,
            agent_id=agent_id,
            risk_tier=risk_tier,
            reason=reason,
            confidence=confidence,
            sla_seconds=self._sla_seconds,
            queued_at=queued_at,
            payload=payload or {},
            status="queued",
        )
        with self._lock:
            self._items[decision_id] = item

        # Emission happens after the lock is released.
        if self._auto_emit:
            self._emit_event(item, "queued")
        return item

    def review(
        self,
        decision_id: str,
        reviewer: str,
        outcome: Literal["approved", "rejected"],
        *,
        reason: str | None = None,
    ) -> HITLItem | None:
        """Record a reviewer's decision and emit ``hitl.reviewed``.

        Args:
            decision_id: Key of the item being reviewed.
            reviewer: Name of the reviewer; must be non-empty.
            outcome: ``"approved"`` or ``"rejected"``.
            reason: Optional text that replaces the item's stored reason.

        Returns:
            The updated item, or ``None`` when *decision_id* is unknown.

        Raises:
            ValueError: If *reviewer* is empty or *outcome* is not one of the
                two allowed values.
        """
        if not reviewer:
            raise ValueError("reviewer must be non-empty")
        # The outcome is written straight into ``item.status``, so reject
        # anything that is not a valid review result.
        if outcome not in ("approved", "rejected"):
            raise ValueError("outcome must be 'approved' or 'rejected'")

        now = self._utc_timestamp()

        with self._lock:
            item = self._items.get(decision_id)
            if item is None:
                return None
            item.status = outcome
            item.reviewer = reviewer
            item.resolved_at = now
            if reason:
                item.reason = reason

        if self._auto_emit:
            self._emit_event(item, "reviewed")
        return item

    def escalate(
        self,
        decision_id: str,
        *,
        reason: str = "SLA breach or reviewer escalation",
    ) -> HITLItem | None:
        """Escalate an item to the next reviewer tier.

        Sets the status to ``"escalated"``, increments ``escalation_tier`` and
        replaces the stored reason with *reason*.

        Returns:
            The updated item, or ``None`` when *decision_id* is unknown.
        """
        with self._lock:
            item = self._items.get(decision_id)
            if item is None:
                return None
            item.status = "escalated"
            item.escalation_tier += 1
            item.reason = reason

        if self._auto_emit:
            self._emit_event(item, "escalated")
        return item

    def timeout(self, decision_id: str) -> HITLItem | None:
        """Mark an item as timed out (SLA expired).

        Returns:
            The updated item, or ``None`` when *decision_id* is unknown.
        """
        now = self._utc_timestamp()

        with self._lock:
            item = self._items.get(decision_id)
            if item is None:
                return None
            item.status = "timeout"
            item.resolved_at = now

        if self._auto_emit:
            self._emit_event(item, "timeout")
        return item

    def get(self, decision_id: str) -> HITLItem | None:
        """Look up an item by decision_id; ``None`` when it is not stored."""
        with self._lock:
            return self._items.get(decision_id)

    def list_pending(self) -> list[HITLItem]:
        """Return all items still in ``queued`` status."""
        with self._lock:
            return [i for i in self._items.values() if i.status == "queued"]

    def list_all(self) -> list[HITLItem]:
        """Return all items regardless of status."""
        with self._lock:
            return list(self._items.values())

    def clear(self) -> None:
        """Remove all items (for testing)."""
        with self._lock:
            self._items.clear()

    @staticmethod
    def _emit_event(item: HITLItem, action: str) -> None:
        """Emit an HITL event into the HMAC audit chain (best-effort).

        Does nothing when the emitting modules cannot be imported or when
        *action* is not one of ``queued``/``reviewed``/``escalated``/``timeout``.
        Exceptions raised by the emit call itself are suppressed.
        """
        # Keep the try body to the imports, the only statements expected to
        # raise ImportError.
        try:
            from spanforge._stream import emit_rfc_event
            from spanforge.types import EventType
        except ImportError:
            return

        _action_to_event = {
            "queued": EventType.HITL_QUEUED,
            "reviewed": EventType.HITL_REVIEWED,
            "escalated": EventType.HITL_ESCALATED,
            "timeout": EventType.HITL_TIMEOUT,
        }
        et = _action_to_event.get(action)
        if et is None:
            return
        payload = HITLPayload(
            decision_id=item.decision_id,
            agent_id=item.agent_id,
            risk_tier=item.risk_tier,
            status=item.status,
            reason=item.reason,
            reviewer=item.reviewer,
            sla_seconds=item.sla_seconds,
            queued_at=item.queued_at,
            resolved_at=item.resolved_at,
            escalation_tier=item.escalation_tier,
            confidence=item.confidence,
        )
        # Deliberately best-effort: a failing audit sink must not break the
        # queue operation that triggered the event.
        with contextlib.suppress(Exception):
            emit_rfc_event(et, payload.to_dict())
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# ---------------------------------------------------------------------------
|
|
266
|
+
# Module-level singleton & convenience functions
|
|
267
|
+
# ---------------------------------------------------------------------------
|
|
268
|
+
|
|
269
|
+
# Shared queue behind queue_for_review(), review_item() and list_pending();
# constructed with HITLQueue's default settings.
_queue = HITLQueue()
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def queue_for_review(
    decision_id: str,
    agent_id: str,
    risk_tier: Literal["low", "medium", "high", "critical"],
    reason: str,
    **kwargs: Any,
) -> HITLItem:
    """Queue a decision on the module-level :class:`HITLQueue`.

    The four positional arguments and any extra keyword arguments are passed
    unchanged to the shared queue's ``enqueue`` method, whose result is
    returned.
    """
    shared_queue = _queue
    queued = shared_queue.enqueue(decision_id, agent_id, risk_tier, reason, **kwargs)
    return queued
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def review_item(
    decision_id: str,
    reviewer: str,
    outcome: Literal["approved", "rejected"],
    **kwargs: Any,
) -> HITLItem | None:
    """Record a review outcome on the module-level :class:`HITLQueue`.

    The arguments are passed unchanged to the shared queue's ``review``
    method, whose result is returned.
    """
    shared_queue = _queue
    reviewed = shared_queue.review(decision_id, reviewer, outcome, **kwargs)
    return reviewed
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def list_pending() -> list[HITLItem]:
    """Return the result of ``list_pending()`` on the module-level :class:`HITLQueue`."""
    shared_queue = _queue
    pending = shared_queue.list_pending()
    return pending
|
spanforge/http.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""spanforge.http — OpenAI-compatible HTTP client with retry and backoff.
|
|
2
|
+
|
|
3
|
+
Provides a single high-level function :func:`chat_completion` that calls any
|
|
4
|
+
OpenAI-compatible ``/chat/completions`` endpoint, with configurable retry,
|
|
5
|
+
exponential backoff, timeout, and usage extraction. Uses only the standard
|
|
6
|
+
library (``urllib``) so it adds zero dependencies to the framework.
|
|
7
|
+
|
|
8
|
+
Usage::
|
|
9
|
+
|
|
10
|
+
from spanforge.http import chat_completion
|
|
11
|
+
|
|
12
|
+
resp = chat_completion(
|
|
13
|
+
endpoint="https://api.openai.com/v1",
|
|
14
|
+
model="gpt-4o",
|
|
15
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
16
|
+
api_key="sk-...",
|
|
17
|
+
max_retries=2,
|
|
18
|
+
)
|
|
19
|
+
if resp.error:
|
|
20
|
+
print("Error:", resp.error)
|
|
21
|
+
else:
|
|
22
|
+
print(resp.text)
|
|
23
|
+
print(f"Tokens used: {resp.total_tokens}")
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import time
|
|
30
|
+
import urllib.error
|
|
31
|
+
import urllib.request
|
|
32
|
+
from dataclasses import dataclass
|
|
33
|
+
from typing import Any
|
|
34
|
+
|
|
35
|
+
__all__ = [
|
|
36
|
+
"ChatCompletionResponse",
|
|
37
|
+
"chat_completion",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
# HTTP status codes that are safe to retry.
|
|
41
|
+
# 429 (Too Many Requests) plus the 500/502/503/504 server-side failures; any
# other HTTP error status is returned to the caller without a retry.
_RETRYABLE_CODES = frozenset({429, 500, 502, 503, 504})
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(frozen=True)
class ChatCompletionResponse:
    """Immutable outcome of one ``/chat/completions`` request.

    Attributes:
        text: Assistant message content; ``""`` when the call failed.
        latency_ms: Round-trip time in milliseconds.
        error: Human-readable error description; ``None`` when the call
            succeeded.
        prompt_tokens: Prompt token count (0 when unavailable).
        completion_tokens: Completion token count (0 when unavailable).
        total_tokens: Total token count for the request (0 when unavailable).

    Example::

        resp = chat_completion(endpoint=..., model=..., messages=...)
        if resp.ok:
            print(resp.text)
    """

    # Result payload.
    text: str
    latency_ms: float
    error: str | None = None

    # Usage accounting.
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0

    @property
    def ok(self) -> bool:
        """Whether the call succeeded, i.e. ``error`` is ``None``."""
        failed = self.error is not None
        return not failed
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def chat_completion(
    endpoint: str,
    model: str,
    messages: list[dict[str, str]],
    *,
    api_key: str = "",
    timeout: int = 30,
    max_retries: int = 0,
    extra_body: dict[str, Any] | None = None,
) -> ChatCompletionResponse:
    """Call an OpenAI-compatible ``/chat/completions`` endpoint.

    On transient HTTP errors (429, 5xx) and network errors the call is retried
    up to *max_retries* times with exponential back-off (``min(2**attempt, 8)``
    seconds between attempts).

    HTTP, network and response-parsing failures are reported through the
    returned object's ``error`` field instead of being raised.  This covers
    a body that is not valid UTF-8 JSON and a body that lacks the expected
    ``choices[0].message.content`` structure.

    Args:
        endpoint: Base URL of the API (e.g. ``"https://api.openai.com/v1"``).
            The path ``/chat/completions`` is appended automatically.
        model: Model identifier to pass in the request body.
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        api_key: Bearer token.  Falls back to ``$OPENAI_API_KEY`` when empty.
        timeout: Per-attempt timeout in seconds (default 30).
        max_retries: Number of additional attempts after the first failure
            (default 0 = no retries).  Negative values are treated as 0.
        extra_body: Additional top-level keys to merge into the request body
            (e.g. ``{"temperature": 0.0}``).

    Returns:
        A :class:`ChatCompletionResponse` describing the result.  Check
        :attr:`~ChatCompletionResponse.ok` or
        :attr:`~ChatCompletionResponse.error` before using
        :attr:`~ChatCompletionResponse.text`.

    Example::

        from spanforge.http import chat_completion

        resp = chat_completion(
            endpoint="https://api.openai.com/v1",
            model="gpt-4o",
            messages=[{"role": "user", "content": "Say hello."}],
            api_key="sk-...",
            max_retries=2,
        )
        assert resp.ok
        print(resp.text)
    """
    import http.client
    import os

    resolved_key = api_key or os.environ.get("OPENAI_API_KEY", "")
    url = endpoint.rstrip("/") + "/chat/completions"

    payload: dict[str, Any] = {"model": model, "messages": messages}
    if extra_body:
        payload.update(extra_body)
    data = json.dumps(payload).encode("utf-8")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {resolved_key}",
    }

    last_error = ""
    for attempt in range(max(0, max_retries) + 1):
        req = urllib.request.Request(url, data=data, headers=headers, method="POST")
        t0 = time.perf_counter()
        try:
            with urllib.request.urlopen(req, timeout=timeout) as resp:  # nosec B310
                raw = resp.read()
            latency_ms = (time.perf_counter() - t0) * 1000.0
        except urllib.error.HTTPError as exc:
            latency_ms = (time.perf_counter() - t0) * 1000.0
            try:
                # Cap the error body that is read; only 300 chars are reported.
                detail = exc.read(8192).decode("utf-8", errors="replace")
            except Exception:
                detail = str(exc)
            last_error = f"HTTP {exc.code}: {detail[:300]}"
            if exc.code in _RETRYABLE_CODES and attempt < max_retries:
                time.sleep(min(2**attempt, 8))
                continue
            return ChatCompletionResponse(text="", latency_ms=latency_ms, error=last_error)
        except (OSError, http.client.HTTPException) as exc:
            # OSError covers URLError, timeouts and connection failures;
            # HTTPException covers protocol-level failures such as a truncated
            # body (IncompleteRead), which previously escaped as an exception.
            latency_ms = (time.perf_counter() - t0) * 1000.0
            last_error = str(exc)
            if attempt < max_retries:
                time.sleep(min(2**attempt, 8))
                continue
            return ChatCompletionResponse(text="", latency_ms=latency_ms, error=last_error)

        # A 2xx status does not guarantee a JSON body (e.g. an HTML page from
        # a proxy).  JSONDecodeError and UnicodeDecodeError are both
        # ValueError subclasses.
        try:
            body = json.loads(raw.decode("utf-8"))
        except ValueError as exc:
            return ChatCompletionResponse(
                text="",
                latency_ms=latency_ms,
                error=f"invalid JSON response: {exc}",
            )
        if not isinstance(body, dict):
            return ChatCompletionResponse(
                text="",
                latency_ms=latency_ms,
                error=f"unexpected response shape: top-level {type(body).__name__}",
            )

        usage = body.get("usage")
        if not isinstance(usage, dict):
            usage = {}
        try:
            text = body["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError) as exc:
            return ChatCompletionResponse(
                text="",
                latency_ms=latency_ms,
                error=f"unexpected response shape: {exc}",
            )
        if text is None:
            # A null ``content`` is normalised to "" so ``text`` stays a str.
            text = ""

        return ChatCompletionResponse(
            text=text,
            latency_ms=latency_ms,
            error=None,
            prompt_tokens=_token_count(usage, "prompt_tokens"),
            completion_tokens=_token_count(usage, "completion_tokens"),
            total_tokens=_token_count(usage, "total_tokens"),
        )

    # Exhausted retries without returning (defensive; loop always returns or
    # hits a 'continue' that leads back here)
    return ChatCompletionResponse(text="", latency_ms=0.0, error=last_error)  # pragma: no cover


def _token_count(usage: dict[str, Any], key: str) -> int:
    """Return ``usage[key]`` as an int, or 0 when missing, null or non-numeric."""
    try:
        return int(usage.get(key) or 0)
    except (TypeError, ValueError):
        return 0
|