spanforge 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174)
  1. spanforge/__init__.py +815 -0
  2. spanforge/_ansi.py +93 -0
  3. spanforge/_batch_exporter.py +409 -0
  4. spanforge/_cli.py +2094 -0
  5. spanforge/_cli_audit.py +639 -0
  6. spanforge/_cli_compliance.py +711 -0
  7. spanforge/_cli_cost.py +243 -0
  8. spanforge/_cli_ops.py +791 -0
  9. spanforge/_cli_phase11.py +356 -0
  10. spanforge/_hooks.py +337 -0
  11. spanforge/_server.py +1708 -0
  12. spanforge/_span.py +1036 -0
  13. spanforge/_store.py +288 -0
  14. spanforge/_stream.py +664 -0
  15. spanforge/_trace.py +335 -0
  16. spanforge/_tracer.py +254 -0
  17. spanforge/actor.py +141 -0
  18. spanforge/alerts.py +469 -0
  19. spanforge/auto.py +464 -0
  20. spanforge/baseline.py +335 -0
  21. spanforge/cache.py +635 -0
  22. spanforge/compliance.py +325 -0
  23. spanforge/config.py +532 -0
  24. spanforge/consent.py +228 -0
  25. spanforge/consumer.py +377 -0
  26. spanforge/core/__init__.py +5 -0
  27. spanforge/core/compliance_mapping.py +1254 -0
  28. spanforge/cost.py +600 -0
  29. spanforge/debug.py +548 -0
  30. spanforge/deprecations.py +205 -0
  31. spanforge/drift.py +482 -0
  32. spanforge/egress.py +58 -0
  33. spanforge/eval.py +648 -0
  34. spanforge/event.py +1064 -0
  35. spanforge/exceptions.py +240 -0
  36. spanforge/explain.py +178 -0
  37. spanforge/export/__init__.py +69 -0
  38. spanforge/export/append_only.py +337 -0
  39. spanforge/export/cloud.py +357 -0
  40. spanforge/export/datadog.py +497 -0
  41. spanforge/export/grafana.py +320 -0
  42. spanforge/export/jsonl.py +195 -0
  43. spanforge/export/openinference.py +158 -0
  44. spanforge/export/otel_bridge.py +294 -0
  45. spanforge/export/otlp.py +811 -0
  46. spanforge/export/otlp_bridge.py +233 -0
  47. spanforge/export/redis_backend.py +282 -0
  48. spanforge/export/siem_schema.py +98 -0
  49. spanforge/export/siem_splunk.py +264 -0
  50. spanforge/export/siem_syslog.py +212 -0
  51. spanforge/export/webhook.py +299 -0
  52. spanforge/exporters/__init__.py +30 -0
  53. spanforge/exporters/console.py +271 -0
  54. spanforge/exporters/jsonl.py +144 -0
  55. spanforge/exporters/sqlite.py +142 -0
  56. spanforge/gate.py +1150 -0
  57. spanforge/governance.py +181 -0
  58. spanforge/hitl.py +295 -0
  59. spanforge/http.py +187 -0
  60. spanforge/inspect.py +427 -0
  61. spanforge/integrations/__init__.py +45 -0
  62. spanforge/integrations/_pricing.py +280 -0
  63. spanforge/integrations/anthropic.py +388 -0
  64. spanforge/integrations/azure_openai.py +133 -0
  65. spanforge/integrations/bedrock.py +292 -0
  66. spanforge/integrations/crewai.py +251 -0
  67. spanforge/integrations/gemini.py +351 -0
  68. spanforge/integrations/groq.py +442 -0
  69. spanforge/integrations/langchain.py +349 -0
  70. spanforge/integrations/langgraph.py +306 -0
  71. spanforge/integrations/llamaindex.py +373 -0
  72. spanforge/integrations/ollama.py +287 -0
  73. spanforge/integrations/openai.py +368 -0
  74. spanforge/integrations/together.py +483 -0
  75. spanforge/io.py +214 -0
  76. spanforge/lint.py +322 -0
  77. spanforge/metrics.py +417 -0
  78. spanforge/metrics_export.py +343 -0
  79. spanforge/migrate.py +402 -0
  80. spanforge/model_registry.py +278 -0
  81. spanforge/models.py +389 -0
  82. spanforge/namespaces/__init__.py +254 -0
  83. spanforge/namespaces/audit.py +256 -0
  84. spanforge/namespaces/cache.py +237 -0
  85. spanforge/namespaces/chain.py +77 -0
  86. spanforge/namespaces/confidence.py +72 -0
  87. spanforge/namespaces/consent.py +92 -0
  88. spanforge/namespaces/cost.py +179 -0
  89. spanforge/namespaces/decision.py +143 -0
  90. spanforge/namespaces/diff.py +157 -0
  91. spanforge/namespaces/drift.py +80 -0
  92. spanforge/namespaces/eval_.py +251 -0
  93. spanforge/namespaces/feedback.py +241 -0
  94. spanforge/namespaces/fence.py +193 -0
  95. spanforge/namespaces/guard.py +105 -0
  96. spanforge/namespaces/hitl.py +91 -0
  97. spanforge/namespaces/latency.py +72 -0
  98. spanforge/namespaces/prompt.py +190 -0
  99. spanforge/namespaces/redact.py +173 -0
  100. spanforge/namespaces/retrieval.py +379 -0
  101. spanforge/namespaces/runtime_governance.py +494 -0
  102. spanforge/namespaces/template.py +208 -0
  103. spanforge/namespaces/tool_call.py +77 -0
  104. spanforge/namespaces/trace.py +1029 -0
  105. spanforge/normalizer.py +171 -0
  106. spanforge/plugins.py +82 -0
  107. spanforge/presidio_backend.py +349 -0
  108. spanforge/processor.py +258 -0
  109. spanforge/prompt_registry.py +418 -0
  110. spanforge/py.typed +0 -0
  111. spanforge/redact.py +914 -0
  112. spanforge/regression.py +192 -0
  113. spanforge/runtime_policy.py +159 -0
  114. spanforge/sampling.py +511 -0
  115. spanforge/schema.py +183 -0
  116. spanforge/schemas/v1.0/schema.json +170 -0
  117. spanforge/schemas/v2.0/schema.json +536 -0
  118. spanforge/sdk/__init__.py +625 -0
  119. spanforge/sdk/_base.py +584 -0
  120. spanforge/sdk/_base.pyi +71 -0
  121. spanforge/sdk/_exceptions.py +1096 -0
  122. spanforge/sdk/_types.py +2184 -0
  123. spanforge/sdk/alert.py +1514 -0
  124. spanforge/sdk/alert.pyi +56 -0
  125. spanforge/sdk/audit.py +1196 -0
  126. spanforge/sdk/audit.pyi +67 -0
  127. spanforge/sdk/cec.py +1215 -0
  128. spanforge/sdk/cec.pyi +37 -0
  129. spanforge/sdk/config.py +641 -0
  130. spanforge/sdk/config.pyi +55 -0
  131. spanforge/sdk/enterprise.py +714 -0
  132. spanforge/sdk/enterprise.pyi +79 -0
  133. spanforge/sdk/explain.py +170 -0
  134. spanforge/sdk/fallback.py +432 -0
  135. spanforge/sdk/feedback.py +351 -0
  136. spanforge/sdk/gate.py +874 -0
  137. spanforge/sdk/gate.pyi +51 -0
  138. spanforge/sdk/identity.py +2114 -0
  139. spanforge/sdk/identity.pyi +47 -0
  140. spanforge/sdk/lineage.py +175 -0
  141. spanforge/sdk/observe.py +1065 -0
  142. spanforge/sdk/observe.pyi +50 -0
  143. spanforge/sdk/operator.py +338 -0
  144. spanforge/sdk/pii.py +1473 -0
  145. spanforge/sdk/pii.pyi +119 -0
  146. spanforge/sdk/pipelines.py +458 -0
  147. spanforge/sdk/pipelines.pyi +39 -0
  148. spanforge/sdk/policy.py +930 -0
  149. spanforge/sdk/rag.py +594 -0
  150. spanforge/sdk/rbac.py +280 -0
  151. spanforge/sdk/registry.py +430 -0
  152. spanforge/sdk/registry.pyi +46 -0
  153. spanforge/sdk/scope.py +279 -0
  154. spanforge/sdk/secrets.py +293 -0
  155. spanforge/sdk/secrets.pyi +25 -0
  156. spanforge/sdk/security.py +560 -0
  157. spanforge/sdk/security.pyi +57 -0
  158. spanforge/sdk/trust.py +472 -0
  159. spanforge/sdk/trust.pyi +41 -0
  160. spanforge/secrets.py +799 -0
  161. spanforge/signing.py +1179 -0
  162. spanforge/stats.py +100 -0
  163. spanforge/stream.py +560 -0
  164. spanforge/testing.py +378 -0
  165. spanforge/testing_mocks.py +1052 -0
  166. spanforge/trace.py +199 -0
  167. spanforge/types.py +696 -0
  168. spanforge/ulid.py +300 -0
  169. spanforge/validate.py +379 -0
  170. spanforge-1.0.0.dist-info/METADATA +1509 -0
  171. spanforge-1.0.0.dist-info/RECORD +174 -0
  172. spanforge-1.0.0.dist-info/WHEEL +4 -0
  173. spanforge-1.0.0.dist-info/entry_points.txt +5 -0
  174. spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
@@ -0,0 +1,192 @@
1
+ """spanforge.regression — Generic pass/fail regression detection.
2
+
3
+ Provides :class:`RegressionDetector` for comparing two evaluation runs and
4
+ surfacing cases that have *regressed*: passing in the baseline but failing in
5
+ the current run, or whose score dropped by more than a configurable threshold.
6
+
7
+ Unlike :class:`spanforge.eval.RegressionDetector` (which compares mean metric
8
+ scores between runs), this detector operates on individual result records with
9
+ explicit ``passed`` and ``score`` fields — making it well-suited for CI gates
10
+ where each test case must individually pass.
11
+
12
+ Usage::
13
+
14
+ from spanforge.regression import RegressionDetector
15
+
16
+ detector = RegressionDetector(score_drop_threshold=0.1)
17
+ report = detector.compare(
18
+ baseline=baseline_results,
19
+ current=current_results,
20
+ key_fn=lambda r: (r["case_id"], r["scorer_name"]),
21
+ passed_fn=lambda r: r["passed"],
22
+ score_fn=lambda r: r["score"],
23
+ )
24
+
25
+ if report.has_regression:
26
+ for item in report.new_failures:
27
+ print("NEW FAILURE:", item)
28
+ for base, curr in report.score_drops:
29
+ print(f"SCORE DROP: {base} → {curr}")
30
+ sys.exit(1)
31
+
32
+ Works with any record type (dicts, dataclasses, etc.) via the *key_fn*,
33
+ *passed_fn*, and *score_fn* callbacks. There is also a convenience
34
+ :func:`compare` top-level function for one-shot use.
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ from dataclasses import dataclass, field
40
+ from typing import Any, Callable, Generic, TypeVar
41
+
42
+ __all__ = [
43
+ "RegressionDetector",
44
+ "RegressionReport",
45
+ "compare",
46
+ ]
47
+
48
+ T = TypeVar("T")
49
+
50
+
51
@dataclass
class RegressionReport(Generic[T]):
    """Outcome of comparing a current evaluation run against a baseline.

    Attributes:
        new_failures: Current-run items whose baseline counterpart passed
            but which now fail.
        score_drops: ``(baseline_item, current_item)`` pairs whose score
            fell by at least the configured threshold.
    """

    new_failures: list[T] = field(default_factory=list)
    score_drops: list[tuple[T, T]] = field(default_factory=list)

    @property
    def has_regression(self) -> bool:
        """``True`` if any new failure or score drop was recorded."""
        if self.new_failures or self.score_drops:
            return True
        return False

    def summary(self) -> str:
        """Build a short, human-readable one-line description of the report."""
        labelled = (
            (self.new_failures, "new failure(s)"),
            (self.score_drops, "score drop(s)"),
        )
        # Only non-empty categories are mentioned, in a fixed order.
        fragments = [f"{len(items)} {label}" for items, label in labelled if items]
        return "; ".join(fragments) if fragments else "no regression detected"
80
+
81
+
82
class RegressionDetector(Generic[T]):
    """Compare two evaluation runs and report regressions.

    A *regression* is one of:

    * A key that **passed** in the baseline but **fails** in the current run.
    * A key whose score **dropped** by at least *score_drop_threshold*
      (even when the current result still passes).

    New keys that appear only in the current run are **not** flagged as
    regressions (they may be new test cases).  Keys that disappear from the
    current run are also silently ignored.  When several records of one run
    map to the same key, the last record wins.

    Args:
        score_drop_threshold: Minimum absolute score decrease that
            constitutes a regression.  Default is ``0.1``.

    Example::

        detector = RegressionDetector[dict](score_drop_threshold=0.05)
        report = detector.compare(
            baseline, current,
            key_fn=lambda r: (r["case_id"], r["scorer"]),
            passed_fn=lambda r: r["passed"],
            score_fn=lambda r: r["score"],
        )
        print(report.summary())
    """

    def __init__(self, score_drop_threshold: float = 0.1) -> None:
        self.score_drop_threshold = score_drop_threshold

    def compare(
        self,
        baseline: list[T],
        current: list[T],
        *,
        key_fn: Callable[[T], Any],
        passed_fn: Callable[[T], bool],
        score_fn: Callable[[T], float],
    ) -> RegressionReport[T]:
        """Compare *current* against *baseline* and return a :class:`RegressionReport`.

        Args:
            baseline: Results from a known-good previous run.
            current: Results from the run being checked.
            key_fn: Callable that returns a hashable key identifying a
                result (e.g. ``lambda r: (r.case_id, r.scorer_name)``).
            passed_fn: Callable that returns ``True`` when a result passed.
            score_fn: Callable that returns the numeric score of a result.
                It is only consulted for pairs that are not already
                reported as new failures.

        Returns:
            A :class:`RegressionReport` describing found regressions.  Items
            appear in the iteration order of *current*.
        """
        # Function-scope import so the module's import block stays untouched.
        import math

        baseline_map: dict[Any, T] = {key_fn(r): r for r in baseline}
        current_map: dict[Any, T] = {key_fn(r): r for r in current}
        threshold = self.score_drop_threshold

        new_failures: list[T] = []
        score_drops: list[tuple[T, T]] = []

        for key, curr in current_map.items():
            base = baseline_map.get(key)
            if base is None:
                continue  # new key — not a regression

            if passed_fn(base) and not passed_fn(curr):
                new_failures.append(curr)
                continue  # a new failure is never double-reported as a drop

            drop = score_fn(base) - score_fn(curr)
            # Binary floating point makes e.g. 0.9 - 0.8 evaluate to
            # 0.09999999999999998, which a bare ``>=`` would treat as smaller
            # than a 0.1 threshold.  A drop within rounding error of the
            # threshold therefore counts as "at least the threshold".
            if drop >= threshold or math.isclose(
                drop, threshold, rel_tol=1e-9, abs_tol=1e-12
            ):
                score_drops.append((base, curr))

        return RegressionReport(new_failures=new_failures, score_drops=score_drops)
153
+
154
+
155
def compare(
    baseline: list[Any],
    current: list[Any],
    *,
    key_fn: Callable[[Any], Any],
    passed_fn: Callable[[Any], bool],
    score_fn: Callable[[Any], float],
    score_drop_threshold: float = 0.1,
) -> RegressionReport[Any]:
    """Run a single baseline-vs-current comparison without keeping a detector.

    Builds a :class:`RegressionDetector` with *score_drop_threshold* and
    forwards every other argument to its ``compare`` method.

    Args:
        baseline: Results from the baseline run.
        current: Results from the run being checked.
        key_fn: Returns a unique key for each result.
        passed_fn: Returns ``True`` when a result passed.
        score_fn: Returns the numeric score of a result.
        score_drop_threshold: Minimum score drop to flag as regression.

    Returns:
        A :class:`RegressionReport`.

    Example::

        report = compare(
            baseline, current,
            key_fn=lambda r: r["id"],
            passed_fn=lambda r: r["ok"],
            score_fn=lambda r: r["score"],
        )
    """
    detector = RegressionDetector(score_drop_threshold=score_drop_threshold)
    callbacks = {"key_fn": key_fn, "passed_fn": passed_fn, "score_fn": score_fn}
    return detector.compare(baseline, current, **callbacks)
@@ -0,0 +1,159 @@
1
+ """spanforge.runtime_policy - Phase 0 runtime policy schema contracts.
2
+
3
+ This module freezes the policy object model used by the GA runtime governance
4
+ control plane. Enforcement engines can evolve behind these contracts without
5
+ changing the configuration shape exposed to users.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import Any
12
+
13
+ __all__ = [
14
+ "RuntimePolicyBundle",
15
+ "RuntimePolicyRule",
16
+ ]
17
+
18
+ _VALID_ENVIRONMENTS = frozenset({"dev", "staging", "prod"})
19
+ _VALID_POLICY_ACTIONS = frozenset({"allow", "allow+log", "redact", "block", "human_review"})
20
+ _VALID_SERVICES = frozenset({"sf_explain", "sf_scope", "sf_rbac", "sf_rag", "sf_lineage"})
21
+
22
+
23
+ def _require_mapping(data: Any, type_name: str) -> dict[str, Any]:
24
+ if not isinstance(data, dict):
25
+ raise ValueError(f"{type_name} input must be a dict")
26
+ return data
27
+
28
+
29
+ def _require_fields(data: dict[str, Any], type_name: str, fields: tuple[str, ...]) -> None:
30
+ missing = [field for field in fields if field not in data]
31
+ if missing:
32
+ raise ValueError(f"{type_name} is missing required fields: {', '.join(missing)}")
33
+
34
+
35
@dataclass
class RuntimePolicyRule:
    """One runtime governance rule bound to a service and control.

    Attributes:
        rule_id: Non-empty identifier of the rule.
        service: One of the names in ``_VALID_SERVICES``.
        control: Non-empty control name.
        action: One of the names in ``_VALID_POLICY_ACTIONS``.
        enabled: Flag stored with the rule; defaults to ``True``.
        threshold: Optional value in ``[0.0, 1.0]``.
        rationale: Optional free-text explanation.
        metadata: Optional extra key/value data.

    Raises:
        ValueError: On construction, when any of the constraints above is
            violated.
    """

    rule_id: str
    service: str
    control: str
    action: str
    enabled: bool = True
    threshold: float | None = None
    rationale: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate field values right after dataclass initialisation."""
        if not self.rule_id:
            raise ValueError("RuntimePolicyRule.rule_id must be non-empty")
        if self.service not in _VALID_SERVICES:
            raise ValueError(
                f"RuntimePolicyRule.service must be one of {sorted(_VALID_SERVICES)}"
            )
        if not self.control:
            raise ValueError("RuntimePolicyRule.control must be non-empty")
        if self.action not in _VALID_POLICY_ACTIONS:
            raise ValueError(
                f"RuntimePolicyRule.action must be one of {sorted(_VALID_POLICY_ACTIONS)}"
            )
        if self.threshold is not None and not (0.0 <= self.threshold <= 1.0):
            raise ValueError("RuntimePolicyRule.threshold must be in [0.0, 1.0]")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the rule to a plain dict.

        The five core keys are always present; ``threshold``, ``rationale``
        and ``metadata`` are emitted only when set / non-empty.
        """
        data: dict[str, Any] = {
            "rule_id": self.rule_id,
            "service": self.service,
            "control": self.control,
            "action": self.action,
            "enabled": self.enabled,
        }
        if self.threshold is not None:
            data["threshold"] = self.threshold
        if self.rationale:
            data["rationale"] = self.rationale
        if self.metadata:
            data["metadata"] = self.metadata
        return data

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> RuntimePolicyRule:
        """Build a rule from a plain dict.

        ``rule_id``, ``service``, ``control`` and ``action`` are required.
        For the optional ``threshold``, ``rationale`` and ``metadata`` keys an
        explicit ``None`` (e.g. a JSON ``null``) is treated the same as an
        absent key.

        Raises:
            ValueError: If *data* is not a dict, a required key is missing,
                or the resulting rule fails validation.
        """
        parsed = _require_mapping(data, "RuntimePolicyRule")
        _require_fields(
            parsed,
            "RuntimePolicyRule",
            ("rule_id", "service", "control", "action"),
        )
        # ``float(None)`` and ``dict(None)`` raise TypeError, so null values
        # are normalised to the field defaults before conversion.
        raw_threshold = parsed.get("threshold")
        return cls(
            rule_id=parsed["rule_id"],
            service=parsed["service"],
            control=parsed["control"],
            action=parsed["action"],
            enabled=bool(parsed.get("enabled", True)),
            threshold=None if raw_threshold is None else float(raw_threshold),
            rationale=parsed.get("rationale") or "",
            metadata=dict(parsed.get("metadata") or {}),
        )
98
+
99
+
100
@dataclass
class RuntimePolicyBundle:
    """Versioned runtime policy bundle for one deployment environment.

    Attributes:
        policy_id: Non-empty identifier of the bundle.
        version: Non-empty version string.
        environment: One of the names in ``_VALID_ENVIRONMENTS``.
        owner: Non-empty owner string.
        effective_at: Non-empty string; its format is not checked here.
        rules: The :class:`RuntimePolicyRule` objects in this bundle.
        rationale: Optional free-text explanation.
        metadata: Optional extra key/value data.

    Raises:
        ValueError: On construction, when a required string is empty or the
            environment is not recognised.
    """

    policy_id: str
    version: str
    environment: str
    owner: str
    effective_at: str
    rules: list[RuntimePolicyRule] = field(default_factory=list)
    rationale: str = ""
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate field values right after dataclass initialisation."""
        if not self.policy_id:
            raise ValueError("RuntimePolicyBundle.policy_id must be non-empty")
        if not self.version:
            raise ValueError("RuntimePolicyBundle.version must be non-empty")
        if self.environment not in _VALID_ENVIRONMENTS:
            raise ValueError(
                f"RuntimePolicyBundle.environment must be one of {sorted(_VALID_ENVIRONMENTS)}"
            )
        if not self.owner:
            raise ValueError("RuntimePolicyBundle.owner must be non-empty")
        if not self.effective_at:
            raise ValueError("RuntimePolicyBundle.effective_at must be non-empty")

    def to_dict(self) -> dict[str, Any]:
        """Serialise the bundle, including its rules, to a plain dict.

        ``rationale`` and ``metadata`` are emitted only when non-empty.
        """
        data: dict[str, Any] = {
            "policy_id": self.policy_id,
            "version": self.version,
            "environment": self.environment,
            "owner": self.owner,
            "effective_at": self.effective_at,
            "rules": [rule.to_dict() for rule in self.rules],
        }
        if self.rationale:
            data["rationale"] = self.rationale
        if self.metadata:
            data["metadata"] = self.metadata
        return data

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> RuntimePolicyBundle:
        """Build a bundle from a plain dict.

        ``policy_id``, ``version``, ``environment``, ``owner`` and
        ``effective_at`` are required.  For the optional ``rules``,
        ``rationale`` and ``metadata`` keys an explicit ``None`` (e.g. a JSON
        ``null``) is treated the same as an absent key.

        Raises:
            ValueError: If *data* is not a dict, a required key is missing,
                or the bundle or any of its rules fails validation.
        """
        parsed = _require_mapping(data, "RuntimePolicyBundle")
        _require_fields(
            parsed,
            "RuntimePolicyBundle",
            ("policy_id", "version", "environment", "owner", "effective_at"),
        )
        # Iterating ``None`` or calling ``dict(None)`` raises TypeError, so
        # null values are normalised to empty containers first.
        raw_rules = parsed.get("rules") or []
        return cls(
            policy_id=parsed["policy_id"],
            version=parsed["version"],
            environment=parsed["environment"],
            owner=parsed["owner"],
            effective_at=parsed["effective_at"],
            rules=[RuntimePolicyRule.from_dict(item) for item in raw_rules],
            rationale=parsed.get("rationale") or "",
            metadata=dict(parsed.get("metadata") or {}),
        )