spanforge 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. spanforge/__init__.py +815 -0
  2. spanforge/_ansi.py +93 -0
  3. spanforge/_batch_exporter.py +409 -0
  4. spanforge/_cli.py +2094 -0
  5. spanforge/_cli_audit.py +639 -0
  6. spanforge/_cli_compliance.py +711 -0
  7. spanforge/_cli_cost.py +243 -0
  8. spanforge/_cli_ops.py +791 -0
  9. spanforge/_cli_phase11.py +356 -0
  10. spanforge/_hooks.py +337 -0
  11. spanforge/_server.py +1708 -0
  12. spanforge/_span.py +1036 -0
  13. spanforge/_store.py +288 -0
  14. spanforge/_stream.py +664 -0
  15. spanforge/_trace.py +335 -0
  16. spanforge/_tracer.py +254 -0
  17. spanforge/actor.py +141 -0
  18. spanforge/alerts.py +469 -0
  19. spanforge/auto.py +464 -0
  20. spanforge/baseline.py +335 -0
  21. spanforge/cache.py +635 -0
  22. spanforge/compliance.py +325 -0
  23. spanforge/config.py +532 -0
  24. spanforge/consent.py +228 -0
  25. spanforge/consumer.py +377 -0
  26. spanforge/core/__init__.py +5 -0
  27. spanforge/core/compliance_mapping.py +1254 -0
  28. spanforge/cost.py +600 -0
  29. spanforge/debug.py +548 -0
  30. spanforge/deprecations.py +205 -0
  31. spanforge/drift.py +482 -0
  32. spanforge/egress.py +58 -0
  33. spanforge/eval.py +648 -0
  34. spanforge/event.py +1064 -0
  35. spanforge/exceptions.py +240 -0
  36. spanforge/explain.py +178 -0
  37. spanforge/export/__init__.py +69 -0
  38. spanforge/export/append_only.py +337 -0
  39. spanforge/export/cloud.py +357 -0
  40. spanforge/export/datadog.py +497 -0
  41. spanforge/export/grafana.py +320 -0
  42. spanforge/export/jsonl.py +195 -0
  43. spanforge/export/openinference.py +158 -0
  44. spanforge/export/otel_bridge.py +294 -0
  45. spanforge/export/otlp.py +811 -0
  46. spanforge/export/otlp_bridge.py +233 -0
  47. spanforge/export/redis_backend.py +282 -0
  48. spanforge/export/siem_schema.py +98 -0
  49. spanforge/export/siem_splunk.py +264 -0
  50. spanforge/export/siem_syslog.py +212 -0
  51. spanforge/export/webhook.py +299 -0
  52. spanforge/exporters/__init__.py +30 -0
  53. spanforge/exporters/console.py +271 -0
  54. spanforge/exporters/jsonl.py +144 -0
  55. spanforge/exporters/sqlite.py +142 -0
  56. spanforge/gate.py +1150 -0
  57. spanforge/governance.py +181 -0
  58. spanforge/hitl.py +295 -0
  59. spanforge/http.py +187 -0
  60. spanforge/inspect.py +427 -0
  61. spanforge/integrations/__init__.py +45 -0
  62. spanforge/integrations/_pricing.py +280 -0
  63. spanforge/integrations/anthropic.py +388 -0
  64. spanforge/integrations/azure_openai.py +133 -0
  65. spanforge/integrations/bedrock.py +292 -0
  66. spanforge/integrations/crewai.py +251 -0
  67. spanforge/integrations/gemini.py +351 -0
  68. spanforge/integrations/groq.py +442 -0
  69. spanforge/integrations/langchain.py +349 -0
  70. spanforge/integrations/langgraph.py +306 -0
  71. spanforge/integrations/llamaindex.py +373 -0
  72. spanforge/integrations/ollama.py +287 -0
  73. spanforge/integrations/openai.py +368 -0
  74. spanforge/integrations/together.py +483 -0
  75. spanforge/io.py +214 -0
  76. spanforge/lint.py +322 -0
  77. spanforge/metrics.py +417 -0
  78. spanforge/metrics_export.py +343 -0
  79. spanforge/migrate.py +402 -0
  80. spanforge/model_registry.py +278 -0
  81. spanforge/models.py +389 -0
  82. spanforge/namespaces/__init__.py +254 -0
  83. spanforge/namespaces/audit.py +256 -0
  84. spanforge/namespaces/cache.py +237 -0
  85. spanforge/namespaces/chain.py +77 -0
  86. spanforge/namespaces/confidence.py +72 -0
  87. spanforge/namespaces/consent.py +92 -0
  88. spanforge/namespaces/cost.py +179 -0
  89. spanforge/namespaces/decision.py +143 -0
  90. spanforge/namespaces/diff.py +157 -0
  91. spanforge/namespaces/drift.py +80 -0
  92. spanforge/namespaces/eval_.py +251 -0
  93. spanforge/namespaces/feedback.py +241 -0
  94. spanforge/namespaces/fence.py +193 -0
  95. spanforge/namespaces/guard.py +105 -0
  96. spanforge/namespaces/hitl.py +91 -0
  97. spanforge/namespaces/latency.py +72 -0
  98. spanforge/namespaces/prompt.py +190 -0
  99. spanforge/namespaces/redact.py +173 -0
  100. spanforge/namespaces/retrieval.py +379 -0
  101. spanforge/namespaces/runtime_governance.py +494 -0
  102. spanforge/namespaces/template.py +208 -0
  103. spanforge/namespaces/tool_call.py +77 -0
  104. spanforge/namespaces/trace.py +1029 -0
  105. spanforge/normalizer.py +171 -0
  106. spanforge/plugins.py +82 -0
  107. spanforge/presidio_backend.py +349 -0
  108. spanforge/processor.py +258 -0
  109. spanforge/prompt_registry.py +418 -0
  110. spanforge/py.typed +0 -0
  111. spanforge/redact.py +914 -0
  112. spanforge/regression.py +192 -0
  113. spanforge/runtime_policy.py +159 -0
  114. spanforge/sampling.py +511 -0
  115. spanforge/schema.py +183 -0
  116. spanforge/schemas/v1.0/schema.json +170 -0
  117. spanforge/schemas/v2.0/schema.json +536 -0
  118. spanforge/sdk/__init__.py +625 -0
  119. spanforge/sdk/_base.py +584 -0
  120. spanforge/sdk/_base.pyi +71 -0
  121. spanforge/sdk/_exceptions.py +1096 -0
  122. spanforge/sdk/_types.py +2184 -0
  123. spanforge/sdk/alert.py +1514 -0
  124. spanforge/sdk/alert.pyi +56 -0
  125. spanforge/sdk/audit.py +1196 -0
  126. spanforge/sdk/audit.pyi +67 -0
  127. spanforge/sdk/cec.py +1215 -0
  128. spanforge/sdk/cec.pyi +37 -0
  129. spanforge/sdk/config.py +641 -0
  130. spanforge/sdk/config.pyi +55 -0
  131. spanforge/sdk/enterprise.py +714 -0
  132. spanforge/sdk/enterprise.pyi +79 -0
  133. spanforge/sdk/explain.py +170 -0
  134. spanforge/sdk/fallback.py +432 -0
  135. spanforge/sdk/feedback.py +351 -0
  136. spanforge/sdk/gate.py +874 -0
  137. spanforge/sdk/gate.pyi +51 -0
  138. spanforge/sdk/identity.py +2114 -0
  139. spanforge/sdk/identity.pyi +47 -0
  140. spanforge/sdk/lineage.py +175 -0
  141. spanforge/sdk/observe.py +1065 -0
  142. spanforge/sdk/observe.pyi +50 -0
  143. spanforge/sdk/operator.py +338 -0
  144. spanforge/sdk/pii.py +1473 -0
  145. spanforge/sdk/pii.pyi +119 -0
  146. spanforge/sdk/pipelines.py +458 -0
  147. spanforge/sdk/pipelines.pyi +39 -0
  148. spanforge/sdk/policy.py +930 -0
  149. spanforge/sdk/rag.py +594 -0
  150. spanforge/sdk/rbac.py +280 -0
  151. spanforge/sdk/registry.py +430 -0
  152. spanforge/sdk/registry.pyi +46 -0
  153. spanforge/sdk/scope.py +279 -0
  154. spanforge/sdk/secrets.py +293 -0
  155. spanforge/sdk/secrets.pyi +25 -0
  156. spanforge/sdk/security.py +560 -0
  157. spanforge/sdk/security.pyi +57 -0
  158. spanforge/sdk/trust.py +472 -0
  159. spanforge/sdk/trust.pyi +41 -0
  160. spanforge/secrets.py +799 -0
  161. spanforge/signing.py +1179 -0
  162. spanforge/stats.py +100 -0
  163. spanforge/stream.py +560 -0
  164. spanforge/testing.py +378 -0
  165. spanforge/testing_mocks.py +1052 -0
  166. spanforge/trace.py +199 -0
  167. spanforge/types.py +696 -0
  168. spanforge/ulid.py +300 -0
  169. spanforge/validate.py +379 -0
  170. spanforge-1.0.0.dist-info/METADATA +1509 -0
  171. spanforge-1.0.0.dist-info/RECORD +174 -0
  172. spanforge-1.0.0.dist-info/WHEEL +4 -0
  173. spanforge-1.0.0.dist-info/entry_points.txt +5 -0
  174. spanforge-1.0.0.dist-info/licenses/LICENSE +128 -0
spanforge/stats.py ADDED
@@ -0,0 +1,100 @@
1
+ """spanforge.stats — Latency percentile and summary statistics.
2
+
3
+ Lightweight, dependency-free statistics helpers for latency analysis. All
4
+ inputs are plain Python lists; no NumPy or pandas required.
5
+
6
+ Usage::
7
+
8
+ from spanforge.stats import percentile, latency_summary
9
+
10
+ latencies_ms = [12.3, 45.6, 23.1, 89.4, 34.7]
11
+
12
+ p95 = percentile(latencies_ms, 95)
13
+ summary = latency_summary(latencies_ms)
14
+ print(summary)
15
+ # {'count': 5, 'mean': 41.02, 'min': 12.3, 'max': 89.4,
16
+ # 'p50': 34.7, 'p95': 80.64, 'p99': 87.648}
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import statistics
22
+ from typing import Any
23
+
24
+ __all__ = [
25
+ "latency_summary",
26
+ "percentile",
27
+ ]
28
+
29
+
30
def percentile(values: list[float], p: float) -> float:
    """Return the *p*-th percentile of *values* using linear interpolation.

    The rank of the requested percentile is ``p / 100 * (n - 1)`` over the
    sorted data.  A fractional rank is resolved by interpolating linearly
    between the two neighbouring samples; an integral rank returns that
    sample exactly.

    Args:
        values: List of numeric values (need not be sorted).  The list is
            not modified; a sorted copy is used internally.
        p: Percentile in the range ``[0, 100]``.

    Returns:
        The interpolated percentile value, or ``0.0`` for an empty list.

    Raises:
        ValueError: If *p* is not in ``[0, 100]`` (NaN is rejected too,
            because every comparison with NaN is false).

    Example::

        percentile([1.0, 2.0, 3.0, 4.0, 5.0], 50)  # → 3.0
        percentile([1.0, 2.0, 3.0, 4.0, 5.0], 95)  # → 4.8
    """
    if not 0.0 <= p <= 100.0:
        raise ValueError(f"p must be in [0, 100], got {p!r}")
    if not values:
        return 0.0
    sorted_vals = sorted(values)
    n = len(sorted_vals)
    if n == 1:
        return float(sorted_vals[0])
    idx = (p / 100.0) * (n - 1)
    lo = int(idx)
    hi = lo + 1
    if hi >= n:
        # Only reachable for p == 100: the rank is the last sample.
        return float(sorted_vals[-1])
    frac = idx - lo
    if frac == 0.0:
        # The rank lands exactly on a sample.  Return it directly instead of
        # computing ``upper * 0.0``, which is NaN when the upper neighbour is
        # infinite and would poison an otherwise well-defined result.
        return float(sorted_vals[lo])
    return sorted_vals[lo] * (1.0 - frac) + sorted_vals[hi] * frac
63
+
64
+
65
def latency_summary(values_ms: list[float]) -> dict[str, Any]:
    """Summarise a batch of latency samples as a plain dict.

    Args:
        values_ms: Latency measurements, in milliseconds.

    Returns:
        A dict with the keys ``count``, ``mean``, ``min``, ``max``, ``p50``,
        ``p95`` and ``p99`` (in that order).  Every float is rounded to 3
        decimal places.  For an empty input ``count`` is ``0`` and every
        other entry is ``0.0``.

    Example::

        summary = latency_summary([10.0, 20.0, 30.0, 40.0, 50.0])
        # {'count': 5, 'mean': 30.0, 'min': 10.0, 'max': 50.0,
        #  'p50': 30.0, 'p95': 48.0, 'p99': 49.6}
    """
    if values_ms:
        # Compute each statistic in the same order the keys are emitted.
        sample_count = len(values_ms)
        average = round(statistics.mean(values_ms), 3)
        fastest = round(min(values_ms), 3)
        slowest = round(max(values_ms), 3)
        median = round(percentile(values_ms, 50), 3)
        tail_95 = round(percentile(values_ms, 95), 3)
        tail_99 = round(percentile(values_ms, 99), 3)
        return {
            "count": sample_count,
            "mean": average,
            "min": fastest,
            "max": slowest,
            "p50": median,
            "p95": tail_95,
            "p99": tail_99,
        }

    # No samples: report a zeroed summary rather than raising.
    return {
        "count": 0,
        "mean": 0.0,
        "min": 0.0,
        "max": 0.0,
        "p50": 0.0,
        "p95": 0.0,
        "p99": 0.0,
    }
spanforge/stream.py ADDED
@@ -0,0 +1,560 @@
1
+ """In-memory event stream with filtering and routing.
2
+
3
+ :class:`EventStream` is an ordered, immutable sequence of
4
+ :class:`~spanforge.event.Event` objects with a fluent API for filtering and
5
+ routing to export backends.
6
+
7
+ Usage examples
8
+ --------------
9
+ **Build from a list**::
10
+
11
+ stream = EventStream([event1, event2, event3])
12
+
13
+ **Filter**::
14
+
15
+ errors = stream.filter(lambda e: "error" in e.payload)
16
+ llm_trace = stream.filter_by_type("llm.trace.span.completed")
17
+
18
+ **Route to an exporter**::
19
+
20
+ exporter = JSONLExporter("errors.jsonl")
21
+ await stream.route(exporter, lambda e: e.event_type.startswith("llm.error"))
22
+
23
+ **Drain to an exporter (export all)**::
24
+
25
+ await stream.drain(exporter)
26
+
27
+ **Load from a JSONL file**::
28
+
29
+ stream = EventStream.from_file("audit.jsonl")
30
+
31
+ **Load from an asyncio.Queue**::
32
+
33
+ stream = await EventStream.from_async_queue(queue)
34
+ """
35
+
36
+ from __future__ import annotations
37
+
38
+ import asyncio
39
+ import queue as stdlib_queue
40
+ from pathlib import Path
41
+ from typing import (
42
+ TYPE_CHECKING,
43
+ Any,
44
+ Callable,
45
+ Protocol,
46
+ runtime_checkable,
47
+ )
48
+
49
+ from spanforge.event import Event
50
+
51
+ if TYPE_CHECKING:
52
+ from collections.abc import AsyncIterator, Iterable, Iterator, Sequence
53
+
54
+ __all__ = ["EventStream", "Exporter", "aiter_file", "iter_file"]
55
+
56
+
57
+ # ---------------------------------------------------------------------------
58
+ # Exporter protocol
59
+ # ---------------------------------------------------------------------------
60
+
61
+
62
@runtime_checkable
class Exporter(Protocol):
    """Duck-typed contract for export backends used by :class:`EventStream`.

    An object conforms simply by exposing an async ``export_batch`` method;
    no inheritance is required.  The built-in exporters
    (:class:`~spanforge.export.otlp.OTLPExporter`,
    :class:`~spanforge.export.webhook.WebhookExporter`,
    :class:`~spanforge.export.jsonl.JSONLExporter`) all implement it.

    Because the protocol is ``runtime_checkable``, ``isinstance(obj, Exporter)``
    can be used, but it only verifies that the attribute exists.
    """

    async def export_batch(self, events: Sequence[Event]) -> Any:
        """Export a sequence of events; the return type is left open (``Any``)."""
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # EventStream
79
+ # ---------------------------------------------------------------------------
80
+
81
+
82
class EventStream:
    """An immutable, ordered sequence of :class:`~spanforge.event.Event` objects.

    All methods that return a subset (``filter``, ``filter_by_type``,
    ``filter_by_tags``) return a **new** :class:`EventStream` without
    modifying the original.  Exporters likewise receive a copy of the event
    list, so nothing outside the class can alter a stream's contents.

    Args:
        events: Initial sequence of events.  Defaults to an empty stream.

    Example::

        stream = EventStream([event1, event2, event3])
        filtered = stream.filter_by_type("llm.trace.span.completed")
        await filtered.drain(exporter)
    """

    def __init__(self, events: Iterable[Event] | None = None) -> None:
        # Copy into a private list so later changes to the caller's iterable
        # cannot leak into this stream.
        self._events: list[Event] = list(events) if events is not None else []

    # ------------------------------------------------------------------
    # Class-method constructors
    # ------------------------------------------------------------------

    @classmethod
    def from_file(
        cls,
        path: str | Path,
        *,
        encoding: str = "utf-8",
        skip_errors: bool = False,
    ) -> EventStream:
        """Load events from a JSONL file.

        Each non-empty line is deserialized with
        :meth:`~spanforge.event.Event.from_json`.  Lines that fail to
        deserialize are skipped when ``skip_errors=True``; by default they
        raise :class:`~spanforge.exceptions.DeserializationError`.

        Args:
            path: Path to a ``.jsonl`` file.
            encoding: File encoding (default ``"utf-8"``).
            skip_errors: When ``True``, silently skip malformed lines instead
                of raising.

        Returns:
            A new :class:`EventStream` with the loaded events.

        Raises:
            DeserializationError: On the first malformed line when
                ``skip_errors=False`` (default).
            OSError: If the file cannot be opened.
        """
        from spanforge.exceptions import DeserializationError, LLMSchemaError

        events: list[Event] = []
        with Path(path).open(encoding=encoding) as fh:
            for lineno, raw_line in enumerate(fh, start=1):
                line = raw_line.strip()
                if not line:
                    continue  # blank lines are not records
                try:
                    events.append(Event.from_json(line))
                except (LLMSchemaError, ValueError) as exc:
                    if skip_errors:
                        continue
                    raise DeserializationError(
                        reason=f"line {lineno}: {exc}",
                        source_hint=str(path),
                    ) from exc
        return cls(events)

    @classmethod
    def from_queue(
        cls,
        q: stdlib_queue.Queue[Event],
        *,
        sentinel: object = None,
    ) -> EventStream:
        """Drain a synchronous :class:`queue.Queue` into an EventStream.

        Reads items from *q* until the queue is empty or a *sentinel* value is
        encountered.  Non-blocking: uses :meth:`queue.Queue.get_nowait` so this
        method returns immediately once the queue is drained.

        Args:
            q: A :class:`queue.Queue` containing
                :class:`~spanforge.event.Event` objects.
            sentinel: Stop-value that signals end-of-stream, compared by
                identity.  The sentinel itself is not added to the stream.
                Defaults to ``None``.

        Returns:
            A new :class:`EventStream` with all events drained from the queue.
        """
        events: list[Event] = []
        while True:
            try:
                item = q.get_nowait()
            except stdlib_queue.Empty:
                break
            if item is sentinel:
                break
            events.append(item)
        return cls(events)

    @classmethod
    async def from_async_queue(
        cls,
        q: asyncio.Queue[Event],
        *,
        sentinel: object = None,
        timeout: float | None = None,
    ) -> EventStream:
        """Drain an :class:`asyncio.Queue` into an EventStream.

        Awaits items from *q* until the *sentinel* value is received.  The
        sentinel itself is not added to the stream.

        Args:
            q: An :class:`asyncio.Queue` containing
                :class:`~spanforge.event.Event` objects.
            sentinel: Stop-value, compared by identity (default ``None``).
            timeout: Maximum seconds to wait for each individual item.
                If the wait times out the stream is returned with
                however many events were collected.  ``None`` (default)
                waits indefinitely for each item.

        Returns:
            A new :class:`EventStream` with all events from the queue.
        """
        events: list[Event] = []
        while True:
            try:
                if timeout is not None:
                    item = await asyncio.wait_for(q.get(), timeout=timeout)
                else:
                    item = await q.get()
            except asyncio.TimeoutError:
                # A quiet queue ends the stream rather than raising.
                break
            if item is sentinel:
                break
            events.append(item)
        return cls(events)

    @classmethod
    async def from_async_iter(
        cls,
        async_iter: AsyncIterator[Event],
    ) -> EventStream:
        """Consume an async iterator into an EventStream.

        Args:
            async_iter: Any :class:`~typing.AsyncIterator` of events.

        Returns:
            A new :class:`EventStream`.
        """
        return cls([event async for event in async_iter])

    @classmethod
    def from_kafka(
        cls,
        topic: str,
        bootstrap_servers: str | list[str],
        *,
        group_id: str | None = None,
        sentinel: object = None,
        max_messages: int | None = None,
        poll_timeout_ms: int = 1000,
        skip_errors: bool = False,
    ) -> EventStream:
        """Consume messages from a Kafka topic into an EventStream.

        Each Kafka message value is decoded as a UTF-8 JSON string and
        parsed with :meth:`~spanforge.event.Event.from_json`.

        Requires ``kafka-python >= 2.0`` to be installed.  Install it with::

            pip install "spanforge[kafka]"

        Consumption stops when:

        * A *sentinel* message value is received (not added to stream).
        * *max_messages* events have been collected (when set).
        * The topic-partition reaches the end-of-partition offset and there
          are no more messages within *poll_timeout_ms* (``StopIteration``
          from the consumer is caught automatically).

        Args:
            topic:             Kafka topic name to consume from.
            bootstrap_servers: Kafka broker address(es),
                               e.g. ``"localhost:9092"`` or
                               ``["broker1:9092", "broker2:9092"]``.
            group_id:          Consumer group ID.  ``None`` creates an
                               anonymous (uncoordinated) consumer.
            sentinel:          Message value (decoded UTF-8 string) that
                               signals end-of-stream.  The sentinel message
                               is not added to the returned stream.  With the
                               default ``None``, a message whose value is
                               null ends the stream.
            max_messages:      Maximum number of events to collect.  ``None``
                               means no limit; a value ``<= 0`` yields an
                               empty stream without contacting the broker.
            poll_timeout_ms:   Milliseconds to wait for messages in each poll
                               (default 1 000 ms).
            skip_errors:       When ``True``, silently skip messages that fail
                               to decode or deserialise instead of raising.

        Returns:
            A new :class:`EventStream` with all consumed events.

        Raises:
            ImportError: If ``kafka-python`` is not installed.
            DeserializationError: On the first malformed message when
                ``skip_errors=False`` (default).

        Example::

            stream = EventStream.from_kafka(
                "llm-events",
                "localhost:9092",
                group_id="analytics-pipeline",
                max_messages=1000,
            )
        """
        try:
            from kafka import KafkaConsumer
        except ImportError as exc:  # pragma: no cover
            raise ImportError(
                "kafka-python is required for EventStream.from_kafka(). "
                'Install it with: pip install "spanforge[kafka]"'
            ) from exc

        from spanforge.exceptions import DeserializationError, LLMSchemaError

        if max_messages is not None and max_messages <= 0:
            # Nothing may be collected; checking the cap only after an append
            # would let one event slip through.
            return cls()

        # No ``value_deserializer`` is configured on purpose: payloads are
        # decoded inside the loop below so that undecodable or null values go
        # through the same ``skip_errors`` / ``DeserializationError`` handling
        # as schema errors instead of escaping from consumer iteration.
        consumer: Any = KafkaConsumer(
            topic,
            bootstrap_servers=bootstrap_servers,
            group_id=group_id,
            consumer_timeout_ms=poll_timeout_ms,
            auto_offset_reset="earliest",
            enable_auto_commit=group_id is not None,
        )

        events: list[Event] = []
        try:
            for message in consumer:
                try:
                    value = message.value
                    if isinstance(value, (bytes, bytearray)):
                        # UnicodeDecodeError is a ValueError, so a bad payload
                        # is handled by the except clause below.
                        value = value.decode("utf-8")
                    if value == sentinel:
                        break
                    event = Event.from_json(value)
                except (LLMSchemaError, ValueError) as exc:
                    if skip_errors:
                        continue
                    raise DeserializationError(
                        reason=f"Kafka message offset {message.offset}: {exc}",
                        source_hint=f"topic={topic}",
                    ) from exc
                events.append(event)
                if max_messages is not None and len(events) >= max_messages:
                    break
        finally:
            # Always release the consumer, including on error paths.
            consumer.close()

        return cls(events)

    # ------------------------------------------------------------------
    # Filtering
    # ------------------------------------------------------------------

    def filter(
        self,
        predicate: Callable[[Event], bool],
    ) -> EventStream:
        """Return a new stream containing only events for which *predicate* returns ``True``.

        Args:
            predicate: A callable that accepts an :class:`~spanforge.event.Event`
                and returns ``True`` to keep the event.

        Returns:
            New :class:`EventStream`.
        """
        return EventStream(e for e in self._events if predicate(e))

    def filter_by_type(self, *event_types: str) -> EventStream:
        """Return a new stream with events matching one of the supplied ``event_type`` strings.

        Args:
            *event_types: One or more event type strings.

        Returns:
            New :class:`EventStream`.
        """
        type_set = frozenset(event_types)
        return EventStream(e for e in self._events if e.event_type in type_set)

    def filter_by_tags(self, **tags: str) -> EventStream:
        """Return a filtered stream keeping only events whose tags include all supplied key-value pairs.

        Events whose ``tags`` attribute is ``None`` never match.

        Args:
            **tags: Tag key=value pairs that must all be present.

        Returns:
            New :class:`EventStream`.
        """

        def _matches(event: Event) -> bool:
            if event.tags is None:
                return False
            tag_dict = event.tags.to_dict()
            return all(tag_dict.get(k) == v for k, v in tags.items())

        return EventStream(e for e in self._events if _matches(e))

    # ------------------------------------------------------------------
    # Routing & export
    # ------------------------------------------------------------------

    async def route(
        self,
        exporter: Exporter,
        predicate: Callable[[Event], bool] | None = None,
    ) -> int:
        """Dispatch matching events to *exporter* as a single batch.

        The exporter always receives a fresh list, never the stream's own
        storage, so an exporter that mutates its batch cannot change this
        stream.  ``export_batch`` is not called when no event matches.

        Args:
            exporter:  Any object satisfying the :class:`Exporter` protocol
                       (has an async ``export_batch`` method).
            predicate: Optional filter.  When ``None`` all events are sent.

        Returns:
            Number of events dispatched.
        """
        if predicate is None:
            batch = list(self._events)
        else:
            batch = [e for e in self._events if predicate(e)]

        # Capture the size before handing the list over: the exporter owns
        # the batch from here on and may alter it.
        dispatched = len(batch)
        if batch:
            await exporter.export_batch(batch)
        return dispatched

    async def drain(self, exporter: Exporter) -> int:
        """Export all events in this stream to *exporter*.

        Equivalent to ``await stream.route(exporter)``.

        Args:
            exporter: Target exporter.

        Returns:
            Number of events exported.
        """
        return await self.route(exporter)

    # ------------------------------------------------------------------
    # Sequence protocol
    # ------------------------------------------------------------------

    def __iter__(self) -> Iterator[Event]:
        return iter(self._events)

    def __len__(self) -> int:
        return len(self._events)

    def __getitem__(self, index: int | slice) -> Event | EventStream:
        # A slice yields a new stream; an integer index yields a single event.
        result = self._events[index]
        if isinstance(index, slice):
            return EventStream(result)  # type: ignore[arg-type]
        return result  # type: ignore[return-value]

    def __repr__(self) -> str:
        return f"EventStream({len(self._events)} events)"

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, EventStream):
            return NotImplemented
        return self._events == other._events

    # Equality is defined over the event list, so hashing is explicitly disabled.
    __hash__ = None  # type: ignore[assignment]
458
+
459
+
460
+ # ---------------------------------------------------------------------------
461
+ # Module-level streaming generators (avoid full in-memory accumulation)
462
+ # ---------------------------------------------------------------------------
463
+
464
+
465
def iter_file(
    path: str | Path,
    *,
    encoding: str = "utf-8",
    skip_errors: bool = False,
) -> Iterator[Event]:
    """Lazily parse an NDJSON file into :class:`~spanforge.event.Event` objects.

    This is the streaming counterpart of :meth:`EventStream.from_file`: as a
    **generator** it parses and yields one event per non-blank line, so the
    file is never held in memory as a whole.

    Args:
        path:        Path to the NDJSON file.
        encoding:    File encoding (default ``"utf-8"``).
        skip_errors: When ``True``, lines that fail to parse are silently
                     skipped instead of raising.

    Yields:
        Parsed :class:`~spanforge.event.Event` instances.

    Raises:
        DeserializationError: On the first malformed line when
            ``skip_errors=False`` (default).

    Example::

        for event in iter_file("events.ndjson"):
            process(event)
    """
    from spanforge.exceptions import DeserializationError, LLMSchemaError

    source = Path(path)
    with source.open(encoding=encoding) as handle:
        for number, text in enumerate(handle, start=1):
            record = text.strip()
            if record:
                try:
                    yield Event.from_json(record)
                except (LLMSchemaError, ValueError) as err:
                    if not skip_errors:
                        # Report the 1-based physical line of the bad record.
                        raise DeserializationError(
                            reason=f"Line {number}: {err}",
                            source_hint=str(path),
                        ) from err
511
+
512
+
513
async def aiter_file(
    path: str | Path,
    *,
    encoding: str = "utf-8",
    skip_errors: bool = False,
) -> AsyncIterator[Event]:
    """Async generator equivalent of :func:`iter_file`.

    Reads a newline-delimited JSON file in bounded batches of lines, using
    :func:`asyncio.to_thread` for the blocking open and read calls so the
    event loop is not stalled on I/O, and yields one
    :class:`~spanforge.event.Event` at a time.  Only one batch of lines is
    held in memory at any moment.

    Lines are delimited exactly as in :func:`iter_file` (the file object's
    own newline handling).  Characters such as U+2028, which may appear
    unescaped inside JSON strings, therefore never split a record, and the
    line numbers in error messages match the physical file.

    Args:
        path:        Path to the NDJSON file.
        encoding:    File encoding (default ``"utf-8"``).
        skip_errors: When ``True``, lines that fail to parse are silently
                     skipped instead of raising.

    Yields:
        Parsed :class:`~spanforge.event.Event` instances.

    Raises:
        DeserializationError: On the first malformed line when
            ``skip_errors=False`` (default).
        OSError: If the file cannot be opened.

    Example::

        async for event in aiter_file("events.ndjson"):
            await process(event)
    """
    from spanforge.exceptions import DeserializationError, LLMSchemaError

    # Approximate amount of text fetched per worker-thread hop.  Batching
    # keeps thread hand-offs rare while bounding memory use.
    read_hint = 1 << 20

    fh = await asyncio.to_thread(Path(path).open, encoding=encoding)
    with fh:  # closes the handle even if the consumer stops iterating early
        lineno = 0
        while True:
            batch: list[str] = await asyncio.to_thread(fh.readlines, read_hint)
            if not batch:
                break  # end of file
            for raw in batch:
                lineno += 1
                line = raw.strip()
                if not line:
                    continue
                try:
                    yield Event.from_json(line)
                except (LLMSchemaError, ValueError) as exc:
                    if skip_errors:
                        continue
                    raise DeserializationError(
                        reason=f"Line {lineno}: {exc}",
                        source_hint=str(path),
                    ) from exc