spanforge-2.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spanforge/__init__.py +695 -0
- spanforge/_batch_exporter.py +322 -0
- spanforge/_cli.py +3081 -0
- spanforge/_hooks.py +340 -0
- spanforge/_server.py +953 -0
- spanforge/_span.py +1015 -0
- spanforge/_store.py +287 -0
- spanforge/_stream.py +654 -0
- spanforge/_trace.py +334 -0
- spanforge/_tracer.py +253 -0
- spanforge/actor.py +141 -0
- spanforge/alerts.py +464 -0
- spanforge/auto.py +181 -0
- spanforge/baseline.py +336 -0
- spanforge/config.py +460 -0
- spanforge/consent.py +227 -0
- spanforge/consumer.py +379 -0
- spanforge/core/__init__.py +5 -0
- spanforge/core/compliance_mapping.py +1060 -0
- spanforge/cost.py +597 -0
- spanforge/debug.py +514 -0
- spanforge/drift.py +488 -0
- spanforge/egress.py +63 -0
- spanforge/eval.py +575 -0
- spanforge/event.py +1052 -0
- spanforge/exceptions.py +246 -0
- spanforge/explain.py +181 -0
- spanforge/export/__init__.py +50 -0
- spanforge/export/append_only.py +342 -0
- spanforge/export/cloud.py +349 -0
- spanforge/export/datadog.py +495 -0
- spanforge/export/grafana.py +331 -0
- spanforge/export/jsonl.py +198 -0
- spanforge/export/otel_bridge.py +291 -0
- spanforge/export/otlp.py +817 -0
- spanforge/export/otlp_bridge.py +231 -0
- spanforge/export/redis_backend.py +282 -0
- spanforge/export/webhook.py +302 -0
- spanforge/exporters/__init__.py +29 -0
- spanforge/exporters/console.py +271 -0
- spanforge/exporters/jsonl.py +144 -0
- spanforge/hitl.py +297 -0
- spanforge/inspect.py +429 -0
- spanforge/integrations/__init__.py +39 -0
- spanforge/integrations/_pricing.py +277 -0
- spanforge/integrations/anthropic.py +388 -0
- spanforge/integrations/bedrock.py +306 -0
- spanforge/integrations/crewai.py +251 -0
- spanforge/integrations/gemini.py +349 -0
- spanforge/integrations/groq.py +444 -0
- spanforge/integrations/langchain.py +349 -0
- spanforge/integrations/llamaindex.py +370 -0
- spanforge/integrations/ollama.py +286 -0
- spanforge/integrations/openai.py +370 -0
- spanforge/integrations/together.py +485 -0
- spanforge/metrics.py +393 -0
- spanforge/metrics_export.py +342 -0
- spanforge/migrate.py +278 -0
- spanforge/model_registry.py +282 -0
- spanforge/models.py +407 -0
- spanforge/namespaces/__init__.py +215 -0
- spanforge/namespaces/audit.py +253 -0
- spanforge/namespaces/cache.py +209 -0
- spanforge/namespaces/chain.py +74 -0
- spanforge/namespaces/confidence.py +69 -0
- spanforge/namespaces/consent.py +85 -0
- spanforge/namespaces/cost.py +175 -0
- spanforge/namespaces/decision.py +135 -0
- spanforge/namespaces/diff.py +146 -0
- spanforge/namespaces/drift.py +79 -0
- spanforge/namespaces/eval_.py +232 -0
- spanforge/namespaces/fence.py +180 -0
- spanforge/namespaces/guard.py +104 -0
- spanforge/namespaces/hitl.py +92 -0
- spanforge/namespaces/latency.py +69 -0
- spanforge/namespaces/prompt.py +185 -0
- spanforge/namespaces/redact.py +172 -0
- spanforge/namespaces/template.py +197 -0
- spanforge/namespaces/tool_call.py +76 -0
- spanforge/namespaces/trace.py +1006 -0
- spanforge/normalizer.py +183 -0
- spanforge/presidio_backend.py +149 -0
- spanforge/processor.py +258 -0
- spanforge/prompt_registry.py +415 -0
- spanforge/py.typed +0 -0
- spanforge/redact.py +780 -0
- spanforge/sampling.py +500 -0
- spanforge/schemas/v1.0/schema.json +170 -0
- spanforge/schemas/v2.0/schema.json +536 -0
- spanforge/signing.py +1152 -0
- spanforge/stream.py +559 -0
- spanforge/testing.py +376 -0
- spanforge/trace.py +199 -0
- spanforge/types.py +696 -0
- spanforge/ulid.py +304 -0
- spanforge/validate.py +383 -0
- spanforge-2.0.0.dist-info/METADATA +1777 -0
- spanforge-2.0.0.dist-info/RECORD +101 -0
- spanforge-2.0.0.dist-info/WHEEL +4 -0
- spanforge-2.0.0.dist-info/entry_points.txt +5 -0
- spanforge-2.0.0.dist-info/licenses/LICENSE +21 -0
spanforge/stream.py
ADDED
@@ -0,0 +1,559 @@
"""In-memory event stream with filtering and routing.

:class:`EventStream` is an ordered, immutable sequence of
:class:`~spanforge.event.Event` objects with a fluent API for filtering and
routing to export backends.

Usage examples
--------------
**Build from a list**::

    stream = EventStream([event1, event2, event3])

**Filter**::

    errors = stream.filter(lambda e: "error" in e.payload)
    llm_trace = stream.filter_by_type("llm.trace.span.completed")

**Route to an exporter**::

    exporter = JSONLExporter("errors.jsonl")
    await stream.route(exporter, lambda e: e.event_type.startswith("llm.error"))

**Drain to an exporter (export all)**::

    await stream.drain(exporter)

**Load from a JSONL file**::

    stream = EventStream.from_file("audit.jsonl")

**Load from an asyncio.Queue**::

    stream = await EventStream.from_async_queue(queue)
"""

from __future__ import annotations

import asyncio
import queue as stdlib_queue
from pathlib import Path
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Protocol,
    runtime_checkable,
)

from spanforge.event import Event

if TYPE_CHECKING:
    from collections.abc import AsyncIterator, Iterable, Iterator, Sequence

__all__ = ["EventStream", "Exporter", "aiter_file", "iter_file"]


# ---------------------------------------------------------------------------
# Exporter protocol
# ---------------------------------------------------------------------------


@runtime_checkable
class Exporter(Protocol):
    """Structural protocol for exporters accepted by :class:`EventStream`.

    Any object with an async ``export_batch`` method satisfies this protocol.
    All built-in exporters (:class:`~spanforge.export.otlp.OTLPExporter`,
    :class:`~spanforge.export.webhook.WebhookExporter`,
    :class:`~spanforge.export.jsonl.JSONLExporter`) implement it.
    """

    async def export_batch(self, events: Sequence[Event]) -> Any:  # noqa: ANN401
        """Export a sequence of events."""
        ...

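Because the protocol is structural and runtime-checkable, any object with a matching async ``export_batch`` qualifies. A minimal sketch of a custom test exporter (the ``ListExporter`` name is illustrative, not part of the package)::

    class ListExporter:
        """Collects exported batches in memory; handy in tests."""

        def __init__(self) -> None:
            self.batches: list[list[Event]] = []

        async def export_batch(self, events: Sequence[Event]) -> None:
            # Copy so later mutations cannot alias the stored batch.
            self.batches.append(list(events))

    assert isinstance(ListExporter(), Exporter)  # True: runtime-checkable protocol
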
# ---------------------------------------------------------------------------
# EventStream
# ---------------------------------------------------------------------------


class EventStream:
    """An immutable, ordered sequence of :class:`~spanforge.event.Event` objects.

    All methods that return a subset (``filter``, ``filter_by_type``,
    ``filter_by_tags``) return a **new** :class:`EventStream` without
    modifying the original.

    Args:
        events: Initial sequence of events. Defaults to an empty stream.

    Example::

        stream = EventStream([event1, event2, event3])
        filtered = stream.filter_by_type("llm.trace.span.completed")
        await filtered.drain(exporter)
    """

    def __init__(self, events: Iterable[Event] | None = None) -> None:
        self._events: list[Event] = list(events) if events is not None else []

    # ------------------------------------------------------------------
    # Class-method constructors
    # ------------------------------------------------------------------

    @classmethod
    def from_file(
        cls,
        path: str | Path,
        *,
        encoding: str = "utf-8",
        skip_errors: bool = False,
    ) -> EventStream:
        """Load events from a JSONL file.

        Each non-empty line is deserialized with
        :meth:`~spanforge.event.Event.from_json`. Lines that fail to
        deserialize are skipped when ``skip_errors=True``; by default they
        raise :class:`~spanforge.exceptions.DeserializationError`.

        Args:
            path: Path to a ``.jsonl`` file.
            encoding: File encoding (default ``"utf-8"``).
            skip_errors: When ``True``, silently skip malformed lines instead
                of raising.

        Returns:
            A new :class:`EventStream` with the loaded events.

        Raises:
            DeserializationError: On the first malformed line when
                ``skip_errors=False`` (default).
            OSError: If the file cannot be opened.
        """
        from spanforge.exceptions import DeserializationError, LLMSchemaError  # noqa: PLC0415

        events: list[Event] = []
        with Path(str(path)).open(encoding=encoding) as fh:
            for lineno, raw_line in enumerate(fh, start=1):
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    events.append(Event.from_json(line))
                except (LLMSchemaError, ValueError) as exc:
                    if skip_errors:
                        continue
                    raise DeserializationError(
                        reason=f"line {lineno}: {exc}",
                        source_hint=str(path),
                    ) from exc
        return cls(events)

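A short usage sketch of the two loading modes (file names are illustrative): strict loading for audit pipelines, tolerant loading for ad-hoc analysis::

    # Strict: raises DeserializationError on the first bad line.
    audit = EventStream.from_file("audit.jsonl")

    # Tolerant: malformed lines are silently dropped.
    rough = EventStream.from_file("scratch.jsonl", skip_errors=True)
    print(f"loaded {len(rough)} events")
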
    @classmethod
    def from_queue(
        cls,
        q: stdlib_queue.Queue[Event],
        *,
        sentinel: object = None,
    ) -> EventStream:
        """Drain a synchronous :class:`queue.Queue` into an EventStream.

        Reads items from *q* until the queue is empty or a *sentinel* value is
        encountered. Non-blocking: uses :meth:`queue.Queue.get_nowait` so this
        method returns immediately once the queue is drained.

        Args:
            q: A :class:`queue.Queue` containing
                :class:`~spanforge.event.Event` objects.
            sentinel: Stop-value that signals end-of-stream. The sentinel
                itself is not added to the stream. Defaults to ``None``.

        Returns:
            A new :class:`EventStream` with all events drained from the queue.
        """
        events: list[Event] = []
        while True:
            try:
                item = q.get_nowait()
            except stdlib_queue.Empty:
                break
            if item is sentinel:
                break
            events.append(item)
        return cls(events)

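A sketch of handing events from a producer thread to the stream; a dedicated sentinel object ends the drain deterministically even if ``None`` could appear as a value (``make_event`` is a hypothetical factory)::

    import queue

    q: queue.Queue = queue.Queue()
    STOP = object()  # unique sentinel, safer than the default None

    for i in range(3):
        q.put(make_event(i))
    q.put(STOP)

    stream = EventStream.from_queue(q, sentinel=STOP)
    assert len(stream) == 3
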
    @classmethod
    async def from_async_queue(
        cls,
        q: asyncio.Queue[Event],
        *,
        sentinel: object = None,
        timeout: float | None = None,
    ) -> EventStream:
        """Drain an :class:`asyncio.Queue` into an EventStream.

        Awaits items from *q* until the *sentinel* value is received. The
        sentinel itself is not added to the stream.

        Args:
            q: An :class:`asyncio.Queue` containing
                :class:`~spanforge.event.Event` objects.
            sentinel: Stop-value (default ``None``).
            timeout: Maximum seconds to wait for each individual item.
                If the wait times out, the stream is returned with
                however many events were collected. ``None`` (default)
                waits indefinitely for each item.

        Returns:
            A new :class:`EventStream` with all events from the queue.
        """
        events: list[Event] = []
        while True:
            try:
                if timeout is not None:
                    item = await asyncio.wait_for(q.get(), timeout=timeout)
                else:
                    item = await q.get()
            except asyncio.TimeoutError:
                break
            if item is sentinel:
                break
            events.append(item)
        return cls(events)

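A sketch pairing this constructor with a producer task; the ``timeout`` bounds the wait per item so a stalled producer cannot hang the drain (``make_event`` is again hypothetical)::

    async def collect() -> EventStream:
        q: asyncio.Queue = asyncio.Queue()

        async def produce() -> None:
            for i in range(10):
                await q.put(make_event(i))
            await q.put(None)  # default sentinel ends the drain

        producer = asyncio.create_task(produce())  # keep a reference
        stream = await EventStream.from_async_queue(q, timeout=5.0)
        await producer
        return stream
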
    @classmethod
    async def from_async_iter(
        cls,
        async_iter: AsyncIterator[Event],
    ) -> EventStream:
        """Consume an async iterator into an EventStream.

        Args:
            async_iter: Any :class:`~typing.AsyncIterator` of events.

        Returns:
            A new :class:`EventStream`.
        """
        return cls([event async for event in async_iter])

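This pairs naturally with :func:`aiter_file` (defined at the bottom of this module) when full in-memory materialization after an async read is acceptable::

    stream = await EventStream.from_async_iter(aiter_file("events.ndjson"))
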
    @classmethod
    def from_kafka(  # noqa: PLR0913
        cls,
        topic: str,
        bootstrap_servers: str | list[str],
        *,
        group_id: str | None = None,
        sentinel: object = None,
        max_messages: int | None = None,
        poll_timeout_ms: int = 1000,
        skip_errors: bool = False,
    ) -> EventStream:
        """Consume messages from a Kafka topic into an EventStream.

        Each Kafka message value is deserialized as a UTF-8 JSON string and
        parsed with :meth:`~spanforge.event.Event.from_json`.

        Requires ``kafka-python >= 2.0`` to be installed. Install it with::

            pip install "spanforge[kafka]"

        Consumption stops when:

        * A *sentinel* message value is received (not added to the stream).
        * *max_messages* events have been collected (when set).
        * The topic-partition reaches the end-of-partition offset and there
          are no more messages within *poll_timeout_ms* (``StopIteration``
          from the consumer is caught automatically).

        Args:
            topic: Kafka topic name to consume from.
            bootstrap_servers: Kafka broker address(es),
                e.g. ``"localhost:9092"`` or
                ``["broker1:9092", "broker2:9092"]``.
            group_id: Consumer group ID. ``None`` creates an
                anonymous (uncoordinated) consumer.
            sentinel: Message value (decoded UTF-8 string) that
                signals end-of-stream. The sentinel message
                is not added to the returned stream.
            max_messages: Maximum number of events to collect. ``None``
                means no limit.
            poll_timeout_ms: Milliseconds to wait for messages in each poll
                (default 1000 ms).
            skip_errors: When ``True``, silently skip messages that fail
                to deserialize instead of raising.

        Returns:
            A new :class:`EventStream` with all consumed events.

        Raises:
            ImportError: If ``kafka-python`` is not installed.
            DeserializationError: On the first malformed message when
                ``skip_errors=False`` (default).

        Example::

            stream = EventStream.from_kafka(
                "llm-events",
                "localhost:9092",
                group_id="analytics-pipeline",
                max_messages=1000,
            )
        """
        try:
            from kafka import KafkaConsumer  # type: ignore[import-untyped]  # noqa: PLC0415
        except ImportError as exc:  # pragma: no cover
            raise ImportError(
                "kafka-python is required for EventStream.from_kafka(). "
                'Install it with: pip install "spanforge[kafka]"'
            ) from exc

        from spanforge.exceptions import DeserializationError, LLMSchemaError  # noqa: PLC0415

        consumer: Any = KafkaConsumer(
            topic,
            bootstrap_servers=bootstrap_servers,
            group_id=group_id,
            consumer_timeout_ms=poll_timeout_ms,
            value_deserializer=lambda m: m.decode("utf-8"),
            auto_offset_reset="earliest",
            enable_auto_commit=group_id is not None,
        )

        events: list[Event] = []
        try:
            for message in consumer:
                value = message.value
                if value == sentinel:
                    break
                try:
                    events.append(Event.from_json(value))
                except (LLMSchemaError, ValueError) as exc:
                    if skip_errors:
                        continue
                    raise DeserializationError(
                        reason=f"Kafka message offset {message.offset}: {exc}",
                        source_hint=f"topic={topic}",
                    ) from exc
                if max_messages is not None and len(events) >= max_messages:
                    break
        finally:
            consumer.close()

        return cls(events)

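A tolerant, sentinel-bounded consumption sketch (topic name and sentinel string are illustrative; note the sentinel is compared against the *decoded* message value)::

    stream = EventStream.from_kafka(
        "llm-events",
        "localhost:9092",
        sentinel="__END__",    # producer sends this string to stop the drain
        skip_errors=True,      # drop malformed payloads instead of raising
        poll_timeout_ms=5000,  # give slow brokers more time per poll
    )
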
    # ------------------------------------------------------------------
    # Filtering
    # ------------------------------------------------------------------

    def filter(
        self,
        predicate: Callable[[Event], bool],
    ) -> EventStream:
        """Return a new stream containing only events for which *predicate* returns ``True``.

        Args:
            predicate: A callable that accepts an :class:`~spanforge.event.Event`
                and returns ``True`` to keep the event.

        Returns:
            New :class:`EventStream`.
        """
        return EventStream(e for e in self._events if predicate(e))

    def filter_by_type(self, *event_types: str) -> EventStream:
        """Return a new stream with events matching one of the supplied ``event_type`` strings.

        Args:
            *event_types: One or more event type strings.

        Returns:
            New :class:`EventStream`.
        """
        type_set = frozenset(event_types)
        return EventStream(e for e in self._events if e.event_type in type_set)

    def filter_by_tags(self, **tags: str) -> EventStream:
        """Return a filtered stream keeping only events whose tags include all supplied key-value pairs.

        Args:
            **tags: Tag key=value pairs that must all be present.

        Returns:
            New :class:`EventStream`.
        """  # noqa: E501

        def _matches(event: Event) -> bool:
            if event.tags is None:
                return False
            tag_dict = event.tags.to_dict()
            return all(tag_dict.get(k) == v for k, v in tags.items())

        return EventStream(e for e in self._events if _matches(e))

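Because each filter returns a fresh stream, they compose freely; a chained sketch (event types and tag values are illustrative)::

    prod_errors = (
        stream
        .filter_by_type("llm.error.raised", "llm.error.timeout")
        .filter_by_tags(env="prod")
        .filter(lambda e: "error" in e.payload)
    )
    # The original stream is untouched.
    print(f"{len(prod_errors)} of {len(stream)} events matched")
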
    # ------------------------------------------------------------------
    # Routing & export
    # ------------------------------------------------------------------

    async def route(
        self,
        exporter: Exporter,
        predicate: Callable[[Event], bool] | None = None,
    ) -> int:
        """Dispatch matching events to *exporter* as a single batch.

        Args:
            exporter: Any object satisfying the :class:`Exporter` protocol
                (has an async ``export_batch`` method).
            predicate: Optional filter. When ``None`` all events are sent.

        Returns:
            Number of events dispatched.
        """
        subset = self._events if predicate is None else [e for e in self._events if predicate(e)]

        if subset:
            await exporter.export_batch(subset)
        return len(subset)

    async def drain(self, exporter: Exporter) -> int:
        """Export all events in this stream to *exporter*.

        Equivalent to ``await stream.route(exporter)``.

        Args:
            exporter: Target exporter.

        Returns:
            Number of events exported.
        """
        return await self.route(exporter)

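A fan-out sketch: the same stream routed to two destinations with different predicates (the ``WebhookExporter`` constructor argument is an assumption; ``JSONLExporter("errors.jsonl")`` follows the module docstring)::

    jsonl = JSONLExporter("errors.jsonl")
    webhook = WebhookExporter("https://example.invalid/hooks/llm")

    n_err = await stream.route(jsonl, lambda e: e.event_type.startswith("llm.error"))
    n_all = await stream.drain(webhook)  # everything, in one batch
    print(f"{n_err} errors to JSONL, {n_all} events to webhook")
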
    # ------------------------------------------------------------------
    # Sequence protocol
    # ------------------------------------------------------------------

    def __iter__(self) -> Iterator[Event]:
        return iter(self._events)

    def __len__(self) -> int:
        return len(self._events)

    def __getitem__(self, index: int | slice) -> Event | EventStream:
        result = self._events[index]
        if isinstance(index, slice):
            return EventStream(result)  # type: ignore[arg-type]
        return result  # type: ignore[return-value]

    def __repr__(self) -> str:
        return f"EventStream({len(self._events)} events)"

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, EventStream):
            return NotImplemented
        return self._events == other._events

    __hash__: None = None  # unhashable: defines __eq__ over a mutable internal list

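The sequence protocol means plain Python idioms work; note that slicing returns another :class:`EventStream`, not a list::

    first_ten = stream[:10]   # EventStream of up to 10 events
    last = stream[-1]         # a single Event (raises IndexError if empty)
    for event in stream:      # ordered iteration
        print(event.event_type)
    if len(stream) == 0:
        print("empty stream")
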
# ---------------------------------------------------------------------------
# Module-level streaming generators (avoid full in-memory accumulation)
# ---------------------------------------------------------------------------


def iter_file(
    path: str | Path,
    *,
    encoding: str = "utf-8",
    skip_errors: bool = False,
) -> Iterator[Event]:
    """Yield :class:`~spanforge.event.Event` objects from an NDJSON file one at a time.

    Unlike :meth:`EventStream.from_file`, this function is a **generator**;
    each event is parsed and yielded individually so that very large log files
    can be processed with constant memory overhead.

    Args:
        path: Path to the NDJSON file.
        encoding: File encoding (default ``"utf-8"``).
        skip_errors: When ``True``, lines that fail to parse are silently
            skipped instead of raising.

    Yields:
        Parsed :class:`~spanforge.event.Event` instances.

    Raises:
        DeserializationError: On the first malformed line when
            ``skip_errors=False`` (default).

    Example::

        for event in iter_file("events.ndjson"):
            process(event)
    """
    from spanforge.exceptions import DeserializationError, LLMSchemaError  # noqa: PLC0415

    with Path(path).open(encoding=encoding) as fh:
        for lineno, raw in enumerate(fh, start=1):
            line = raw.strip()
            if not line:
                continue
            try:
                yield Event.from_json(line)
            except (LLMSchemaError, ValueError) as exc:
                if skip_errors:
                    continue
                raise DeserializationError(
                    reason=f"Line {lineno}: {exc}",
                    source_hint=str(path),
                ) from exc

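A constant-memory filtering sketch built on the generator: stream a large file, keep only errors, and wrap the survivors in a stream (file name is illustrative)::

    errors = EventStream(
        e for e in iter_file("huge.ndjson", skip_errors=True)
        if e.event_type.startswith("llm.error")
    )
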
async def aiter_file(
    path: str | Path,
    *,
    encoding: str = "utf-8",
    skip_errors: bool = False,
) -> AsyncIterator[Event]:
    """Async generator equivalent of :func:`iter_file`.

    Reads the newline-delimited JSON file inside :func:`asyncio.to_thread`
    so the event loop is not blocked on I/O (the whole file is read into
    memory before parsing begins), then yields one
    :class:`~spanforge.event.Event` at a time.

    Args:
        path: Path to the NDJSON file.
        encoding: File encoding (default ``"utf-8"``).
        skip_errors: When ``True``, lines that fail to parse are silently
            skipped instead of raising.

    Yields:
        Parsed :class:`~spanforge.event.Event` instances.

    Raises:
        DeserializationError: On the first malformed line when
            ``skip_errors=False`` (default).

    Example::

        async for event in aiter_file("events.ndjson"):
            await process(event)
    """
    from spanforge.exceptions import DeserializationError, LLMSchemaError  # noqa: PLC0415

    lines: list[str] = await asyncio.to_thread(
        lambda: Path(path).read_text(encoding=encoding).splitlines()
    )
    for lineno, raw in enumerate(lines, start=1):
        line = raw.strip()
        if not line:
            continue
        try:
            yield Event.from_json(line)
        except (LLMSchemaError, ValueError) as exc:
            if skip_errors:
                continue
            raise DeserializationError(
                reason=f"Line {lineno}: {exc}",
                source_hint=str(path),
            ) from exc
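An end-to-end async sketch tying the pieces together: read a file off the event loop, filter, and export (exporter construction as in the module docstring; the file name is illustrative)::

    async def pipeline() -> int:
        exporter = JSONLExporter("errors.jsonl")
        stream = await EventStream.from_async_iter(aiter_file("events.ndjson"))
        return await stream.route(
            exporter, lambda e: e.event_type.startswith("llm.error")
        )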