tracepipe 0.2.0-py3-none-any.whl → 0.3.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracepipe/__init__.py +117 -78
- tracepipe/api.py +219 -332
- tracepipe/context.py +21 -1
- tracepipe/contracts.py +473 -0
- tracepipe/convenience.py +817 -0
- tracepipe/core.py +174 -17
- tracepipe/debug.py +325 -0
- tracepipe/instrumentation/apply_capture.py +453 -0
- tracepipe/instrumentation/filter_capture.py +468 -0
- tracepipe/instrumentation/indexer_capture.py +813 -0
- tracepipe/instrumentation/merge_capture.py +434 -0
- tracepipe/instrumentation/pandas_inst.py +66 -183
- tracepipe/instrumentation/series_capture.py +331 -0
- tracepipe/safety.py +3 -3
- tracepipe/snapshot.py +420 -0
- tracepipe/storage/base.py +7 -3
- tracepipe/storage/lineage_store.py +252 -47
- tracepipe/storage/row_identity.py +366 -104
- tracepipe/value_provenance.py +309 -0
- tracepipe/visualization/html_export.py +22 -7
- tracepipe-0.3.1.dist-info/METADATA +308 -0
- tracepipe-0.3.1.dist-info/RECORD +29 -0
- tracepipe-0.2.0.dist-info/METADATA +0 -508
- tracepipe-0.2.0.dist-info/RECORD +0 -19
- {tracepipe-0.2.0.dist-info → tracepipe-0.3.1.dist-info}/WHEEL +0 -0
- {tracepipe-0.2.0.dist-info → tracepipe-0.3.1.dist-info}/licenses/LICENSE +0 -0
tracepipe/storage/lineage_store.py

```diff
@@ -3,6 +3,11 @@
 In-memory lineage storage using Structure of Arrays (SoA) pattern.
 
 Memory: ~40 bytes/diff vs ~150 bytes with dataclass
+
+Features:
+- Merge mapping storage with O(log n) lookup via binary search
+- Sorted bulk drops for efficient drop event lookup
+- Stable API for api/convenience/visualization layers
 """
 
 import atexit
```
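The docstring's "~40 bytes/diff" figure comes from the SoA layout: each diff field lives in its own parallel list instead of one Python object per diff. A minimal sketch of the two layouts; the class and field names here are illustrative, not tracepipe's actual schema:

```python
# Illustrative sketch of Structure-of-Arrays vs Array-of-Structures.
# Names are hypothetical, not tracepipe's real classes.
from dataclasses import dataclass


@dataclass
class DiffRecord:  # AoS: one object (with its own header) per diff
    step_id: int
    row_id: int
    col: str


class DiffColumns:  # SoA: one parallel list per field
    def __init__(self) -> None:
        self.step_ids: list[int] = []
        self.row_ids: list[int] = []
        self.cols: list[str] = []

    def append(self, step_id: int, row_id: int, col: str) -> None:
        # Appending to parallel lists avoids the per-record object
        # overhead, which is where the ~40 vs ~150 bytes/diff gap comes from.
        self.step_ids.append(step_id)
        self.row_ids.append(row_id)
        self.cols.append(col)
```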
```diff
@@ -11,13 +16,17 @@ import time
 from pathlib import Path
 from typing import Any, Optional
 
+import numpy as np
+
 from ..core import (
     AggregationMapping,
     ChangeType,
     CompletenessLevel,
     LineageGap,
     LineageGaps,
-
+    MergeMapping,
+    MergeStats,
+    StepEvent,
     TracePipeConfig,
 )
 from ..utils.value_capture import capture_typed_value
```
```diff
@@ -29,9 +38,26 @@ class InMemoryLineageStore:
 
     Implements: LineageBackend protocol
 
-
-
-
+    STABLE INTERNAL API (used by api.py, convenience.py, visualization):
+
+    === ATTRIBUTES (read-only from outside) ===
+    steps: list[StepEvent]                      # All recorded steps
+    bulk_drops: dict[int, np.ndarray]           # step_id -> sorted dropped RIDs
+    merge_mappings: list[MergeMapping]          # Merge parent mappings (debug mode)
+    merge_stats: list[tuple[int, MergeStats]]   # (step_id, stats) pairs
+
+    === WRITE METHODS (called by instrumentation) ===
+    append_step(...) -> int                     # Returns step_id
+    append_bulk_drops(step_id, rids)            # rids will be sorted internally
+    append_diff(step_id, row_id, col, ...)      # Cell-level diff
+
+    === READ METHODS (called by api/convenience) ===
+    get_drop_event(row_id) -> Optional[dict]    # {step_id, operation}
+    get_dropped_rows() -> list[int]             # All dropped RIDs
+    get_dropped_by_step() -> dict[str, int]     # operation -> count
+    get_row_history(row_id) -> list[dict]       # Chronological events
+    get_merge_stats(step_id=None) -> list[tuple[int, MergeStats]]
+    get_merge_origin(row_id) -> Optional[dict]  # {left_parent, right_parent, step_id}
     """
 
     def __init__(self, config: TracePipeConfig):
```
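The new docstring doubles as a contract for the downstream layers. A hypothetical read-side session against that contract; the method names and return shapes come from the docstring above, while the populated `store` and the concrete values are invented:

```python
# Hypothetical usage of the stable read API documented above.
# Assumes `store` is an InMemoryLineageStore that instrumentation
# has already populated with steps, drops, and diffs.
event = store.get_drop_event(42)
if event is not None:
    print(f"row 42 dropped at step {event['step_id']} ({event['operation']})")

print(store.get_dropped_by_step())    # e.g. {"dropna": 120, "query": 17}

for ev in store.get_row_history(42):  # chronological: step_id increases
    print(ev["step_id"], ev["col"], ev["change_type"])
```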
```diff
@@ -49,7 +75,14 @@
         self.diff_change_types: list[int] = []
 
         # === STEP METADATA ===
-        self._steps: list[
+        self._steps: list[StepEvent] = []
+
+        # === BULK DROPS (step_id -> SORTED numpy array) ===
+        self.bulk_drops: dict[int, np.ndarray] = {}
+
+        # === MERGE TRACKING ===
+        self.merge_mappings: list[MergeMapping] = []
+        self.merge_stats: list[tuple[int, MergeStats]] = []
 
         # === AGGREGATION MAPPINGS ===
         self.aggregation_mappings: list[AggregationMapping] = []
```
```diff
@@ -66,11 +99,12 @@
         self._col_intern: dict[str, str] = {}
         self._type_intern: dict[str, str] = {}
 
-        #
-
+        # === ATEXIT HANDLER ===
+        self._atexit_registered: bool = False
+        self._register_atexit()
 
     @property
-    def steps(self) -> list[
+    def steps(self) -> list[StepEvent]:
         """Access step metadata list."""
         return self._steps
 
```
```diff
@@ -150,8 +184,7 @@
         """
         Bulk append dropped rows - optimized for filter operations.
 
-
-        Typically 10-50x faster than calling append_diff() in a loop.
+        Stores dropped RIDs SORTED for O(log n) lookup via searchsorted.
 
         Args:
             step_id: Step ID for all drops
```
```diff
@@ -160,24 +193,28 @@
         Returns:
             Number of drops recorded
         """
-        import numpy as np
-
         n = len(dropped_row_ids)
         if n == 0:
             return 0
 
-        # Convert to
+        # Convert to sorted numpy array
         if isinstance(dropped_row_ids, np.ndarray):
-
+            sorted_rids = np.sort(dropped_row_ids.astype(np.int64))
         else:
-
+            sorted_rids = np.sort(np.array(list(dropped_row_ids), dtype=np.int64))
+
+        # Store sorted for O(log n) lookup
+        self.bulk_drops[step_id] = sorted_rids
+
+        # Also record in diff arrays for backwards compatibility
+        row_ids_list = sorted_rids.tolist()
 
         # Pre-intern the constant strings once
         col_interned = self._intern_string("__row__", self._col_intern)
         old_type_interned = self._intern_string("str", self._type_intern)
         new_type_interned = self._intern_string("null", self._type_intern)
 
-        # Bulk extend all arrays at once
+        # Bulk extend all arrays at once
         self.diff_step_ids.extend([step_id] * n)
         self.diff_row_ids.extend(row_ids_list)
         self.diff_cols.extend([col_interned] * n)
```
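The write path now pays for sorting once so every later read can binary-search. The membership test below is the same `np.searchsorted` idiom the new lookup methods use; the sample RIDs are made up:

```python
import numpy as np

# Same idiom as the store: sort once on write, binary-search on read.
rids = np.sort(np.array([907, 12, 451, 3], dtype=np.int64))  # -> [3, 12, 451, 907]


def contains(sorted_rids: np.ndarray, row_id: int) -> bool:
    # searchsorted returns the insertion point; the element is present
    # only if that slot is in range and actually holds row_id.
    i = np.searchsorted(sorted_rids, row_id)
    return bool(i < len(sorted_rids) and sorted_rids[i] == row_id)


assert contains(rids, 451)       # found in O(log n)
assert not contains(rids, 452)   # insertion point exists, value differs
```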
```diff
@@ -212,7 +249,7 @@
         """Append step metadata and return step_id."""
         step_id = self.next_step_id()
         self._steps.append(
-
+            StepEvent(
                 step_id=step_id,
                 operation=operation,
                 stage=stage,
```
```diff
@@ -250,6 +287,64 @@
         """Return False for mass updates exceeding threshold."""
         return affected_count <= self.config.max_diffs_per_step
 
+    # === DROP LOOKUP (O(log n) via searchsorted) ===
+
+    def get_drop_event(self, row_id: int) -> Optional[dict]:
+        """
+        Get drop event for a row from bulk_drops.
+
+        O(log n) per step via searchsorted.
+
+        Returns:
+            {step_id, operation} if dropped, else None.
+        """
+        for step_id, dropped_rids in self.bulk_drops.items():
+            # Binary search in sorted array
+            i = np.searchsorted(dropped_rids, row_id)
+            if i < len(dropped_rids) and dropped_rids[i] == row_id:
+                step = self._steps[step_id - 1] if step_id <= len(self._steps) else None
+                return {
+                    "step_id": step_id,
+                    "operation": step.operation if step else "unknown",
+                }
+        return None
+
+    def is_dropped(self, row_id: int) -> bool:
+        """Fast check if row was dropped anywhere."""
+        return self.get_drop_event(row_id) is not None
+
+    # === MERGE LOOKUP (O(log n) via searchsorted) ===
+
+    def get_merge_origin(self, row_id: int) -> Optional[dict]:
+        """
+        Get merge parent RIDs for a row.
+
+        Uses binary search (O(log n)) instead of linear scan (O(n)).
+        """
+        for mapping in self.merge_mappings:
+            # Binary search on sorted out_rids
+            i = np.searchsorted(mapping.out_rids, row_id)
+            if i < len(mapping.out_rids) and mapping.out_rids[i] == row_id:
+                left_parent = mapping.left_parent_rids[i]
+                right_parent = mapping.right_parent_rids[i]
+                return {
+                    "step_id": mapping.step_id,
+                    "left_parent": int(left_parent) if left_parent >= 0 else None,
+                    "right_parent": int(right_parent) if right_parent >= 0 else None,
+                }
+        return None
+
+    def get_merge_stats(self, step_id: Optional[int] = None) -> list[tuple[int, MergeStats]]:
+        """
+        Get merge statistics.
+
+        Returns:
+            list of (step_id, MergeStats) tuples - ALWAYS this shape for consistency.
+        """
+        if step_id is not None:
+            return [(sid, s) for sid, s in self.merge_stats if sid == step_id]
+        return list(self.merge_stats)
+
     # === MEMORY MANAGEMENT ===
 
     def _check_memory_and_spill(self) -> None:
```
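`get_merge_origin` relies on each `MergeMapping` holding three position-aligned arrays, with `out_rids` sorted and `-1` as the "no parent on this side" sentinel. A toy reconstruction under those assumptions; the field names appear in the diff, but `ToyMergeMapping` and the data are invented:

```python
from dataclasses import dataclass

import numpy as np


@dataclass
class ToyMergeMapping:
    # Parallel arrays aligned by position; out_rids must be sorted so the
    # store's np.searchsorted lookup works. -1 means "no parent on this side".
    step_id: int
    out_rids: np.ndarray
    left_parent_rids: np.ndarray
    right_parent_rids: np.ndarray


mapping = ToyMergeMapping(
    step_id=7,
    out_rids=np.array([100, 101, 102], dtype=np.int64),
    left_parent_rids=np.array([10, 11, -1], dtype=np.int64),
    right_parent_rids=np.array([20, -1, 22], dtype=np.int64),
)

i = np.searchsorted(mapping.out_rids, 101)
if i < len(mapping.out_rids) and mapping.out_rids[i] == 101:
    left = int(mapping.left_parent_rids[i])
    print(left if left >= 0 else None)  # 11: row 101 came from left row 11 only
```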
```diff
@@ -307,8 +402,26 @@
         self.diff_change_types.clear()
         self._diff_count = 0
 
+    def _register_atexit(self) -> None:
+        """Register cleanup handler if not already registered."""
+        if not self._atexit_registered:
+            atexit.register(self._cleanup_spillover)
+            self._atexit_registered = True
+
+    def _unregister_atexit(self) -> None:
+        """Unregister cleanup handler."""
+        if self._atexit_registered:
+            try:
+                atexit.unregister(self._cleanup_spillover)
+            except Exception:
+                pass
+            self._atexit_registered = False
+
     def _cleanup_spillover(self) -> None:
         """Clean up spillover files on exit."""
+        # Unregister to prevent multiple calls
+        self._unregister_atexit()
+
         if not self.config.cleanup_spillover_on_disable:
             return
 
```
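The register/unregister pair makes spillover cleanup idempotent: `__init__` registers the handler once, and `_cleanup_spillover` unregisters itself first, so a manual call followed by interpreter shutdown runs the body only once. A standalone sketch of the same pattern:

```python
import atexit


class SpilloverOwner:
    """Standalone sketch of the idempotent-cleanup pattern used above."""

    def __init__(self) -> None:
        self._atexit_registered = False
        # Register once; the flag guards against double registration.
        if not self._atexit_registered:
            atexit.register(self.cleanup)
            self._atexit_registered = True

    def cleanup(self) -> None:
        # Unregister first, so a manual cleanup() followed by interpreter
        # exit does not run this body a second time.
        if self._atexit_registered:
            atexit.unregister(self.cleanup)
            self._atexit_registered = False
        print("removing spillover files...")


owner = SpilloverOwner()
owner.cleanup()  # runs once here; nothing happens again at exit
```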
```diff
@@ -367,12 +480,48 @@
         }
 
     def get_row_history(self, row_id: int) -> list[dict]:
-        """
+        """
+        Get all events for a specific row in CHRONOLOGICAL order (oldest first).
+
+        CONTRACT: Returned list has monotonically increasing step_id.
+        Convenience layer may reverse for display.
+
+        Note: This returns only direct events for this row_id.
+        Use get_row_history_with_lineage() to include pre-merge parent history.
+        """
         step_map = {s.step_id: s for s in self._steps}
         events = []
 
+        # Collect from bulk_drops (sorted by step_id)
+        for step_id in sorted(self.bulk_drops.keys()):
+            dropped_rids = self.bulk_drops[step_id]
+            i = np.searchsorted(dropped_rids, row_id)
+            if i < len(dropped_rids) and dropped_rids[i] == row_id:
+                step = step_map.get(step_id)
+                events.append(
+                    {
+                        "step_id": step_id,
+                        "operation": step.operation if step else "unknown",
+                        "stage": step.stage if step else None,
+                        "col": "__row__",
+                        "old_val": "present",
+                        "old_type": "str",
+                        "new_val": None,
+                        "new_type": "null",
+                        "change_type": "DROPPED",
+                        "timestamp": step.timestamp if step else None,
+                        "completeness": step.completeness.name if step else "UNKNOWN",
+                        "code_location": (
+                            f"{step.code_file}:{step.code_line}"
+                            if step and step.code_file
+                            else None
+                        ),
+                    }
+                )
+
+        # Collect from diffs
         for diff in self._iter_all_diffs():
-            if diff["row_id"] == row_id:
+            if diff["row_id"] == row_id and diff["col"] != "__row__":
                 step = step_map.get(diff["step_id"])
                 events.append(
                     {
```
```diff
@@ -395,16 +544,81 @@
                 }
             )
 
-
+        # ENFORCE: sort by step_id (chronological)
+        events.sort(key=lambda e: e["step_id"])
+
+        return events
+
+    def get_row_history_with_lineage(self, row_id: int, max_depth: int = 10) -> list[dict]:
+        """
+        Get row history including pre-merge parent history.
+
+        Follows merge lineage recursively to build complete cell provenance.
+        This is essential for tracking changes that happened before merge operations.
+
+        Args:
+            row_id: Row ID to trace
+            max_depth: Maximum merge depth to follow (prevents infinite loops)
+
+        Returns:
+            List of events in chronological order, including parent row events.
+        """
+        visited: set[int] = set()
+
+        def _collect_history(rid: int, depth: int) -> list[dict]:
+            if depth > max_depth or rid in visited:
+                return []
+            visited.add(rid)
+
+            events = []
+
+            # Check if this row came from a merge
+            origin = self.get_merge_origin(rid)
+            if origin and origin["left_parent"] is not None:
+                # Recursively get parent's history first (chronological order)
+                parent_events = _collect_history(origin["left_parent"], depth + 1)
+                events.extend(parent_events)
+
+            # Add this row's direct events
+            events.extend(self.get_row_history(rid))
+
+            return events
+
+        all_events = _collect_history(row_id, 0)
+
+        # Sort by step_id to ensure chronological order across lineage
+        all_events.sort(key=lambda e: e["step_id"])
+
+        return all_events
+
+    def get_cell_history_with_lineage(
+        self, row_id: int, column: str, max_depth: int = 10
+    ) -> list[dict]:
+        """
+        Get cell history for a specific column, including pre-merge parent history.
+
+        Args:
+            row_id: Row ID to trace
+            column: Column name to filter events for
+            max_depth: Maximum merge depth to follow
+
+        Returns:
+            List of events for this column in chronological order.
+        """
+        all_events = self.get_row_history_with_lineage(row_id, max_depth)
+        return [e for e in all_events if e["col"] == column]
 
     def get_dropped_rows(self, step_id: Optional[int] = None) -> list[int]:
         """Get all dropped row IDs, optionally filtered by step."""
-
+        if step_id is not None:
+            if step_id in self.bulk_drops:
+                return self.bulk_drops[step_id].tolist()
+            return []
 
-
-
-
-
+        # Collect all dropped rows
+        dropped = set()
+        for rids in self.bulk_drops.values():
+            dropped.update(rids.tolist())
 
         return sorted(dropped)
 
```
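`get_row_history_with_lineage` is a depth-capped, cycle-safe walk up the left-parent chain that emits parent events before child events. A toy version of that traversal over a plain dict; the real method consults merge mappings instead:

```python
from typing import Optional

# Toy lineage: row 100 came from row 10, which came from row 1.
left_parent = {100: 10, 10: 1}
history = {1: ["created"], 10: ["renamed"], 100: ["merged", "filtered"]}


def collect(rid: int, depth: int = 0, max_depth: int = 10,
            visited: Optional[set] = None) -> list:
    visited = set() if visited is None else visited
    if depth > max_depth or rid in visited:  # cap depth, break cycles
        return []
    visited.add(rid)
    events = []
    parent = left_parent.get(rid)
    if parent is not None:
        # Parent history first, so the result reads oldest-first.
        events.extend(collect(parent, depth + 1, max_depth, visited))
    events.extend(history.get(rid, []))
    return events


print(collect(100))  # ['created', 'renamed', 'merged', 'filtered']
```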
```diff
@@ -413,10 +627,9 @@
         step_map = {s.step_id: s.operation for s in self._steps}
         counts: dict[str, int] = {}
 
-        for
-
-
-            counts[op] = counts.get(op, 0) + 1
+        for step_id, rids in self.bulk_drops.items():
+            op = step_map.get(step_id, "unknown")
+            counts[op] = counts.get(op, 0) + len(rids)
 
         return dict(sorted(counts.items(), key=lambda x: -x[1]))
 
```
```diff
@@ -458,6 +671,13 @@
         gaps = []
         row_step_ids = set()
 
+        # From bulk_drops
+        for step_id, rids in self.bulk_drops.items():
+            i = np.searchsorted(rids, row_id)
+            if i < len(rids) and rids[i] == row_id:
+                row_step_ids.add(step_id)
+
+        # From diffs
         for diff in self._iter_all_diffs():
             if diff["row_id"] == row_id:
                 row_step_ids.add(diff["step_id"])
```
```diff
@@ -490,28 +710,12 @@
         diffs = list(self._iter_all_diffs())
 
         data = {
-            "tracepipe_version": "0.2.0",
+            "tracepipe_version": "0.3.1",
             "export_timestamp": time.time(),
             "total_diffs": len(diffs),
             "total_steps": len(self._steps),
             "diffs": diffs,
-            "steps": [
-                {
-                    "step_id": s.step_id,
-                    "operation": s.operation,
-                    "stage": s.stage,
-                    "timestamp": s.timestamp,
-                    "code_file": s.code_file,
-                    "code_line": s.code_line,
-                    "params": s.params,
-                    "input_shape": s.input_shape,
-                    "output_shape": s.output_shape,
-                    "is_mass_update": s.is_mass_update,
-                    "rows_affected": s.rows_affected,
-                    "completeness": s.completeness.name,
-                }
-                for s in self._steps
-            ],
+            "steps": [s.to_dict() for s in self._steps],
             "aggregation_mappings": [
                 {
                     "step_id": a.step_id,
```
```diff
@@ -521,6 +725,7 @@
                 }
                 for a in self.aggregation_mappings
             ],
+            "merge_stats": [{"step_id": sid, **vars(stats)} for sid, stats in self.merge_stats],
         }
 
         return json.dumps(data)
```
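For consumers of the export, a sketch of reading the payload back. Only the keys visible in these hunks are assumed, and `export_json` is a hypothetical name, since the diff shows the method body but not its signature:

```python
import json

raw = store.export_json()  # hypothetical method name; the diff shows only the body
payload = json.loads(raw)

print(payload["tracepipe_version"])            # "0.3.1"
print(payload["total_steps"], payload["total_diffs"])

for step in payload["steps"]:                  # dicts produced by StepEvent.to_dict()
    print(step.get("step_id"), step.get("operation"))

for ms in payload["merge_stats"]:              # {"step_id": ..., **MergeStats fields}
    print(ms["step_id"])
```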