tracepipe 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracepipe/__init__.py +1 -1
- tracepipe/storage/lineage_store.py +37 -2
- {tracepipe-0.3.3.dist-info → tracepipe-0.3.4.dist-info}/METADATA +1 -1
- {tracepipe-0.3.3.dist-info → tracepipe-0.3.4.dist-info}/RECORD +6 -6
- {tracepipe-0.3.3.dist-info → tracepipe-0.3.4.dist-info}/WHEEL +0 -0
- {tracepipe-0.3.3.dist-info → tracepipe-0.3.4.dist-info}/licenses/LICENSE +0 -0
tracepipe/__init__.py
CHANGED
(hunk not captured in this extract: +1 -1)
tracepipe/storage/lineage_store.py
CHANGED
|
@@ -32,6 +32,22 @@ from ..core import (
|
|
|
32
32
|
from ..utils.value_capture import capture_typed_value
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
def _stable_repr(val) -> str:
|
|
36
|
+
"""Create a stable string representation for deduplication.
|
|
37
|
+
|
|
38
|
+
Handles NaN, None, and other values that don't compare equal to themselves.
|
|
39
|
+
"""
|
|
40
|
+
if val is None:
|
|
41
|
+
return "None"
|
|
42
|
+
# Handle NaN (which doesn't equal itself)
|
|
43
|
+
try:
|
|
44
|
+
if isinstance(val, float) and val != val: # NaN check
|
|
45
|
+
return "NaN"
|
|
46
|
+
except (TypeError, ValueError):
|
|
47
|
+
pass
|
|
48
|
+
return repr(val)
|
|
49
|
+
|
|
50
|
+
|
|
35
51
|
class InMemoryLineageStore:
|
|
36
52
|
"""
|
|
37
53
|
Columnar storage for lineage data using Structure of Arrays (SoA).
|
|
@@ -556,12 +572,15 @@ class InMemoryLineageStore:
|
|
|
556
572
|
Follows merge lineage recursively to build complete cell provenance.
|
|
557
573
|
This is essential for tracking changes that happened before merge operations.
|
|
558
574
|
|
|
575
|
+
Deduplicates events by (col, old_val, new_val, operation) signature to prevent
|
|
576
|
+
cross-pipeline contamination when multiple DataFrames share row IDs.
|
|
577
|
+
|
|
559
578
|
Args:
|
|
560
579
|
row_id: Row ID to trace
|
|
561
580
|
max_depth: Maximum merge depth to follow (prevents infinite loops)
|
|
562
581
|
|
|
563
582
|
Returns:
|
|
564
|
-
List of events in chronological order, including parent row events.
|
|
583
|
+
List of UNIQUE events in chronological order, including parent row events.
|
|
565
584
|
"""
|
|
566
585
|
visited: set[int] = set()
|
|
567
586
|
|
|
@@ -589,7 +608,23 @@ class InMemoryLineageStore:
|
|
|
589
608
|
# Sort by step_id to ensure chronological order across lineage
|
|
590
609
|
all_events.sort(key=lambda e: e["step_id"])
|
|
591
610
|
|
|
592
|
-
|
|
611
|
+
# Deduplicate by (col, old_val, new_val, operation) signature
|
|
612
|
+
# This prevents cross-pipeline contamination when multiple DataFrames
|
|
613
|
+
# share the same row IDs (e.g., df.copy() followed by parallel transforms)
|
|
614
|
+
seen_signatures: set[tuple] = set()
|
|
615
|
+
unique_events = []
|
|
616
|
+
for event in all_events:
|
|
617
|
+
sig = (
|
|
618
|
+
event.get("col"),
|
|
619
|
+
_stable_repr(event.get("old_val")),
|
|
620
|
+
_stable_repr(event.get("new_val")),
|
|
621
|
+
event.get("operation"),
|
|
622
|
+
)
|
|
623
|
+
if sig not in seen_signatures:
|
|
624
|
+
seen_signatures.add(sig)
|
|
625
|
+
unique_events.append(event)
|
|
626
|
+
|
|
627
|
+
return unique_events
|
|
593
628
|
|
|
594
629
|
def get_cell_history_with_lineage(
|
|
595
630
|
self, row_id: int, column: str, max_depth: int = 10
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tracepipe
|
|
3
|
-
Version: 0.3.3
|
|
3
|
+
Version: 0.3.4
|
|
4
4
|
Summary: Row-level data lineage tracking for pandas pipelines
|
|
5
5
|
Project-URL: Homepage, https://github.com/tracepipe/tracepipe
|
|
6
6
|
Project-URL: Documentation, https://tracepipe.github.io/tracepipe/
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
tracepipe/__init__.py,sha256=
|
|
1
|
+
tracepipe/__init__.py,sha256=1vKVGGc_fePrf1FNjP1R7-RPjtVnAc3Ori9QQl-E_4U,3342
|
|
2
2
|
tracepipe/api.py,sha256=WdcKvvzI3voDt6fxZWa8vjyZQU8lfRshx7T78oj7oFE,13351
|
|
3
3
|
tracepipe/context.py,sha256=_povLpqa5wd_ESHt5hbSmWTSMTF3nUfeutEQo4RMK2E,3856
|
|
4
4
|
tracepipe/contracts.py,sha256=m-rjPrgnCiAgKEkweOS7P95jrjDptt5UPdvUlqaV_rU,16226
|
|
@@ -17,13 +17,13 @@ tracepipe/instrumentation/pandas_inst.py,sha256=2YSoju9ml2PjLOYzsx8MHH1iqhjgnXHb
|
|
|
17
17
|
tracepipe/instrumentation/series_capture.py,sha256=i7FiA2ndEzS6duIj5y-a7SDfIMl2cCY_jGC1tmG7TGU,11271
|
|
18
18
|
tracepipe/storage/__init__.py,sha256=pGFMfbIgIi2kofVPwYDqe2HTYMYJoabiGjTq77pYi-g,348
|
|
19
19
|
tracepipe/storage/base.py,sha256=7DV_-rp37DjBMr9B1w85hLVYhC8OQShk2PcEhT-n4tE,4894
|
|
20
|
-
tracepipe/storage/lineage_store.py,sha256=
|
|
20
|
+
tracepipe/storage/lineage_store.py,sha256=KhGri2uC_O_43fUivFGEHY6KBDHd1I0O_PPd_KD3L4M,28683
|
|
21
21
|
tracepipe/storage/row_identity.py,sha256=HBU0gTTJlFtFTcAdUCKuX-c9cHa0lo3CDIodDPDgOzA,17161
|
|
22
22
|
tracepipe/utils/__init__.py,sha256=CI_GXViCjdMbu1j6HuzZhoQZEW0sIB6WAve6j5pfOC0,182
|
|
23
23
|
tracepipe/utils/value_capture.py,sha256=wGgegQmJnVHxHbwHSH9di7JAOBChzD3ERJrabZNiayk,4092
|
|
24
24
|
tracepipe/visualization/__init__.py,sha256=M3s44ZTUNEToyghjhQW0FgbmWHKPr4Xc-7iNF6DpI_E,132
|
|
25
25
|
tracepipe/visualization/html_export.py,sha256=G0hfZTJctUCfpun17zXX1NIXhvJZbca6hKmP3rcIjbg,42282
|
|
26
|
-
tracepipe-0.3.3.dist-info/METADATA,sha256=
|
|
27
|
-
tracepipe-0.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
28
|
-
tracepipe-0.3.3.dist-info/licenses/LICENSE,sha256=HMOAFHBClL79POwWL-2_aDcx42DJAq7Ce-nwJPvMB9U,1075
|
|
29
|
-
tracepipe-0.3.3.dist-info/RECORD,,
|
|
26
|
+
tracepipe-0.3.4.dist-info/METADATA,sha256=DooQHiRi1HBiFK-QZPpE3PfLg43xE5Yg93kXWEdxhNY,9152
|
|
27
|
+
tracepipe-0.3.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
28
|
+
tracepipe-0.3.4.dist-info/licenses/LICENSE,sha256=HMOAFHBClL79POwWL-2_aDcx42DJAq7Ce-nwJPvMB9U,1075
|
|
29
|
+
tracepipe-0.3.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|