tracepipe 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tracepipe/__init__.py CHANGED
@@ -81,7 +81,7 @@ from .core import TracePipeConfig, TracePipeMode
81
81
  from .snapshot import DiffResult, Snapshot, diff, snapshot
82
82
 
83
83
  # === VERSION ===
84
- __version__ = "0.3.3"
84
+ __version__ = "0.3.4"
85
85
 
86
86
  # === MINIMAL __all__ ===
87
87
  __all__ = [
@@ -32,6 +32,22 @@ from ..core import (
32
32
  from ..utils.value_capture import capture_typed_value
33
33
 
34
34
 
35
+ def _stable_repr(val) -> str:
36
+ """Create a stable string representation for deduplication.
37
+
38
+ Handles NaN, None, and other values that don't compare equal to themselves.
39
+ """
40
+ if val is None:
41
+ return "None"
42
+ # Handle NaN (which doesn't equal itself)
43
+ try:
44
+ if isinstance(val, float) and val != val: # NaN check
45
+ return "NaN"
46
+ except (TypeError, ValueError):
47
+ pass
48
+ return repr(val)
49
+
50
+
35
51
  class InMemoryLineageStore:
36
52
  """
37
53
  Columnar storage for lineage data using Structure of Arrays (SoA).
@@ -556,12 +572,15 @@ class InMemoryLineageStore:
556
572
  Follows merge lineage recursively to build complete cell provenance.
557
573
  This is essential for tracking changes that happened before merge operations.
558
574
 
575
+ Deduplicates events by (col, old_val, new_val, operation) signature to prevent
576
+ cross-pipeline contamination when multiple DataFrames share row IDs.
577
+
559
578
  Args:
560
579
  row_id: Row ID to trace
561
580
  max_depth: Maximum merge depth to follow (prevents infinite loops)
562
581
 
563
582
  Returns:
564
- List of events in chronological order, including parent row events.
583
+ List of UNIQUE events in chronological order, including parent row events.
565
584
  """
566
585
  visited: set[int] = set()
567
586
 
@@ -589,7 +608,23 @@ class InMemoryLineageStore:
589
608
  # Sort by step_id to ensure chronological order across lineage
590
609
  all_events.sort(key=lambda e: e["step_id"])
591
610
 
592
- return all_events
611
+ # Deduplicate by (col, old_val, new_val, operation) signature
612
+ # This prevents cross-pipeline contamination when multiple DataFrames
613
+ # share the same row IDs (e.g., df.copy() followed by parallel transforms)
614
+ seen_signatures: set[tuple] = set()
615
+ unique_events = []
616
+ for event in all_events:
617
+ sig = (
618
+ event.get("col"),
619
+ _stable_repr(event.get("old_val")),
620
+ _stable_repr(event.get("new_val")),
621
+ event.get("operation"),
622
+ )
623
+ if sig not in seen_signatures:
624
+ seen_signatures.add(sig)
625
+ unique_events.append(event)
626
+
627
+ return unique_events
593
628
 
594
629
  def get_cell_history_with_lineage(
595
630
  self, row_id: int, column: str, max_depth: int = 10
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tracepipe
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Row-level data lineage tracking for pandas pipelines
5
5
  Project-URL: Homepage, https://github.com/tracepipe/tracepipe
6
6
  Project-URL: Documentation, https://tracepipe.github.io/tracepipe/
@@ -1,4 +1,4 @@
1
- tracepipe/__init__.py,sha256=VhfrIqYHgzY7mDQwcsmH_Gw8_j7U_libclBKXTFMwE0,3342
1
+ tracepipe/__init__.py,sha256=1vKVGGc_fePrf1FNjP1R7-RPjtVnAc3Ori9QQl-E_4U,3342
2
2
  tracepipe/api.py,sha256=WdcKvvzI3voDt6fxZWa8vjyZQU8lfRshx7T78oj7oFE,13351
3
3
  tracepipe/context.py,sha256=_povLpqa5wd_ESHt5hbSmWTSMTF3nUfeutEQo4RMK2E,3856
4
4
  tracepipe/contracts.py,sha256=m-rjPrgnCiAgKEkweOS7P95jrjDptt5UPdvUlqaV_rU,16226
@@ -17,13 +17,13 @@ tracepipe/instrumentation/pandas_inst.py,sha256=2YSoju9ml2PjLOYzsx8MHH1iqhjgnXHb
17
17
  tracepipe/instrumentation/series_capture.py,sha256=i7FiA2ndEzS6duIj5y-a7SDfIMl2cCY_jGC1tmG7TGU,11271
18
18
  tracepipe/storage/__init__.py,sha256=pGFMfbIgIi2kofVPwYDqe2HTYMYJoabiGjTq77pYi-g,348
19
19
  tracepipe/storage/base.py,sha256=7DV_-rp37DjBMr9B1w85hLVYhC8OQShk2PcEhT-n4tE,4894
20
- tracepipe/storage/lineage_store.py,sha256=swMMf59isoCQZHaezCmquA-0R5iGNH3eGWjc9d9LGmo,27392
20
+ tracepipe/storage/lineage_store.py,sha256=KhGri2uC_O_43fUivFGEHY6KBDHd1I0O_PPd_KD3L4M,28683
21
21
  tracepipe/storage/row_identity.py,sha256=HBU0gTTJlFtFTcAdUCKuX-c9cHa0lo3CDIodDPDgOzA,17161
22
22
  tracepipe/utils/__init__.py,sha256=CI_GXViCjdMbu1j6HuzZhoQZEW0sIB6WAve6j5pfOC0,182
23
23
  tracepipe/utils/value_capture.py,sha256=wGgegQmJnVHxHbwHSH9di7JAOBChzD3ERJrabZNiayk,4092
24
24
  tracepipe/visualization/__init__.py,sha256=M3s44ZTUNEToyghjhQW0FgbmWHKPr4Xc-7iNF6DpI_E,132
25
25
  tracepipe/visualization/html_export.py,sha256=G0hfZTJctUCfpun17zXX1NIXhvJZbca6hKmP3rcIjbg,42282
26
- tracepipe-0.3.3.dist-info/METADATA,sha256=OnHkiCjZ0-fUERI4PoUh3VmIWMZ_sePk8-pFBwo_8cI,9152
27
- tracepipe-0.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
28
- tracepipe-0.3.3.dist-info/licenses/LICENSE,sha256=HMOAFHBClL79POwWL-2_aDcx42DJAq7Ce-nwJPvMB9U,1075
29
- tracepipe-0.3.3.dist-info/RECORD,,
26
+ tracepipe-0.3.4.dist-info/METADATA,sha256=DooQHiRi1HBiFK-QZPpE3PfLg43xE5Yg93kXWEdxhNY,9152
27
+ tracepipe-0.3.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
28
+ tracepipe-0.3.4.dist-info/licenses/LICENSE,sha256=HMOAFHBClL79POwWL-2_aDcx42DJAq7Ce-nwJPvMB9U,1075
29
+ tracepipe-0.3.4.dist-info/RECORD,,