tracepipe 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tracepipe/__init__.py +1 -1
- tracepipe/convenience.py +11 -6
- tracepipe/storage/lineage_store.py +1 -1
- {tracepipe-0.3.1.dist-info → tracepipe-0.3.2.dist-info}/METADATA +1 -1
- {tracepipe-0.3.1.dist-info → tracepipe-0.3.2.dist-info}/RECORD +7 -7
- {tracepipe-0.3.1.dist-info → tracepipe-0.3.2.dist-info}/WHEEL +0 -0
- {tracepipe-0.3.1.dist-info → tracepipe-0.3.2.dist-info}/licenses/LICENSE +0 -0
tracepipe/__init__.py
CHANGED
tracepipe/convenience.py
CHANGED
|
@@ -385,22 +385,27 @@ def check(
|
|
|
385
385
|
)
|
|
386
386
|
)
|
|
387
387
|
|
|
388
|
-
|
|
388
|
+
# Note on dup_rate semantics:
|
|
389
|
+
# - left_dup_rate = fraction of LEFT rows appearing >1 times in result
|
|
390
|
+
# This happens when RIGHT table has duplicate join keys
|
|
391
|
+
# - right_dup_rate = fraction of RIGHT rows appearing >1 times in result
|
|
392
|
+
# This happens when LEFT table has duplicate join keys
|
|
393
|
+
if stats.right_dup_rate > 0.01:
|
|
389
394
|
warnings_list.append(
|
|
390
395
|
CheckWarning(
|
|
391
396
|
category="duplicate_keys",
|
|
392
397
|
severity="fact",
|
|
393
|
-
message=f"Left table has {stats.left_dup_rate:.1%} duplicate join keys",
|
|
394
|
-
details={"step_id": step_id, "dup_rate": stats.left_dup_rate},
|
|
398
|
+
message=f"Left table has {stats.right_dup_rate:.1%} duplicate join keys",
|
|
399
|
+
details={"step_id": step_id, "dup_rate": stats.right_dup_rate},
|
|
395
400
|
)
|
|
396
401
|
)
|
|
397
|
-
if stats.right_dup_rate > 0.01:
|
|
402
|
+
if stats.left_dup_rate > 0.01:
|
|
398
403
|
warnings_list.append(
|
|
399
404
|
CheckWarning(
|
|
400
405
|
category="duplicate_keys",
|
|
401
406
|
severity="fact",
|
|
402
|
-
message=f"Right table has {stats.right_dup_rate:.1%} duplicate join keys",
|
|
403
|
-
details={"step_id": step_id, "dup_rate": stats.right_dup_rate},
|
|
407
|
+
message=f"Right table has {stats.left_dup_rate:.1%} duplicate join keys",
|
|
408
|
+
details={"step_id": step_id, "dup_rate": stats.left_dup_rate},
|
|
404
409
|
)
|
|
405
410
|
)
|
|
406
411
|
|
|
@@ -710,7 +710,7 @@ class InMemoryLineageStore:
|
|
|
710
710
|
diffs = list(self._iter_all_diffs())
|
|
711
711
|
|
|
712
712
|
data = {
|
|
713
|
-
"tracepipe_version": "0.3.1",
|
|
713
|
+
"tracepipe_version": "0.3.2",
|
|
714
714
|
"export_timestamp": time.time(),
|
|
715
715
|
"total_diffs": len(diffs),
|
|
716
716
|
"total_steps": len(self._steps),
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tracepipe
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Row-level data lineage tracking for pandas pipelines
|
|
5
5
|
Project-URL: Homepage, https://github.com/tracepipe/tracepipe
|
|
6
6
|
Project-URL: Documentation, https://tracepipe.github.io/tracepipe/
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
tracepipe/__init__.py,sha256=
|
|
1
|
+
tracepipe/__init__.py,sha256=MuwxV2mU4XxHqab62vQxaDAlhMvRCgUCmr_YU9R16ss,3342
|
|
2
2
|
tracepipe/api.py,sha256=WdcKvvzI3voDt6fxZWa8vjyZQU8lfRshx7T78oj7oFE,13351
|
|
3
3
|
tracepipe/context.py,sha256=_povLpqa5wd_ESHt5hbSmWTSMTF3nUfeutEQo4RMK2E,3856
|
|
4
4
|
tracepipe/contracts.py,sha256=m-rjPrgnCiAgKEkweOS7P95jrjDptt5UPdvUlqaV_rU,16226
|
|
5
|
-
tracepipe/convenience.py,sha256=
|
|
5
|
+
tracepipe/convenience.py,sha256=SZGcSOKPjAeJ9udPP_Fa_zTZY5GeDX61W6uftMwafjc,26563
|
|
6
6
|
tracepipe/core.py,sha256=kAXks694rR0Z4tD7Gyty0TyJGWx2whsSdteYYpHuazo,8010
|
|
7
7
|
tracepipe/debug.py,sha256=6t2GKVZLwn7SJLhrStE9qsmTiVIHATTE3jJPQ2DYtnc,10140
|
|
8
8
|
tracepipe/safety.py,sha256=jTBZv4QGDJfnZETsSZeMKbdOUtGXk-_XkmllhnGWM-M,5537
|
|
@@ -17,13 +17,13 @@ tracepipe/instrumentation/pandas_inst.py,sha256=2YSoju9ml2PjLOYzsx8MHH1iqhjgnXHb
|
|
|
17
17
|
tracepipe/instrumentation/series_capture.py,sha256=N1Cf-pQDh23qQLLd8DNsxbcaD-91sTJkRd5AnccKZGE,10649
|
|
18
18
|
tracepipe/storage/__init__.py,sha256=pGFMfbIgIi2kofVPwYDqe2HTYMYJoabiGjTq77pYi-g,348
|
|
19
19
|
tracepipe/storage/base.py,sha256=7DV_-rp37DjBMr9B1w85hLVYhC8OQShk2PcEhT-n4tE,4894
|
|
20
|
-
tracepipe/storage/lineage_store.py,sha256=
|
|
20
|
+
tracepipe/storage/lineage_store.py,sha256=swMMf59isoCQZHaezCmquA-0R5iGNH3eGWjc9d9LGmo,27392
|
|
21
21
|
tracepipe/storage/row_identity.py,sha256=HBU0gTTJlFtFTcAdUCKuX-c9cHa0lo3CDIodDPDgOzA,17161
|
|
22
22
|
tracepipe/utils/__init__.py,sha256=CI_GXViCjdMbu1j6HuzZhoQZEW0sIB6WAve6j5pfOC0,182
|
|
23
23
|
tracepipe/utils/value_capture.py,sha256=wGgegQmJnVHxHbwHSH9di7JAOBChzD3ERJrabZNiayk,4092
|
|
24
24
|
tracepipe/visualization/__init__.py,sha256=M3s44ZTUNEToyghjhQW0FgbmWHKPr4Xc-7iNF6DpI_E,132
|
|
25
25
|
tracepipe/visualization/html_export.py,sha256=G0hfZTJctUCfpun17zXX1NIXhvJZbca6hKmP3rcIjbg,42282
|
|
26
|
-
tracepipe-0.3.
|
|
27
|
-
tracepipe-0.3.
|
|
28
|
-
tracepipe-0.3.
|
|
29
|
-
tracepipe-0.3.
|
|
26
|
+
tracepipe-0.3.2.dist-info/METADATA,sha256=ik5FLmADKLqj25TprTnJPi21SW4EJ88mBTG-aQ4p-gc,9152
|
|
27
|
+
tracepipe-0.3.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
28
|
+
tracepipe-0.3.2.dist-info/licenses/LICENSE,sha256=HMOAFHBClL79POwWL-2_aDcx42DJAq7Ce-nwJPvMB9U,1075
|
|
29
|
+
tracepipe-0.3.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|