tracepipe 0.3.1__tar.gz → 0.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tracepipe-0.3.1 → tracepipe-0.3.3}/CHANGELOG.md +19 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/PKG-INFO +1 -1
- {tracepipe-0.3.1 → tracepipe-0.3.3}/pyproject.toml +1 -1
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_lineage_through_merge.py +129 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/__init__.py +1 -1
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/convenience.py +60 -8
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/instrumentation/series_capture.py +13 -4
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/storage/lineage_store.py +1 -1
- {tracepipe-0.3.1 → tracepipe-0.3.3}/uv.lock +1 -1
- {tracepipe-0.3.1 → tracepipe-0.3.3}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/.github/workflows/ci.yml +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/.github/workflows/docs.yml +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/.github/workflows/release.yml +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/.gitignore +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/.pre-commit-config.yaml +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/CONTRIBUTING.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/LICENSE +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/README.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/benchmarks/README.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/benchmarks/bench_memory.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/benchmarks/bench_overhead.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/benchmarks/bench_scale.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/benchmarks/run_all.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/api/contracts.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/api/core.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/api/debug.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/api/index.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/changelog.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/contributing.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/examples/data-validation.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/examples/ml-pipeline.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/getting-started/installation.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/getting-started/modes.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/getting-started/quickstart.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/guide/cell-provenance.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/guide/concepts.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/guide/contracts.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/guide/health-checks.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/guide/reports.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/guide/row-tracing.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/guide/snapshots.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/docs/index.md +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/examples/comprehensive_demo.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/examples/demo.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/examples/ml_pipeline_demo.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/examples/red_team_test.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/mkdocs.yml +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/404.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/api/contracts/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/api/core/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/api/debug/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/api/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/_mkdocstrings.css +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/images/favicon.png +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/bundle.79ae519e.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/bundle.79ae519e.min.js.map +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.ar.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.da.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.de.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.du.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.el.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.es.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.fi.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.fr.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.he.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.hi.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.hu.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.hy.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.it.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.ja.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.jp.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.kn.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.ko.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.multi.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.nl.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.no.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.pt.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.ro.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.ru.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.sa.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.sv.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.ta.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.te.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.th.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.tr.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.vi.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.zh.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/tinyseg.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/wordcut.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/workers/search.2c215733.min.js +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/workers/search.2c215733.min.js.map +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/stylesheets/main.484c7ddc.min.css +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/stylesheets/main.484c7ddc.min.css.map +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/stylesheets/palette.ab4e12ef.min.css +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/stylesheets/palette.ab4e12ef.min.css.map +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/changelog/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/contributing/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/examples/data-validation/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/examples/ml-pipeline/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/getting-started/installation/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/getting-started/modes/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/getting-started/quickstart/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/guide/cell-provenance/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/guide/concepts/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/guide/contracts/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/guide/health-checks/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/guide/reports/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/guide/row-tracing/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/guide/snapshots/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/index.html +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/objects.inv +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/search/search_index.json +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/sitemap.xml +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/site/sitemap.xml.gz +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/__init__.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/conftest.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_api.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_concurrency.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_contracts.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_convenience_debug.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_edge_cases.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_integration.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_io_operations.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_pandas_inst.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_public_api.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_snapshot.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tests/test_version_matrix.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/api.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/context.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/contracts.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/core.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/debug.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/instrumentation/__init__.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/instrumentation/apply_capture.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/instrumentation/filter_capture.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/instrumentation/indexer_capture.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/instrumentation/merge_capture.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/instrumentation/pandas_inst.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/safety.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/snapshot.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/storage/__init__.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/storage/base.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/storage/row_identity.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/utils/__init__.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/utils/value_capture.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/value_provenance.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/visualization/__init__.py +0 -0
- {tracepipe-0.3.1 → tracepipe-0.3.3}/tracepipe/visualization/html_export.py +0 -0
|
@@ -5,6 +5,25 @@ All notable changes to TracePipe will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## 0.3.3 - 2026-02-03
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- **Double-logging bug**: `df['col'] = df['col'].fillna()` now logs exactly one event, not two
|
|
12
|
+
- Fixed duplicate capture from both `_wrap_setitem` and `wrap_series_assignment`
|
|
13
|
+
- **Merge warning scoping**: `tp.check(df)` now only shows warnings for merges in df's lineage
|
|
14
|
+
- Previously showed warnings from ALL merges in the session (cross-contamination)
|
|
15
|
+
- Now filters by tracking which merge steps produced the queried DataFrame's rows
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
- `_get_merge_stats_for_df()` helper to scope merge warnings to df's lineage
|
|
19
|
+
- Tests for double-logging prevention and merge warning scoping
|
|
20
|
+
|
|
21
|
+
## 0.3.2 - 2026-02-03
|
|
22
|
+
|
|
23
|
+
### Fixed
|
|
24
|
+
- Merge duplicate key warnings now correctly identify which table (left/right) has duplicates
|
|
25
|
+
- Previously `right_dup_rate` was mislabeled as "Right table" when it actually indicates LEFT table duplicates
|
|
26
|
+
|
|
8
27
|
## 0.3.1 - 2026-02-03
|
|
9
28
|
|
|
10
29
|
### Fixed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tracepipe
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: Row-level data lineage tracking for pandas pipelines
|
|
5
5
|
Project-URL: Homepage, https://github.com/tracepipe/tracepipe
|
|
6
6
|
Project-URL: Documentation, https://tracepipe.github.io/tracepipe/
|
|
@@ -262,6 +262,135 @@ class TestFillnaTrackingVerification:
|
|
|
262
262
|
), f"loc assignment should be tracked, got {result.n_changes} changes"
|
|
263
263
|
|
|
264
264
|
|
|
265
|
+
class TestMergeWarningLabeling:
|
|
266
|
+
"""Tests for correct left/right labeling in merge warnings."""
|
|
267
|
+
|
|
268
|
+
def test_duplicate_left_keys_warns_about_left(self):
|
|
269
|
+
"""When LEFT has duplicate keys, warning should say 'Left table'."""
|
|
270
|
+
tp.enable(mode="debug")
|
|
271
|
+
|
|
272
|
+
# LEFT has duplicate key (94107 appears twice)
|
|
273
|
+
left = pd.DataFrame({"zip": ["10001", "94107", "94107"], "val": [1, 2, 3]})
|
|
274
|
+
# RIGHT has unique keys
|
|
275
|
+
right = pd.DataFrame({"zip": ["10001", "94107", "99999"], "region": ["NY", "CA", "XX"]})
|
|
276
|
+
|
|
277
|
+
df = left.merge(right, on="zip", how="left")
|
|
278
|
+
result = tp.check(df)
|
|
279
|
+
|
|
280
|
+
# Should warn about LEFT having duplicates, not RIGHT
|
|
281
|
+
dup_warnings = [w for w in result.warnings if "duplicate" in w.message.lower()]
|
|
282
|
+
assert len(dup_warnings) >= 1, "Should have duplicate key warning"
|
|
283
|
+
|
|
284
|
+
# The warning should mention "Left table", not "Right table"
|
|
285
|
+
left_warnings = [w for w in dup_warnings if "left" in w.message.lower()]
|
|
286
|
+
right_warnings = [w for w in dup_warnings if "right" in w.message.lower()]
|
|
287
|
+
|
|
288
|
+
assert len(left_warnings) >= 1, (
|
|
289
|
+
f"Should warn about LEFT table having duplicates. "
|
|
290
|
+
f"Got warnings: {[w.message for w in dup_warnings]}"
|
|
291
|
+
)
|
|
292
|
+
# RIGHT is unique, shouldn't warn about right
|
|
293
|
+
assert len(right_warnings) == 0, (
|
|
294
|
+
f"Should NOT warn about RIGHT table (it's unique). "
|
|
295
|
+
f"Got warnings: {[w.message for w in dup_warnings]}"
|
|
296
|
+
)
|
|
297
|
+
|
|
298
|
+
def test_duplicate_right_keys_warns_about_right(self):
|
|
299
|
+
"""When RIGHT has duplicate keys, warning should say 'Right table'."""
|
|
300
|
+
tp.enable(mode="debug")
|
|
301
|
+
|
|
302
|
+
# LEFT has unique keys
|
|
303
|
+
left = pd.DataFrame({"zip": ["10001", "94107"], "val": [1, 2]})
|
|
304
|
+
# RIGHT has duplicate key (94107 appears twice)
|
|
305
|
+
right = pd.DataFrame({"zip": ["10001", "94107", "94107"], "region": ["NY", "CA", "CA2"]})
|
|
306
|
+
|
|
307
|
+
df = left.merge(right, on="zip", how="left")
|
|
308
|
+
result = tp.check(df)
|
|
309
|
+
|
|
310
|
+
dup_warnings = [w for w in result.warnings if "duplicate" in w.message.lower()]
|
|
311
|
+
assert len(dup_warnings) >= 1, "Should have duplicate key warning"
|
|
312
|
+
|
|
313
|
+
# The warning should mention "Right table"
|
|
314
|
+
right_warnings = [w for w in dup_warnings if "right" in w.message.lower()]
|
|
315
|
+
assert len(right_warnings) >= 1, (
|
|
316
|
+
f"Should warn about RIGHT table having duplicates. "
|
|
317
|
+
f"Got warnings: {[w.message for w in dup_warnings]}"
|
|
318
|
+
)
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
class TestNoDoubleLogging:
|
|
322
|
+
"""Tests to verify no duplicate events are logged."""
|
|
323
|
+
|
|
324
|
+
def test_fillna_logs_once_not_twice(self):
|
|
325
|
+
"""df['col'] = df['col'].fillna() should log exactly one event per row."""
|
|
326
|
+
tp.enable(mode="debug", watch=["income"])
|
|
327
|
+
|
|
328
|
+
df = pd.DataFrame({"id": [1], "income": [None]})
|
|
329
|
+
df["income"] = df["income"].fillna(0)
|
|
330
|
+
|
|
331
|
+
result = tp.why(df, col="income", row=0)
|
|
332
|
+
|
|
333
|
+
# Should have exactly 1 change event, not 2 (no double-logging)
|
|
334
|
+
assert result.n_changes == 1, (
|
|
335
|
+
f"Expected exactly 1 change event, got {result.n_changes}. "
|
|
336
|
+
f"Double-logging bug if > 1. History: {result.history}"
|
|
337
|
+
)
|
|
338
|
+
|
|
339
|
+
def test_setitem_logs_once(self):
|
|
340
|
+
"""Direct column assignment should log exactly once."""
|
|
341
|
+
tp.enable(mode="debug", watch=["val"])
|
|
342
|
+
|
|
343
|
+
df = pd.DataFrame({"id": [1], "val": [10]})
|
|
344
|
+
df["val"] = df["val"] * 2
|
|
345
|
+
|
|
346
|
+
result = tp.why(df, col="val", row=0)
|
|
347
|
+
|
|
348
|
+
assert result.n_changes == 1, (
|
|
349
|
+
f"Expected exactly 1 change event, got {result.n_changes}. "
|
|
350
|
+
f"History: {result.history}"
|
|
351
|
+
)
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
class TestMergeWarningScoping:
|
|
355
|
+
"""Tests for merge warnings being scoped to df's lineage."""
|
|
356
|
+
|
|
357
|
+
def test_check_only_shows_warnings_for_df_lineage(self):
|
|
358
|
+
"""check(df) should only show warnings from merges that produced df."""
|
|
359
|
+
tp.enable(mode="debug")
|
|
360
|
+
|
|
361
|
+
# First pipeline - merge with unique right
|
|
362
|
+
left1 = pd.DataFrame({"k": ["a", "a", "b"], "v": [1, 2, 3]}) # Dup left
|
|
363
|
+
right1 = pd.DataFrame({"k": ["a", "b", "c"], "r": [10, 20, 30]}) # Unique
|
|
364
|
+
df1 = left1.merge(right1, on="k", how="left")
|
|
365
|
+
|
|
366
|
+
# Second pipeline - merge with duplicate right
|
|
367
|
+
left2 = pd.DataFrame({"k": ["x", "y"], "v": [1, 2]}) # Unique
|
|
368
|
+
right2 = pd.DataFrame({"k": ["x", "x", "y"], "r": [10, 11, 20]}) # Dup right
|
|
369
|
+
df2 = left2.merge(right2, on="k", how="left")
|
|
370
|
+
|
|
371
|
+
# check(df1) should only show warnings about LEFT duplicates (from df1's merge)
|
|
372
|
+
result1 = tp.check(df1)
|
|
373
|
+
dup_warnings1 = [w for w in result1.warnings if "duplicate" in w.message.lower()]
|
|
374
|
+
|
|
375
|
+
# check(df2) should only show warnings about RIGHT duplicates (from df2's merge)
|
|
376
|
+
result2 = tp.check(df2)
|
|
377
|
+
dup_warnings2 = [w for w in result2.warnings if "duplicate" in w.message.lower()]
|
|
378
|
+
|
|
379
|
+
# df1's check should NOT include df2's "Right table" warning
|
|
380
|
+
right_in_df1 = [w for w in dup_warnings1 if "right" in w.message.lower()]
|
|
381
|
+
assert len(right_in_df1) == 0, (
|
|
382
|
+
f"df1 check should not have Right table warnings (from df2). "
|
|
383
|
+
f"Got: {[w.message for w in dup_warnings1]}"
|
|
384
|
+
)
|
|
385
|
+
|
|
386
|
+
# df2's check should NOT include df1's "Left table" warning
|
|
387
|
+
left_in_df2 = [w for w in dup_warnings2 if "left" in w.message.lower()]
|
|
388
|
+
assert len(left_in_df2) == 0, (
|
|
389
|
+
f"df2 check should not have Left table warnings (from df1). "
|
|
390
|
+
f"Got: {[w.message for w in dup_warnings2]}"
|
|
391
|
+
)
|
|
392
|
+
|
|
393
|
+
|
|
265
394
|
class TestLineageDepthLimit:
|
|
266
395
|
"""Tests for lineage traversal depth limiting."""
|
|
267
396
|
|
|
@@ -361,8 +361,8 @@ def check(
|
|
|
361
361
|
facts["rows_dropped"] = len(dropped)
|
|
362
362
|
facts["total_steps"] = len(ctx.store.steps)
|
|
363
363
|
|
|
364
|
-
# Merge statistics
|
|
365
|
-
merge_stats_list =
|
|
364
|
+
# Merge statistics - filter to df's lineage to avoid cross-contamination
|
|
365
|
+
merge_stats_list = _get_merge_stats_for_df(df, ctx)
|
|
366
366
|
|
|
367
367
|
for i, (step_id, stats) in enumerate(merge_stats_list):
|
|
368
368
|
facts[f"merge_{i}_expansion"] = stats.expansion_ratio
|
|
@@ -385,22 +385,27 @@ def check(
|
|
|
385
385
|
)
|
|
386
386
|
)
|
|
387
387
|
|
|
388
|
-
|
|
388
|
+
# Note on dup_rate semantics:
|
|
389
|
+
# - left_dup_rate = fraction of LEFT rows appearing >1 times in result
|
|
390
|
+
# This happens when RIGHT table has duplicate join keys
|
|
391
|
+
# - right_dup_rate = fraction of RIGHT rows appearing >1 times in result
|
|
392
|
+
# This happens when LEFT table has duplicate join keys
|
|
393
|
+
if stats.right_dup_rate > 0.01:
|
|
389
394
|
warnings_list.append(
|
|
390
395
|
CheckWarning(
|
|
391
396
|
category="duplicate_keys",
|
|
392
397
|
severity="fact",
|
|
393
|
-
message=f"Left table has {stats.left_dup_rate:.1%} duplicate join keys",
|
|
394
|
-
details={"step_id": step_id, "dup_rate": stats.left_dup_rate},
|
|
398
|
+
message=f"Left table has {stats.right_dup_rate:.1%} duplicate join keys",
|
|
399
|
+
details={"step_id": step_id, "dup_rate": stats.right_dup_rate},
|
|
395
400
|
)
|
|
396
401
|
)
|
|
397
|
-
if stats.right_dup_rate > 0.01:
|
|
402
|
+
if stats.left_dup_rate > 0.01:
|
|
398
403
|
warnings_list.append(
|
|
399
404
|
CheckWarning(
|
|
400
405
|
category="duplicate_keys",
|
|
401
406
|
severity="fact",
|
|
402
|
-
message=f"Right table has {stats.right_dup_rate:.1%} duplicate join keys",
|
|
403
|
-
details={"step_id": step_id, "dup_rate": stats.right_dup_rate},
|
|
407
|
+
message=f"Right table has {stats.left_dup_rate:.1%} duplicate join keys",
|
|
408
|
+
details={"step_id": step_id, "dup_rate": stats.left_dup_rate},
|
|
404
409
|
)
|
|
405
410
|
)
|
|
406
411
|
|
|
@@ -653,6 +658,53 @@ def find(
|
|
|
653
658
|
# ============ HELPERS ============
|
|
654
659
|
|
|
655
660
|
|
|
661
|
+
def _get_merge_stats_for_df(df: pd.DataFrame, ctx) -> list[tuple[int, Any]]:
|
|
662
|
+
"""
|
|
663
|
+
Get merge stats relevant to df's lineage only.
|
|
664
|
+
|
|
665
|
+
This prevents cross-contamination where check(df) would show warnings
|
|
666
|
+
from merges that produced OTHER DataFrames in the same session.
|
|
667
|
+
"""
|
|
668
|
+
if not hasattr(ctx.store, "get_merge_stats"):
|
|
669
|
+
return []
|
|
670
|
+
|
|
671
|
+
all_stats = ctx.store.get_merge_stats()
|
|
672
|
+
if not all_stats:
|
|
673
|
+
return []
|
|
674
|
+
|
|
675
|
+
# Get row IDs from df
|
|
676
|
+
rids = ctx.row_manager.get_ids_array(df)
|
|
677
|
+
if rids is None:
|
|
678
|
+
return []
|
|
679
|
+
|
|
680
|
+
# Find which merge steps produced rows in df
|
|
681
|
+
relevant_step_ids = set()
|
|
682
|
+
|
|
683
|
+
# Check merge mappings to find which merges produced df's rows
|
|
684
|
+
if hasattr(ctx.store, "merge_mappings"):
|
|
685
|
+
for mapping in ctx.store.merge_mappings:
|
|
686
|
+
# Check if any of df's row IDs are in this merge's output
|
|
687
|
+
for rid in rids:
|
|
688
|
+
# Binary search in sorted out_rids
|
|
689
|
+
i = np.searchsorted(mapping.out_rids, rid)
|
|
690
|
+
if i < len(mapping.out_rids) and mapping.out_rids[i] == rid:
|
|
691
|
+
relevant_step_ids.add(mapping.step_id)
|
|
692
|
+
break # Found at least one match, this merge is relevant
|
|
693
|
+
|
|
694
|
+
# If no merge mappings found, fall back to checking if df was just merged
|
|
695
|
+
# by seeing if it has more columns than typical (heuristic)
|
|
696
|
+
if not relevant_step_ids and all_stats:
|
|
697
|
+
# Fallback: return only the most recent merge that could have produced df
|
|
698
|
+
# This handles the case where merge_mappings aren't available
|
|
699
|
+
for step_id, stats in reversed(all_stats):
|
|
700
|
+
if stats.result_rows == len(df):
|
|
701
|
+
relevant_step_ids.add(step_id)
|
|
702
|
+
break
|
|
703
|
+
|
|
704
|
+
# Filter stats to relevant merges only
|
|
705
|
+
return [(sid, stats) for sid, stats in all_stats if sid in relevant_step_ids]
|
|
706
|
+
|
|
707
|
+
|
|
656
708
|
def _json_safe(val: Any) -> Any:
|
|
657
709
|
"""Convert value to JSON-serializable form."""
|
|
658
710
|
if pd.isna(val):
|
|
@@ -116,6 +116,10 @@ def wrap_series_assignment():
|
|
|
116
116
|
"""
|
|
117
117
|
Wrap DataFrame.__setitem__ to capture diffs when assigning Series.
|
|
118
118
|
|
|
119
|
+
Note: For watched columns, _wrap_setitem (pandas_inst.py) already captures
|
|
120
|
+
the assignment. This wrapper only captures for NON-watched columns when
|
|
121
|
+
a TrackedSeries is assigned, to avoid double-logging.
|
|
122
|
+
|
|
119
123
|
Handles:
|
|
120
124
|
- df['col'] = series (where series may have been modified)
|
|
121
125
|
- df['col'] = scalar (broadcast assignment)
|
|
@@ -127,28 +131,33 @@ def wrap_series_assignment():
|
|
|
127
131
|
def tracked_setitem(self, key, value):
|
|
128
132
|
ctx = get_context()
|
|
129
133
|
|
|
130
|
-
#
|
|
134
|
+
# For watched columns, _wrap_setitem already captures - skip to avoid double-logging
|
|
135
|
+
# We only capture here for NON-watched columns when a TrackedSeries is involved
|
|
136
|
+
should_capture_here = False
|
|
131
137
|
before_values = None
|
|
138
|
+
|
|
132
139
|
if (
|
|
133
140
|
ctx.enabled
|
|
134
141
|
and isinstance(key, str)
|
|
135
|
-
and key in ctx.watched_columns
|
|
136
142
|
and key in self.columns
|
|
143
|
+
and key not in ctx.watched_columns # Only capture NON-watched columns here
|
|
144
|
+
and isinstance(value, TrackedSeries) # Only for TrackedSeries assignments
|
|
137
145
|
):
|
|
138
146
|
rids = ctx.row_manager.get_ids_array(self)
|
|
139
147
|
if rids is not None:
|
|
148
|
+
should_capture_here = True
|
|
140
149
|
before_values = {
|
|
141
150
|
"rids": rids.copy(),
|
|
142
151
|
"values": self[key].values.copy(),
|
|
143
152
|
}
|
|
144
153
|
|
|
145
|
-
# Always run original
|
|
154
|
+
# Always run original (which may be _wrap_setitem's wrapper)
|
|
146
155
|
original_setitem(self, key, value)
|
|
147
156
|
|
|
148
157
|
if not ctx.enabled:
|
|
149
158
|
return
|
|
150
159
|
|
|
151
|
-
if before_values is None:
|
|
160
|
+
if not should_capture_here or before_values is None:
|
|
152
161
|
return
|
|
153
162
|
|
|
154
163
|
try:
|
|
@@ -710,7 +710,7 @@ class InMemoryLineageStore:
|
|
|
710
710
|
diffs = list(self._iter_all_diffs())
|
|
711
711
|
|
|
712
712
|
data = {
|
|
713
|
-
"tracepipe_version": "0.3.
|
|
713
|
+
"tracepipe_version": "0.3.2",
|
|
714
714
|
"export_timestamp": time.time(),
|
|
715
715
|
"total_diffs": len(diffs),
|
|
716
716
|
"total_steps": len(self._steps),
|
|
@@ -2051,7 +2051,7 @@ wheels = [
|
|
|
2051
2051
|
|
|
2052
2052
|
[[package]]
|
|
2053
2053
|
name = "tracepipe"
|
|
2054
|
-
version = "0.3.
|
|
2054
|
+
version = "0.3.2"
|
|
2055
2055
|
source = { editable = "." }
|
|
2056
2056
|
dependencies = [
|
|
2057
2057
|
{ name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tracepipe-0.3.1 → tracepipe-0.3.3}/site/assets/javascripts/workers/search.2c215733.min.js.map
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|