tracepipe 0.3.3__tar.gz → 0.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tracepipe-0.3.3 → tracepipe-0.3.5}/CHANGELOG.md +24 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/PKG-INFO +21 -1
- {tracepipe-0.3.3 → tracepipe-0.3.5}/README.md +20 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/pyproject.toml +1 -1
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_lineage_through_merge.py +68 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/__init__.py +1 -1
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/context.py +4 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/instrumentation/pandas_inst.py +11 -3
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/safety.py +29 -4
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/storage/lineage_store.py +37 -2
- {tracepipe-0.3.3 → tracepipe-0.3.5}/uv.lock +1 -1
- {tracepipe-0.3.3 → tracepipe-0.3.5}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/.github/workflows/ci.yml +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/.github/workflows/docs.yml +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/.github/workflows/release.yml +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/.gitignore +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/.pre-commit-config.yaml +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/CONTRIBUTING.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/LICENSE +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/benchmarks/README.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/benchmarks/bench_memory.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/benchmarks/bench_overhead.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/benchmarks/bench_scale.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/benchmarks/run_all.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/api/contracts.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/api/core.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/api/debug.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/api/index.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/changelog.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/contributing.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/examples/data-validation.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/examples/ml-pipeline.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/getting-started/installation.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/getting-started/modes.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/getting-started/quickstart.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/guide/cell-provenance.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/guide/concepts.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/guide/contracts.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/guide/health-checks.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/guide/reports.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/guide/row-tracing.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/guide/snapshots.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/docs/index.md +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/examples/comprehensive_demo.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/examples/demo.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/examples/ml_pipeline_demo.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/examples/red_team_test.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/mkdocs.yml +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/404.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/api/contracts/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/api/core/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/api/debug/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/api/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/_mkdocstrings.css +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/images/favicon.png +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/bundle.79ae519e.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/bundle.79ae519e.min.js.map +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.ar.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.da.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.de.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.du.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.el.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.es.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.fi.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.fr.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.he.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.hi.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.hu.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.hy.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.it.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.ja.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.jp.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.kn.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.ko.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.multi.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.nl.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.no.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.pt.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.ro.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.ru.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.sa.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.sv.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.ta.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.te.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.th.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.tr.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.vi.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.zh.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/tinyseg.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/wordcut.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/workers/search.2c215733.min.js +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/workers/search.2c215733.min.js.map +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/stylesheets/main.484c7ddc.min.css +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/stylesheets/main.484c7ddc.min.css.map +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/stylesheets/palette.ab4e12ef.min.css +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/stylesheets/palette.ab4e12ef.min.css.map +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/changelog/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/contributing/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/examples/data-validation/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/examples/ml-pipeline/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/getting-started/installation/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/getting-started/modes/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/getting-started/quickstart/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/guide/cell-provenance/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/guide/concepts/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/guide/contracts/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/guide/health-checks/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/guide/reports/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/guide/row-tracing/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/guide/snapshots/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/index.html +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/objects.inv +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/search/search_index.json +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/sitemap.xml +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/site/sitemap.xml.gz +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/__init__.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/conftest.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_api.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_concurrency.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_contracts.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_convenience_debug.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_edge_cases.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_integration.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_io_operations.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_pandas_inst.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_public_api.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_snapshot.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tests/test_version_matrix.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/api.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/contracts.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/convenience.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/core.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/debug.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/instrumentation/__init__.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/instrumentation/apply_capture.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/instrumentation/filter_capture.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/instrumentation/indexer_capture.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/instrumentation/merge_capture.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/instrumentation/series_capture.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/snapshot.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/storage/__init__.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/storage/base.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/storage/row_identity.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/utils/__init__.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/utils/value_capture.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/value_provenance.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/visualization/__init__.py +0 -0
- {tracepipe-0.3.3 → tracepipe-0.3.5}/tracepipe/visualization/html_export.py +0 -0
|
@@ -5,6 +5,30 @@ All notable changes to TracePipe will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## 0.3.5 - 2026-02-03
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- **DataFrame.fillna double-logging**: `df.fillna({"col": 0})` now logs exactly 1 event
|
|
12
|
+
- Previously logged both `DataFrame.fillna` and internal `__setitem__` for same change
|
|
13
|
+
- Added `wrap_pandas_transform_method` with `_in_transform_op` flag to suppress nested setitem
|
|
14
|
+
- Works for both `fillna` and `replace` operations, including `inplace=True`
|
|
15
|
+
|
|
16
|
+
### Added
|
|
17
|
+
- Known Limitations section in README documenting concat/dedup tracking gaps
|
|
18
|
+
- Test for `DataFrame.fillna` single-event logging
|
|
19
|
+
|
|
20
|
+
## 0.3.4 - 2026-02-03
|
|
21
|
+
|
|
22
|
+
### Fixed
|
|
23
|
+
- **Event deduplication**: Identical events from parallel pipelines are now deduplicated
|
|
24
|
+
- When multiple DataFrames share row IDs (e.g., from `df.copy()`), same changes are recorded once
|
|
25
|
+
- Events deduplicated by `(col, old_val, new_val, operation)` signature
|
|
26
|
+
- Prevents "4 events" when only 1 logical change occurred
|
|
27
|
+
|
|
28
|
+
### Added
|
|
29
|
+
- `_stable_repr()` helper for robust value comparison in deduplication
|
|
30
|
+
- Tests for cross-pipeline event deduplication behavior
|
|
31
|
+
|
|
8
32
|
## 0.3.3 - 2026-02-03
|
|
9
33
|
|
|
10
34
|
### Fixed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tracepipe
|
|
3
|
-
Version: 0.3.3
|
|
3
|
+
Version: 0.3.5
|
|
4
4
|
Summary: Row-level data lineage tracking for pandas pipelines
|
|
5
5
|
Project-URL: Homepage, https://github.com/tracepipe/tracepipe
|
|
6
6
|
Project-URL: Documentation, https://tracepipe.github.io/tracepipe/
|
|
@@ -276,6 +276,26 @@ tp.enable(mode="debug") # Full lineage
|
|
|
276
276
|
|
|
277
277
|
---
|
|
278
278
|
|
|
279
|
+
## Known Limitations
|
|
280
|
+
|
|
281
|
+
TracePipe tracks **cell mutations** (fillna, replace, loc assignment) and **merge provenance** reliably. However, some patterns are not yet fully supported:
|
|
282
|
+
|
|
283
|
+
| Pattern | Status | Notes |
|
|
284
|
+
|---------|--------|-------|
|
|
285
|
+
| `df["col"] = df["col"].fillna(0)` | ✅ Tracked | Series + assignment |
|
|
286
|
+
| `df = df.fillna({"col": 0})` | ✅ Tracked | DataFrame-level fillna |
|
|
287
|
+
| `df.loc[mask, "col"] = val` | ✅ Tracked | Conditional assignment |
|
|
288
|
+
| `df.merge(other, on="key")` | ✅ Tracked | Full provenance in debug mode |
|
|
289
|
+
| `pd.concat([df1, df2])` | ⚠️ Partial | Row IDs preserved, but no "source DataFrame" tracking |
|
|
290
|
+
| `df.drop_duplicates(keep='last')` | ⚠️ Partial | Which row was kept is not tracked |
|
|
291
|
+
| Sort + dedup patterns | ⚠️ Partial | "Latest record wins" logic not traced |
|
|
292
|
+
|
|
293
|
+
**Why?** TracePipe tracks value changes within rows, not row-selection operations. When `drop_duplicates` picks one row over another, that's a provenance decision (not a cell mutation) that isn't currently instrumented.
|
|
294
|
+
|
|
295
|
+
**Planned for 0.4**: Full row-provenance tracking for concat, drop_duplicates, and sort operations.
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
279
299
|
## Contributing
|
|
280
300
|
|
|
281
301
|
```bash
|
|
@@ -207,6 +207,26 @@ tp.enable(mode="debug") # Full lineage
|
|
|
207
207
|
|
|
208
208
|
---
|
|
209
209
|
|
|
210
|
+
## Known Limitations
|
|
211
|
+
|
|
212
|
+
TracePipe tracks **cell mutations** (fillna, replace, loc assignment) and **merge provenance** reliably. However, some patterns are not yet fully supported:
|
|
213
|
+
|
|
214
|
+
| Pattern | Status | Notes |
|
|
215
|
+
|---------|--------|-------|
|
|
216
|
+
| `df["col"] = df["col"].fillna(0)` | ✅ Tracked | Series + assignment |
|
|
217
|
+
| `df = df.fillna({"col": 0})` | ✅ Tracked | DataFrame-level fillna |
|
|
218
|
+
| `df.loc[mask, "col"] = val` | ✅ Tracked | Conditional assignment |
|
|
219
|
+
| `df.merge(other, on="key")` | ✅ Tracked | Full provenance in debug mode |
|
|
220
|
+
| `pd.concat([df1, df2])` | ⚠️ Partial | Row IDs preserved, but no "source DataFrame" tracking |
|
|
221
|
+
| `df.drop_duplicates(keep='last')` | ⚠️ Partial | Which row was kept is not tracked |
|
|
222
|
+
| Sort + dedup patterns | ⚠️ Partial | "Latest record wins" logic not traced |
|
|
223
|
+
|
|
224
|
+
**Why?** TracePipe tracks value changes within rows, not row-selection operations. When `drop_duplicates` picks one row over another, that's a provenance decision (not a cell mutation) that isn't currently instrumented.
|
|
225
|
+
|
|
226
|
+
**Planned for 0.4**: Full row-provenance tracking for concat, drop_duplicates, and sort operations.
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
210
230
|
## Contributing
|
|
211
231
|
|
|
212
232
|
```bash
|
|
@@ -249,6 +249,23 @@ class TestFillnaTrackingVerification:
|
|
|
249
249
|
result.n_changes >= 1
|
|
250
250
|
), f"DataFrame.fillna should be tracked, got {result.n_changes} changes"
|
|
251
251
|
|
|
252
|
+
def test_dataframe_fillna_logs_once_not_twice(self):
|
|
253
|
+
"""df.fillna({'col': val}) should log exactly 1 event, not 2.
|
|
254
|
+
|
|
255
|
+
Previously, both DataFrame.fillna and the internal __setitem__ were
|
|
256
|
+
recording the same change, causing double-logging.
|
|
257
|
+
"""
|
|
258
|
+
tp.enable(mode="debug", watch=["a"])
|
|
259
|
+
|
|
260
|
+
df = pd.DataFrame({"a": [1.0, None, 3.0]})
|
|
261
|
+
df = df.fillna({"a": 0})
|
|
262
|
+
|
|
263
|
+
result = tp.why(df, col="a", row=1)
|
|
264
|
+
assert result.n_changes == 1, (
|
|
265
|
+
f"DataFrame.fillna should log exactly 1 event, got {result.n_changes}. "
|
|
266
|
+
f"Double-logging bug if > 1. History: {result.history}"
|
|
267
|
+
)
|
|
268
|
+
|
|
252
269
|
def test_loc_assignment_tracked(self):
|
|
253
270
|
"""df.loc[mask, col] = val should be tracked."""
|
|
254
271
|
tp.enable(mode="debug", watch=["a"])
|
|
@@ -350,6 +367,57 @@ class TestNoDoubleLogging:
|
|
|
350
367
|
f"History: {result.history}"
|
|
351
368
|
)
|
|
352
369
|
|
|
370
|
+
def test_cross_pipeline_identical_change_deduplication(self):
|
|
371
|
+
"""Identical changes from parallel pipelines should be deduplicated.
|
|
372
|
+
|
|
373
|
+
When multiple pipelines from the same source do the SAME transformation
|
|
374
|
+
(e.g., both do fillna(0)), the event should only appear once.
|
|
375
|
+
"""
|
|
376
|
+
tp.enable(mode="debug", watch=["income"])
|
|
377
|
+
|
|
378
|
+
# Source data with a row that has None income
|
|
379
|
+
customers = pd.DataFrame({"id": ["A", "B"], "income": [None, 100.0]})
|
|
380
|
+
|
|
381
|
+
# Two parallel pipelines doing the SAME transformation
|
|
382
|
+
df1 = customers.copy()
|
|
383
|
+
df1["income"] = df1["income"].fillna(0) # Records None -> 0
|
|
384
|
+
|
|
385
|
+
df2 = customers.copy()
|
|
386
|
+
df2["income"] = df2["income"].fillna(0) # Records SAME None -> 0
|
|
387
|
+
|
|
388
|
+
# Query df1 - should deduplicate identical events
|
|
389
|
+
result1 = tp.why(df1, col="income", row=0)
|
|
390
|
+
assert result1.n_changes == 1, (
|
|
391
|
+
f"Identical events should be deduplicated. Got {result1.n_changes}. "
|
|
392
|
+
f"History: {result1.history}"
|
|
393
|
+
)
|
|
394
|
+
|
|
395
|
+
def test_cross_pipeline_different_changes_preserved(self):
|
|
396
|
+
"""Different changes from parallel pipelines should NOT be deduplicated.
|
|
397
|
+
|
|
398
|
+
When pipelines do DIFFERENT transformations on the same row,
|
|
399
|
+
both events should be visible (this is expected behavior - row IDs
|
|
400
|
+
are shared, so history includes all changes to that row ID).
|
|
401
|
+
"""
|
|
402
|
+
tp.enable(mode="debug", watch=["income"])
|
|
403
|
+
|
|
404
|
+
customers = pd.DataFrame({"id": ["A", "B"], "income": [None, 100.0]})
|
|
405
|
+
|
|
406
|
+
# Two pipelines doing DIFFERENT transformations
|
|
407
|
+
df1 = customers.copy()
|
|
408
|
+
df1["income"] = df1["income"].fillna(0) # None -> 0
|
|
409
|
+
|
|
410
|
+
df2 = customers.copy()
|
|
411
|
+
df2["income"] = df2["income"].fillna(99) # None -> 99
|
|
412
|
+
|
|
413
|
+
# Query df1 - since row ID is shared, both changes are visible
|
|
414
|
+
# This is expected: deduplication only removes IDENTICAL events
|
|
415
|
+
result1 = tp.why(df1, col="income", row=0)
|
|
416
|
+
assert result1.n_changes == 2, (
|
|
417
|
+
f"Different changes should both be visible. Got {result1.n_changes}. "
|
|
418
|
+
f"History: {result1.history}"
|
|
419
|
+
)
|
|
420
|
+
|
|
353
421
|
|
|
354
422
|
class TestMergeWarningScoping:
|
|
355
423
|
"""Tests for merge warnings being scoped to df's lineage."""
|
|
@@ -63,6 +63,10 @@ class TracePipeContext:
|
|
|
63
63
|
# When > 0, __getitem__[mask] skips capture (parent op will capture)
|
|
64
64
|
self._filter_op_depth: int = 0
|
|
65
65
|
|
|
66
|
+
# Transform operation tracking (prevents double-counting fillna/replace)
|
|
67
|
+
# When > 0, __setitem__ skips capture (transform op will capture)
|
|
68
|
+
self._in_transform_op: int = 0
|
|
69
|
+
|
|
66
70
|
# GroupBy state stack (supports nesting)
|
|
67
71
|
self._groupby_stack: list[dict] = []
|
|
68
72
|
|
|
@@ -22,6 +22,7 @@ from ..safety import (
|
|
|
22
22
|
get_caller_info,
|
|
23
23
|
wrap_pandas_method,
|
|
24
24
|
wrap_pandas_method_inplace,
|
|
25
|
+
wrap_pandas_transform_method,
|
|
25
26
|
)
|
|
26
27
|
from ..utils.value_capture import find_changed_indices_vectorized
|
|
27
28
|
from .apply_capture import instrument_apply_pipe, uninstrument_apply_pipe
|
|
@@ -554,6 +555,9 @@ def _wrap_setitem(original):
|
|
|
554
555
|
|
|
555
556
|
Captures BEFORE state for existing columns, then executes assignment,
|
|
556
557
|
then records the diff with actual old/new values.
|
|
558
|
+
|
|
559
|
+
Skips recording when inside a transform operation (fillna, replace) to
|
|
560
|
+
avoid double-counting cell changes - the transform wrapper will capture.
|
|
557
561
|
"""
|
|
558
562
|
|
|
559
563
|
@wraps(original)
|
|
@@ -565,7 +569,9 @@ def _wrap_setitem(original):
|
|
|
565
569
|
is_new_column = False
|
|
566
570
|
should_track = False
|
|
567
571
|
|
|
568
|
-
if
|
|
572
|
+
# Skip tracking if we're inside a transform operation (fillna, replace)
|
|
573
|
+
# Those operations will capture the change themselves
|
|
574
|
+
if ctx.enabled and isinstance(key, str) and ctx._in_transform_op == 0:
|
|
569
575
|
if key in ctx.watched_columns:
|
|
570
576
|
should_track = True
|
|
571
577
|
if key in self.columns:
|
|
@@ -771,13 +777,15 @@ def instrument_pandas():
|
|
|
771
777
|
wrapped = wrap_filter_method(method_name, original)
|
|
772
778
|
setattr(pd.DataFrame, method_name, wrapped)
|
|
773
779
|
|
|
774
|
-
# === DataFrame transform methods (
|
|
780
|
+
# === DataFrame transform methods (fillna, replace) ===
|
|
781
|
+
# These use wrap_pandas_transform_method to suppress __setitem__ recording
|
|
782
|
+
# during the transform, avoiding double-counting cell changes
|
|
775
783
|
transform_methods = ["fillna", "replace"]
|
|
776
784
|
for method_name in transform_methods:
|
|
777
785
|
if hasattr(pd.DataFrame, method_name):
|
|
778
786
|
original = getattr(pd.DataFrame, method_name)
|
|
779
787
|
_originals[f"DataFrame.{method_name}"] = original
|
|
780
|
-
wrapped =
|
|
788
|
+
wrapped = wrap_pandas_transform_method(method_name, original, _capture_transform)
|
|
781
789
|
setattr(pd.DataFrame, method_name, wrapped)
|
|
782
790
|
|
|
783
791
|
# === astype (no inplace) ===
|
|
@@ -103,7 +103,7 @@ def _make_wrapper(
|
|
|
103
103
|
method_name: Name for error messages
|
|
104
104
|
original_method: The original pandas method
|
|
105
105
|
capture_func: func(self, args, kwargs, result, ctx, method_name)
|
|
106
|
-
mode: "standard", "filter", or "inplace"
|
|
106
|
+
mode: "standard", "filter", "inplace", or "transform"
|
|
107
107
|
"""
|
|
108
108
|
|
|
109
109
|
@wraps(original_method)
|
|
@@ -112,10 +112,21 @@ def _make_wrapper(
|
|
|
112
112
|
|
|
113
113
|
# === PRE-EXECUTION SETUP ===
|
|
114
114
|
before_snapshot = None
|
|
115
|
+
is_inplace = kwargs.get("inplace", False)
|
|
115
116
|
|
|
116
117
|
if mode == "filter" and ctx.enabled:
|
|
117
118
|
ctx._filter_op_depth += 1
|
|
118
|
-
elif mode == "
|
|
119
|
+
elif mode == "transform" and ctx.enabled:
|
|
120
|
+
# Suppress __setitem__ recording during transform ops (fillna, replace)
|
|
121
|
+
# to avoid double-counting the same cell change
|
|
122
|
+
ctx._in_transform_op += 1
|
|
123
|
+
# Also handle inplace for transform operations
|
|
124
|
+
if is_inplace:
|
|
125
|
+
try:
|
|
126
|
+
before_snapshot = self.copy()
|
|
127
|
+
except Exception:
|
|
128
|
+
pass
|
|
129
|
+
elif mode == "inplace" and ctx.enabled and is_inplace:
|
|
119
130
|
try:
|
|
120
131
|
before_snapshot = self.copy()
|
|
121
132
|
except Exception:
|
|
@@ -127,15 +138,18 @@ def _make_wrapper(
|
|
|
127
138
|
finally:
|
|
128
139
|
if mode == "filter" and ctx.enabled:
|
|
129
140
|
ctx._filter_op_depth -= 1
|
|
141
|
+
elif mode == "transform" and ctx.enabled:
|
|
142
|
+
ctx._in_transform_op -= 1
|
|
130
143
|
|
|
131
144
|
# === CAPTURE LINEAGE (SIDE EFFECT) ===
|
|
132
145
|
# Skip capture if we're inside a filter operation (prevents recursion during export)
|
|
133
146
|
if ctx.enabled and ctx._filter_op_depth == 0:
|
|
134
147
|
try:
|
|
135
|
-
|
|
148
|
+
# Handle inplace for both "inplace" and "transform" modes
|
|
149
|
+
if (mode == "inplace" or mode == "transform") and is_inplace:
|
|
136
150
|
if before_snapshot is not None:
|
|
137
151
|
capture_func(before_snapshot, args, kwargs, self, ctx, method_name)
|
|
138
|
-
elif mode == "inplace" and result is not None:
|
|
152
|
+
elif (mode == "inplace" or mode == "transform") and result is not None:
|
|
139
153
|
capture_func(self, args, kwargs, result, ctx, method_name)
|
|
140
154
|
else:
|
|
141
155
|
capture_func(self, args, kwargs, result, ctx, method_name)
|
|
@@ -176,3 +190,14 @@ def wrap_pandas_method_inplace(
|
|
|
176
190
|
) -> Callable:
|
|
177
191
|
"""Wrap a pandas method that supports inplace=True."""
|
|
178
192
|
return _make_wrapper(method_name, original_method, capture_func, mode="inplace")
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def wrap_pandas_transform_method(
|
|
196
|
+
method_name: str, original_method: Callable, capture_func: Callable
|
|
197
|
+
) -> Callable:
|
|
198
|
+
"""Wrap a pandas transform method (fillna, replace) that may trigger internal setitem.
|
|
199
|
+
|
|
200
|
+
These methods modify column values and pandas internally uses setitem.
|
|
201
|
+
We suppress setitem recording during these ops to avoid double-counting.
|
|
202
|
+
"""
|
|
203
|
+
return _make_wrapper(method_name, original_method, capture_func, mode="transform")
|
|
@@ -32,6 +32,22 @@ from ..core import (
|
|
|
32
32
|
from ..utils.value_capture import capture_typed_value
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
def _stable_repr(val) -> str:
|
|
36
|
+
"""Create a stable string representation for deduplication.
|
|
37
|
+
|
|
38
|
+
Handles NaN, None, and other values that don't compare equal to themselves.
|
|
39
|
+
"""
|
|
40
|
+
if val is None:
|
|
41
|
+
return "None"
|
|
42
|
+
# Handle NaN (which doesn't equal itself)
|
|
43
|
+
try:
|
|
44
|
+
if isinstance(val, float) and val != val: # NaN check
|
|
45
|
+
return "NaN"
|
|
46
|
+
except (TypeError, ValueError):
|
|
47
|
+
pass
|
|
48
|
+
return repr(val)
|
|
49
|
+
|
|
50
|
+
|
|
35
51
|
class InMemoryLineageStore:
|
|
36
52
|
"""
|
|
37
53
|
Columnar storage for lineage data using Structure of Arrays (SoA).
|
|
@@ -556,12 +572,15 @@ class InMemoryLineageStore:
|
|
|
556
572
|
Follows merge lineage recursively to build complete cell provenance.
|
|
557
573
|
This is essential for tracking changes that happened before merge operations.
|
|
558
574
|
|
|
575
|
+
Deduplicates events by (col, old_val, new_val, operation) signature to prevent
|
|
576
|
+
cross-pipeline contamination when multiple DataFrames share row IDs.
|
|
577
|
+
|
|
559
578
|
Args:
|
|
560
579
|
row_id: Row ID to trace
|
|
561
580
|
max_depth: Maximum merge depth to follow (prevents infinite loops)
|
|
562
581
|
|
|
563
582
|
Returns:
|
|
564
|
-
List of events in chronological order, including parent row events.
|
|
583
|
+
List of UNIQUE events in chronological order, including parent row events.
|
|
565
584
|
"""
|
|
566
585
|
visited: set[int] = set()
|
|
567
586
|
|
|
@@ -589,7 +608,23 @@ class InMemoryLineageStore:
|
|
|
589
608
|
# Sort by step_id to ensure chronological order across lineage
|
|
590
609
|
all_events.sort(key=lambda e: e["step_id"])
|
|
591
610
|
|
|
592
|
-
|
|
611
|
+
# Deduplicate by (col, old_val, new_val, operation) signature
|
|
612
|
+
# This prevents cross-pipeline contamination when multiple DataFrames
|
|
613
|
+
# share the same row IDs (e.g., df.copy() followed by parallel transforms)
|
|
614
|
+
seen_signatures: set[tuple] = set()
|
|
615
|
+
unique_events = []
|
|
616
|
+
for event in all_events:
|
|
617
|
+
sig = (
|
|
618
|
+
event.get("col"),
|
|
619
|
+
_stable_repr(event.get("old_val")),
|
|
620
|
+
_stable_repr(event.get("new_val")),
|
|
621
|
+
event.get("operation"),
|
|
622
|
+
)
|
|
623
|
+
if sig not in seen_signatures:
|
|
624
|
+
seen_signatures.add(sig)
|
|
625
|
+
unique_events.append(event)
|
|
626
|
+
|
|
627
|
+
return unique_events
|
|
593
628
|
|
|
594
629
|
def get_cell_history_with_lineage(
|
|
595
630
|
self, row_id: int, column: str, max_depth: int = 10
|
|
@@ -2051,7 +2051,7 @@ wheels = [
|
|
|
2051
2051
|
|
|
2052
2052
|
[[package]]
|
|
2053
2053
|
name = "tracepipe"
|
|
2054
|
-
version = "0.3.
|
|
2054
|
+
version = "0.3.4"
|
|
2055
2055
|
source = { editable = "." }
|
|
2056
2056
|
dependencies = [
|
|
2057
2057
|
{ name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tracepipe-0.3.3 → tracepipe-0.3.5}/site/assets/javascripts/workers/search.2c215733.min.js.map
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|