tracepipe 0.4.1__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tracepipe-0.4.1 → tracepipe-0.4.2}/PKG-INFO +1 -1
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/changelog.md +21 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/getting-started/quickstart.md +12 -1
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/guide/cell-provenance.md +10 -2
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/guide/row-tracing.md +22 -8
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/guide/snapshots.md +31 -11
- {tracepipe-0.4.1 → tracepipe-0.4.2}/pyproject.toml +1 -1
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_convenience_debug.py +11 -3
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_public_api.py +6 -1
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_row_provenance.py +2 -4
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/__init__.py +1 -1
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/convenience.py +131 -12
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/debug.py +40 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/snapshot.py +87 -2
- tracepipe-0.4.1/CHANGELOG.md +0 -162
- {tracepipe-0.4.1 → tracepipe-0.4.2}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/.github/workflows/ci.yml +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/.github/workflows/docs.yml +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/.github/workflows/release.yml +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/.gitignore +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/.pre-commit-config.yaml +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/CONTRIBUTING.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/LICENSE +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/README.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/benchmarks/README.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/benchmarks/bench_memory.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/benchmarks/bench_overhead.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/benchmarks/bench_scale.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/benchmarks/run_all.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/api/contracts.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/api/core.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/api/debug.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/api/index.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/contributing.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/examples/data-validation.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/examples/ml-pipeline.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/getting-started/installation.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/getting-started/modes.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/guide/concepts.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/guide/contracts.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/guide/health-checks.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/guide/reports.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/docs/index.md +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/examples/demo.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/examples/ml_pipeline_demo.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/mkdocs.yml +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/404.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/api/contracts/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/api/core/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/api/debug/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/api/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/_mkdocstrings.css +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/images/favicon.png +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/bundle.79ae519e.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/bundle.79ae519e.min.js.map +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.ar.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.da.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.de.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.du.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.el.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.es.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.fi.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.fr.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.he.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.hi.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.hu.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.hy.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.it.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.ja.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.jp.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.kn.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.ko.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.multi.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.nl.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.no.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.pt.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.ro.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.ru.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.sa.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.sv.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.ta.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.te.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.th.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.tr.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.vi.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/min/lunr.zh.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/tinyseg.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/lunr/wordcut.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/workers/search.2c215733.min.js +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/javascripts/workers/search.2c215733.min.js.map +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/stylesheets/main.484c7ddc.min.css +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/stylesheets/main.484c7ddc.min.css.map +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/stylesheets/palette.ab4e12ef.min.css +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/assets/stylesheets/palette.ab4e12ef.min.css.map +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/changelog/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/contributing/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/examples/data-validation/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/examples/ml-pipeline/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/getting-started/installation/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/getting-started/modes/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/getting-started/quickstart/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/guide/cell-provenance/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/guide/concepts/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/guide/contracts/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/guide/health-checks/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/guide/reports/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/guide/row-tracing/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/guide/snapshots/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/index.html +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/objects.inv +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/search/search_index.json +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/sitemap.xml +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/site/sitemap.xml.gz +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/__init__.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/conftest.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_api.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_concurrency.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_contracts.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_edge_cases.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_integration.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_integration_scenarios.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_io_operations.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_lineage_through_merge.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_pandas_inst.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_snapshot.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tests/test_version_matrix.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/api.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/context.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/contracts.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/core.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/instrumentation/__init__.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/instrumentation/apply_capture.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/instrumentation/filter_capture.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/instrumentation/indexer_capture.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/instrumentation/merge_capture.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/instrumentation/pandas_inst.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/instrumentation/series_capture.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/safety.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/storage/__init__.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/storage/base.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/storage/lineage_store.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/storage/row_identity.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/utils/__init__.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/utils/value_capture.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/value_provenance.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/visualization/__init__.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/tracepipe/visualization/html_export.py +0 -0
- {tracepipe-0.4.1 → tracepipe-0.4.2}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tracepipe
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: Row-level data lineage tracking for pandas pipelines
|
|
5
5
|
Project-URL: Homepage, https://github.com/tracepipe/tracepipe
|
|
6
6
|
Project-URL: Documentation, https://tracepipe.github.io/tracepipe/
|
|
@@ -5,6 +5,27 @@ All notable changes to TracePipe will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.4.2] - 2026-02-04
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- **`CheckResult` change tracking**: Added `n_changes` and `changes_by_op` properties in debug mode to track value changes across pipeline steps
|
|
12
|
+
- **`TraceResult` status fields**: Added `status`, `dropped_by`, and `dropped_at_step` properties for clearer dropped row analysis
|
|
13
|
+
- **`DiffResult` completeness**: Added `cells_changed`, `changes_by_column`, `rows_unchanged`, and `changed_rows` for detailed snapshot comparison
|
|
14
|
+
- **Ghost value API**: Implemented `dbg.get_ghost_values(row_id)` for retrieving last known values of dropped rows
|
|
15
|
+
- **Merge provenance**: `trace.origin` and `trace.merge_origin` now properly populated for merged rows
|
|
16
|
+
- **Documentation alignment**: All documented APIs now match actual implementation with comprehensive test coverage
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
- **`tp.trace()` API enhancement**: Added `row_id=` parameter for explicit internal row ID tracking
|
|
20
|
+
- `row=` now strictly refers to DataFrame positional index
|
|
21
|
+
- `row_id=` refers to TracePipe's internal row identifier (stable across operations)
|
|
22
|
+
- Supports tracing dropped rows by ID: `tp.trace(df, row_id=42)`
|
|
23
|
+
- **`tp.why()` API enhancement**: Added `row_id=` parameter matching `tp.trace()` signature
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
- Comprehensive test suite (`test_doc_api_alignment.py`) with 27 tests validating documented API features
|
|
27
|
+
- Better error messages for out-of-bounds row access
|
|
28
|
+
|
|
8
29
|
## [0.4.1] - 2026-02-04
|
|
9
30
|
|
|
10
31
|
### Fixed
|
|
@@ -43,7 +43,7 @@ Output:
|
|
|
43
43
|
TracePipe Check: [OK] Pipeline healthy
|
|
44
44
|
Mode: debug
|
|
45
45
|
|
|
46
|
-
Retention:
|
|
46
|
+
Retention: 50%
|
|
47
47
|
Dropped: 2 rows
|
|
48
48
|
• DataFrame.dropna: 1
|
|
49
49
|
• DataFrame.__getitem__[mask]: 1
|
|
@@ -52,6 +52,17 @@ Value changes: 2 cells
|
|
|
52
52
|
• DataFrame.__setitem__[total]: 2
|
|
53
53
|
```
|
|
54
54
|
|
|
55
|
+
The `CheckResult` object provides convenient properties:
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
result.passed # True/False
|
|
59
|
+
result.retention # 0.5 (row retention rate)
|
|
60
|
+
result.n_dropped # 2 (total dropped rows)
|
|
61
|
+
result.drops_by_op # {"DataFrame.dropna": 1, ...}
|
|
62
|
+
result.n_changes # 2 (cell changes, debug mode only)
|
|
63
|
+
result.changes_by_op # {"DataFrame.__setitem__[total]": 2}
|
|
64
|
+
```
|
|
65
|
+
|
|
55
66
|
## 4. Trace a Row's Journey
|
|
56
67
|
|
|
57
68
|
```python
|
|
@@ -140,9 +140,17 @@ For dropped rows, you can still query their last known values:
|
|
|
140
140
|
```python
|
|
141
141
|
dbg = tp.debug.inspect()
|
|
142
142
|
|
|
143
|
-
# Get ghost values for a dropped row
|
|
143
|
+
# Get ghost values for a specific dropped row
|
|
144
144
|
dropped_rid = list(dbg.dropped_rows())[0]
|
|
145
145
|
ghost = dbg.get_ghost_values(dropped_rid)
|
|
146
|
-
|
|
147
146
|
print(f"Last known values: {ghost}")
|
|
147
|
+
# {"age": 25, "salary": 50000}
|
|
148
|
+
|
|
149
|
+
# Or get all ghost rows as a DataFrame
|
|
150
|
+
ghost_df = dbg.ghost_rows()
|
|
151
|
+
print(ghost_df)
|
|
152
|
+
# DataFrame with __tp_row_id__, __tp_dropped_by__, and watched columns
|
|
148
153
|
```
|
|
154
|
+
|
|
155
|
+
The `get_ghost_values(row_id)` method returns a dict mapping column names to
|
|
156
|
+
their last known values, or `None` if the row wasn't found in ghost storage.
|
|
@@ -20,12 +20,15 @@ Output:
|
|
|
20
20
|
Row 42 Journey:
|
|
21
21
|
Status: [OK] Alive
|
|
22
22
|
|
|
23
|
-
Events:
|
|
24
|
-
[SURVIVED] DataFrame.dropna
|
|
23
|
+
Events: 1
|
|
25
24
|
[MODIFIED] DataFrame.fillna: income
|
|
26
|
-
[SURVIVED] DataFrame.__getitem__[mask]
|
|
27
25
|
```
|
|
28
26
|
|
|
27
|
+
!!! note "Event Recording"
|
|
28
|
+
TracePipe records MODIFIED events for cells that change in watched columns.
|
|
29
|
+
Rows that pass through operations unchanged are not recorded as separate events
|
|
30
|
+
(they are implicitly "survived"). Drop events are recorded for filtered rows.
|
|
31
|
+
|
|
29
32
|
## The TraceResult Object
|
|
30
33
|
|
|
31
34
|
```python
|
|
@@ -34,12 +37,17 @@ trace = tp.trace(df, row=0)
|
|
|
34
37
|
# Access fields
|
|
35
38
|
trace.row_id # int: internal row ID
|
|
36
39
|
trace.status # str: "alive" or "dropped"
|
|
37
|
-
trace.
|
|
40
|
+
trace.is_alive # bool: True if row still exists
|
|
41
|
+
trace.events # list[dict]: all events for this row
|
|
38
42
|
|
|
39
43
|
# For dropped rows
|
|
40
44
|
trace.dropped_by # str: operation that dropped the row
|
|
41
45
|
trace.dropped_at_step # int: step number
|
|
42
46
|
|
|
47
|
+
# Provenance (v0.4+)
|
|
48
|
+
trace.origin # dict: {"type": "concat"|"merge", ...} or None
|
|
49
|
+
trace.representative # dict: for dedup-dropped rows, which row was kept
|
|
50
|
+
|
|
43
51
|
# Export
|
|
44
52
|
trace.to_dict() # dict representation
|
|
45
53
|
```
|
|
@@ -74,10 +82,16 @@ tp.trace(df, where={"email": None})
|
|
|
74
82
|
|
|
75
83
|
| Event Type | Description |
|
|
76
84
|
|------------|-------------|
|
|
77
|
-
| `
|
|
78
|
-
| `
|
|
79
|
-
|
|
80
|
-
|
|
85
|
+
| `MODIFIED` | One or more cells changed in watched columns |
|
|
86
|
+
| `DROPPED` | Row was removed by a filter operation |
|
|
87
|
+
|
|
88
|
+
!!! note "Design Note"
|
|
89
|
+
TracePipe does not explicitly record "SURVIVED" events because they would
|
|
90
|
+
create excessive noise for most pipelines. Instead, rows that exist in the
|
|
91
|
+
final DataFrame are implicitly considered to have survived all operations.
|
|
92
|
+
|
|
93
|
+
If you need to know which operations a row passed through, check the
|
|
94
|
+
`steps` list via `tp.debug.inspect().steps`.
|
|
81
95
|
|
|
82
96
|
## Tracing Dropped Rows
|
|
83
97
|
|
|
@@ -34,14 +34,17 @@ Output:
|
|
|
34
34
|
|
|
35
35
|
```
|
|
36
36
|
Snapshot Diff:
|
|
37
|
-
|
|
38
|
-
|
|
37
|
+
- 153 rows removed
|
|
38
|
+
! 153 new drops
|
|
39
39
|
|
|
40
40
|
Changes:
|
|
41
|
-
-
|
|
42
|
-
|
|
41
|
+
- 847 cells modified
|
|
42
|
+
price: 847
|
|
43
43
|
```
|
|
44
44
|
|
|
45
|
+
!!! tip "Enabling Cell-Level Diff"
|
|
46
|
+
To see cell-level changes, create snapshots with `include_values=True`.
|
|
47
|
+
|
|
45
48
|
## The Snapshot Object
|
|
46
49
|
|
|
47
50
|
```python
|
|
@@ -64,21 +67,38 @@ snapshot.data # DataFrame copy (if captured)
|
|
|
64
67
|
```python
|
|
65
68
|
diff = tp.diff(before, after)
|
|
66
69
|
|
|
67
|
-
#
|
|
68
|
-
diff.rows_added # int: new rows
|
|
69
|
-
diff.rows_removed # int: removed rows
|
|
70
|
-
diff.
|
|
71
|
-
diff.
|
|
70
|
+
# Row-level changes (always available)
|
|
71
|
+
diff.rows_added # set[int]: IDs of new rows
|
|
72
|
+
diff.rows_removed # set[int]: IDs of removed rows
|
|
73
|
+
diff.new_drops # set[int]: newly dropped row IDs
|
|
74
|
+
diff.recovered_rows # set[int]: rows that were dropped but now exist
|
|
72
75
|
|
|
73
76
|
# Column changes
|
|
74
77
|
diff.columns_added # list[str]: new columns
|
|
75
78
|
diff.columns_removed # list[str]: removed columns
|
|
76
79
|
|
|
77
|
-
#
|
|
78
|
-
diff.
|
|
80
|
+
# Cell-level changes (requires include_values=True on both snapshots)
|
|
81
|
+
diff.cells_changed # int: total modified cells
|
|
82
|
+
diff.changed_rows # set[int]: IDs of rows with value changes
|
|
79
83
|
diff.changes_by_column # dict: {col: count}
|
|
84
|
+
|
|
85
|
+
# Stats changes
|
|
86
|
+
diff.stats_changes # dict: {col: {metric: (old, new)}}
|
|
87
|
+
diff.drops_delta # dict: {operation: delta_count}
|
|
80
88
|
```
|
|
81
89
|
|
|
90
|
+
!!! note "Cell-Level Diff Requirements"
|
|
91
|
+
To get `cells_changed` and `changes_by_column`, both snapshots must be
|
|
92
|
+
created with `include_values=True`:
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
before = tp.snapshot(df, include_values=True)
|
|
96
|
+
# ... operations ...
|
|
97
|
+
after = tp.snapshot(df, include_values=True)
|
|
98
|
+
diff = tp.diff(before, after)
|
|
99
|
+
print(f"{diff.cells_changed} cells modified")
|
|
100
|
+
```
|
|
101
|
+
|
|
82
102
|
## Options
|
|
83
103
|
|
|
84
104
|
### Include Data
|
|
@@ -333,10 +333,14 @@ class TestTraceResult:
|
|
|
333
333
|
tp.enable(mode="debug")
|
|
334
334
|
df = pd.DataFrame({"a": [1, None, 3]})
|
|
335
335
|
df = df.dropna()
|
|
336
|
-
|
|
336
|
+
# Use row_id parameter to trace a dropped row by its internal ID
|
|
337
|
+
dbg = tp.debug.inspect()
|
|
338
|
+
dropped = dbg.dropped_rows()
|
|
339
|
+
assert len(dropped) >= 1
|
|
340
|
+
result = tp.trace(df, row_id=dropped[0])
|
|
337
341
|
assert result.is_alive is False
|
|
338
342
|
text = str(result)
|
|
339
|
-
assert "Dropped" in text or "
|
|
343
|
+
assert "Dropped" in text or "DROPPED" in text
|
|
340
344
|
|
|
341
345
|
def test_trace_with_events(self):
|
|
342
346
|
"""TraceResult shows events when cell is modified."""
|
|
@@ -386,7 +390,11 @@ class TestTraceResult:
|
|
|
386
390
|
tp.enable(mode="debug", watch=["a"])
|
|
387
391
|
df = pd.DataFrame({"a": [1, 2, 3]})
|
|
388
392
|
df = df.head(1) # Drop rows 1 and 2
|
|
389
|
-
|
|
393
|
+
# Use row_id parameter to trace a dropped row
|
|
394
|
+
dbg = tp.debug.inspect()
|
|
395
|
+
dropped = dbg.dropped_rows()
|
|
396
|
+
assert len(dropped) >= 1
|
|
397
|
+
result = tp.trace(df, row_id=dropped[0]) # Trace dropped row by ID
|
|
390
398
|
# Dropped row should have ghost values in debug mode
|
|
391
399
|
text = result.to_text(verbose=True)
|
|
392
400
|
assert result.is_alive is False
|
|
@@ -100,8 +100,13 @@ class TestTrace:
|
|
|
100
100
|
tp.enable(mode="debug")
|
|
101
101
|
df = pd.DataFrame({"a": [1, None, 3]})
|
|
102
102
|
df = df.dropna()
|
|
103
|
-
|
|
103
|
+
# Use row_id parameter to trace dropped row
|
|
104
|
+
dbg = tp.debug.inspect()
|
|
105
|
+
dropped = dbg.dropped_rows()
|
|
106
|
+
assert len(dropped) >= 1
|
|
107
|
+
result = tp.trace(df, row_id=dropped[0])
|
|
104
108
|
assert result is not None
|
|
109
|
+
assert result.is_alive is False
|
|
105
110
|
|
|
106
111
|
def test_trace_with_where(self):
|
|
107
112
|
"""trace() with where clause."""
|
|
@@ -569,10 +569,8 @@ class TestTraceResultOriginProperty:
|
|
|
569
569
|
|
|
570
570
|
result = df1.merge(df2, on="key")
|
|
571
571
|
|
|
572
|
-
# Use the
|
|
573
|
-
|
|
574
|
-
result_rids = ctx.row_manager.get_ids_array(result)
|
|
575
|
-
trace = tp.trace(result, row=result_rids[0])
|
|
572
|
+
# Use row=0 to trace the first row in the result DataFrame
|
|
573
|
+
trace = tp.trace(result, row=0)
|
|
576
574
|
|
|
577
575
|
# Should have merge origin
|
|
578
576
|
assert trace.origin is not None
|
|
@@ -60,6 +60,8 @@ class CheckResult:
|
|
|
60
60
|
.retention - Row retention rate (0.0-1.0)
|
|
61
61
|
.n_dropped - Total rows dropped
|
|
62
62
|
.drops_by_op - Drops broken down by operation
|
|
63
|
+
.n_changes - Total cell-level changes (debug mode only)
|
|
64
|
+
.changes_by_op - Changes broken down by operation (debug mode only)
|
|
63
65
|
"""
|
|
64
66
|
|
|
65
67
|
ok: bool
|
|
@@ -69,6 +71,9 @@ class CheckResult:
|
|
|
69
71
|
mode: str
|
|
70
72
|
# Internal: store drops_by_op so we don't need to recompute
|
|
71
73
|
_drops_by_op: dict[str, int] = field(default_factory=dict)
|
|
74
|
+
# Internal: store cell change counts (debug mode only)
|
|
75
|
+
_n_changes: int = 0
|
|
76
|
+
_changes_by_op: dict[str, int] = field(default_factory=dict)
|
|
72
77
|
|
|
73
78
|
# === CONVENIENCE PROPERTIES ===
|
|
74
79
|
|
|
@@ -97,6 +102,16 @@ class CheckResult:
|
|
|
97
102
|
"""Total pipeline steps recorded."""
|
|
98
103
|
return self.facts.get("total_steps", 0)
|
|
99
104
|
|
|
105
|
+
@property
|
|
106
|
+
def n_changes(self) -> int:
|
|
107
|
+
"""Total cell-level changes (debug mode only, 0 if not tracked)."""
|
|
108
|
+
return self._n_changes
|
|
109
|
+
|
|
110
|
+
@property
|
|
111
|
+
def changes_by_op(self) -> dict[str, int]:
|
|
112
|
+
"""Cell changes broken down by operation (debug mode only)."""
|
|
113
|
+
return self._changes_by_op
|
|
114
|
+
|
|
100
115
|
# === EXISTING PROPERTIES ===
|
|
101
116
|
|
|
102
117
|
@property
|
|
@@ -127,6 +142,20 @@ class CheckResult:
|
|
|
127
142
|
lines.append(f"TracePipe Check: {status}")
|
|
128
143
|
lines.append(f" Mode: {self.mode}")
|
|
129
144
|
|
|
145
|
+
# Always show key metrics in compact form
|
|
146
|
+
if self.retention is not None:
|
|
147
|
+
lines.append(f"\nRetention: {int(self.retention * 100)}%")
|
|
148
|
+
if self.n_dropped > 0:
|
|
149
|
+
lines.append(f"Dropped: {self.n_dropped} rows")
|
|
150
|
+
if self.drops_by_op:
|
|
151
|
+
for op, count in list(self.drops_by_op.items())[:5]:
|
|
152
|
+
lines.append(f" • {op}: {count}")
|
|
153
|
+
if self.n_changes > 0:
|
|
154
|
+
lines.append(f"\nValue changes: {self.n_changes} cells")
|
|
155
|
+
if self.changes_by_op:
|
|
156
|
+
for op, count in list(self.changes_by_op.items())[:5]:
|
|
157
|
+
lines.append(f" • {op}: {count}")
|
|
158
|
+
|
|
130
159
|
if verbose and self.facts:
|
|
131
160
|
lines.append("\n Measured facts:")
|
|
132
161
|
for k, v in self.facts.items():
|
|
@@ -158,6 +187,8 @@ class CheckResult:
|
|
|
158
187
|
"n_dropped": self.n_dropped,
|
|
159
188
|
"n_steps": self.n_steps,
|
|
160
189
|
"drops_by_op": self.drops_by_op,
|
|
190
|
+
"n_changes": self.n_changes,
|
|
191
|
+
"changes_by_op": self.changes_by_op,
|
|
161
192
|
"facts": self.facts,
|
|
162
193
|
"suggestions": self.suggestions,
|
|
163
194
|
"warnings": [
|
|
@@ -191,6 +222,7 @@ class TraceResult:
|
|
|
191
222
|
Events are in CHRONOLOGICAL order (oldest->newest).
|
|
192
223
|
|
|
193
224
|
Key attributes:
|
|
225
|
+
status: "alive" or "dropped" (string representation)
|
|
194
226
|
origin: Where this row came from (concat, merge, or original)
|
|
195
227
|
representative: If dropped by dedup, which row was kept instead
|
|
196
228
|
"""
|
|
@@ -207,6 +239,27 @@ class TraceResult:
|
|
|
207
239
|
# v0.4+ provenance
|
|
208
240
|
concat_origin: dict[str, Any] | None = None
|
|
209
241
|
dedup_representative: dict[str, Any] | None = None
|
|
242
|
+
# Steps this row survived (for SURVIVED event generation)
|
|
243
|
+
_survived_steps: list[dict[str, Any]] = field(default_factory=list)
|
|
244
|
+
|
|
245
|
+
@property
|
|
246
|
+
def status(self) -> str:
|
|
247
|
+
"""Row status as string: 'alive' or 'dropped'."""
|
|
248
|
+
return "alive" if self.is_alive else "dropped"
|
|
249
|
+
|
|
250
|
+
@property
|
|
251
|
+
def dropped_by(self) -> str | None:
|
|
252
|
+
"""Operation that dropped this row, or None if alive."""
|
|
253
|
+
if self.dropped_at:
|
|
254
|
+
return self.dropped_at.get("operation")
|
|
255
|
+
return None
|
|
256
|
+
|
|
257
|
+
@property
|
|
258
|
+
def dropped_at_step(self) -> int | None:
|
|
259
|
+
"""Step number where this row was dropped, or None if alive."""
|
|
260
|
+
if self.dropped_at:
|
|
261
|
+
return self.dropped_at.get("step_id")
|
|
262
|
+
return None
|
|
210
263
|
|
|
211
264
|
@property
|
|
212
265
|
def n_events(self) -> int:
|
|
@@ -258,8 +311,10 @@ class TraceResult:
|
|
|
258
311
|
"""Export to dictionary."""
|
|
259
312
|
return {
|
|
260
313
|
"row_id": self.row_id,
|
|
314
|
+
"status": self.status,
|
|
261
315
|
"is_alive": self.is_alive,
|
|
262
316
|
"dropped_at": self.dropped_at,
|
|
317
|
+
"dropped_by": self.dropped_at.get("operation") if self.dropped_at else None,
|
|
263
318
|
"origin": self.origin,
|
|
264
319
|
"representative": self.representative,
|
|
265
320
|
"n_events": self.n_events,
|
|
@@ -280,10 +335,11 @@ class TraceResult:
|
|
|
280
335
|
|
|
281
336
|
lines = [f"Row {self.row_id} Journey:"]
|
|
282
337
|
|
|
338
|
+
# Status line matches documentation format
|
|
283
339
|
if self.is_alive:
|
|
284
340
|
lines.append(" Status: [OK] Alive")
|
|
285
341
|
else:
|
|
286
|
-
lines.append(" Status: [
|
|
342
|
+
lines.append(" Status: [DROPPED]")
|
|
287
343
|
if self.dropped_at:
|
|
288
344
|
lines.append(
|
|
289
345
|
f" at step {self.dropped_at['step_id']}: {self.dropped_at['operation']}"
|
|
@@ -579,6 +635,21 @@ def check(
|
|
|
579
635
|
if count > 1000:
|
|
580
636
|
suggestions.append(f"'{op}' dropped {count} rows - review if intentional")
|
|
581
637
|
|
|
638
|
+
# === CELL CHANGES (debug mode only) ===
|
|
639
|
+
n_changes = 0
|
|
640
|
+
changes_by_op: dict[str, int] = {}
|
|
641
|
+
if ctx.config.mode == TracePipeMode.DEBUG:
|
|
642
|
+
# Count non-drop diffs (cell-level changes)
|
|
643
|
+
step_map = {s.step_id: s.operation for s in ctx.store.steps}
|
|
644
|
+
for i in range(len(ctx.store.diff_step_ids)):
|
|
645
|
+
col = ctx.store.diff_cols[i]
|
|
646
|
+
if col != "__row__": # Skip drop events
|
|
647
|
+
n_changes += 1
|
|
648
|
+
step_id = ctx.store.diff_step_ids[i]
|
|
649
|
+
op = step_map.get(step_id, "unknown")
|
|
650
|
+
changes_by_op[op] = changes_by_op.get(op, 0) + 1
|
|
651
|
+
facts["n_changes"] = n_changes
|
|
652
|
+
|
|
582
653
|
ok = len([w for w in warnings_list if w.severity == "fact"]) == 0
|
|
583
654
|
|
|
584
655
|
return CheckResult(
|
|
@@ -588,6 +659,8 @@ def check(
|
|
|
588
659
|
suggestions=suggestions,
|
|
589
660
|
mode=ctx.config.mode.value,
|
|
590
661
|
_drops_by_op=drops_by_op,
|
|
662
|
+
_n_changes=n_changes,
|
|
663
|
+
_changes_by_op=changes_by_op,
|
|
591
664
|
)
|
|
592
665
|
|
|
593
666
|
|
|
@@ -595,6 +668,7 @@ def trace(
|
|
|
595
668
|
df: pd.DataFrame,
|
|
596
669
|
*,
|
|
597
670
|
row: int | None = None,
|
|
671
|
+
row_id: int | None = None,
|
|
598
672
|
where: dict[str, Any] | None = None,
|
|
599
673
|
include_ghost: bool = True,
|
|
600
674
|
) -> TraceResult | list[TraceResult]:
|
|
@@ -603,7 +677,8 @@ def trace(
|
|
|
603
677
|
|
|
604
678
|
Args:
|
|
605
679
|
df: DataFrame to search in
|
|
606
|
-
row: Row
|
|
680
|
+
row: Row position (0-based index into current DataFrame)
|
|
681
|
+
row_id: Internal row ID (use for tracing dropped rows)
|
|
607
682
|
where: Selector dict, e.g. {"customer_id": "C123"}
|
|
608
683
|
include_ghost: Include last-known values for dropped rows
|
|
609
684
|
|
|
@@ -612,8 +687,14 @@ def trace(
|
|
|
612
687
|
Use print(result) for pretty output, result.to_dict() for data.
|
|
613
688
|
|
|
614
689
|
Examples:
|
|
615
|
-
|
|
616
|
-
|
|
690
|
+
# Trace by position in current DataFrame
|
|
691
|
+
result = tp.trace(df, row=0) # First row
|
|
692
|
+
|
|
693
|
+
# Trace by internal row ID (for dropped rows)
|
|
694
|
+
dropped = tp.debug.inspect().dropped_rows()
|
|
695
|
+
result = tp.trace(df, row_id=dropped[0])
|
|
696
|
+
|
|
697
|
+
# Trace by business key
|
|
617
698
|
tp.trace(df, where={"customer_id": "C123"})
|
|
618
699
|
"""
|
|
619
700
|
ctx = get_context()
|
|
@@ -624,12 +705,30 @@ def trace(
|
|
|
624
705
|
pass
|
|
625
706
|
|
|
626
707
|
# Resolve row IDs
|
|
627
|
-
if
|
|
628
|
-
|
|
708
|
+
if row_id is not None:
|
|
709
|
+
# Direct row ID specified - use as-is
|
|
710
|
+
row_ids = [row_id]
|
|
711
|
+
elif row is not None:
|
|
712
|
+
# row= is a DataFrame index position (0-based), not a row ID
|
|
713
|
+
# Convert to actual row ID using the DataFrame's registered IDs
|
|
714
|
+
rids = ctx.row_manager.get_ids_array(df)
|
|
715
|
+
if rids is not None:
|
|
716
|
+
# Handle negative indexing
|
|
717
|
+
if row < 0:
|
|
718
|
+
row = len(rids) + row
|
|
719
|
+
if 0 <= row < len(rids):
|
|
720
|
+
row_ids = [int(rids[row])]
|
|
721
|
+
else:
|
|
722
|
+
raise ValueError(
|
|
723
|
+
f"Row index {row} out of bounds for DataFrame with {len(rids)} rows"
|
|
724
|
+
)
|
|
725
|
+
else:
|
|
726
|
+
# DataFrame not tracked - use row as-is (legacy behavior)
|
|
727
|
+
row_ids = [row]
|
|
629
728
|
elif where is not None:
|
|
630
729
|
row_ids = _resolve_where(df, where, ctx)
|
|
631
730
|
else:
|
|
632
|
-
raise ValueError("Must provide 'row' or 'where'")
|
|
731
|
+
raise ValueError("Must provide 'row', 'row_id', or 'where'")
|
|
633
732
|
|
|
634
733
|
results = []
|
|
635
734
|
for rid in row_ids:
|
|
@@ -644,6 +743,7 @@ def why(
|
|
|
644
743
|
*,
|
|
645
744
|
col: str,
|
|
646
745
|
row: int | None = None,
|
|
746
|
+
row_id: int | None = None,
|
|
647
747
|
where: dict[str, Any] | None = None,
|
|
648
748
|
) -> WhyResult | list[WhyResult]:
|
|
649
749
|
"""
|
|
@@ -652,7 +752,8 @@ def why(
|
|
|
652
752
|
Args:
|
|
653
753
|
df: DataFrame to search in
|
|
654
754
|
col: Column name to trace
|
|
655
|
-
row: Row
|
|
755
|
+
row: Row position (0-based index into current DataFrame)
|
|
756
|
+
row_id: Internal row ID (use for cells in dropped rows)
|
|
656
757
|
where: Selector dict, e.g. {"customer_id": "C123"}
|
|
657
758
|
|
|
658
759
|
Returns:
|
|
@@ -660,7 +761,7 @@ def why(
|
|
|
660
761
|
Use print(result) for pretty output, result.to_dict() for data.
|
|
661
762
|
|
|
662
763
|
Examples:
|
|
663
|
-
result = tp.why(df, col="amount", row=
|
|
764
|
+
result = tp.why(df, col="amount", row=0) # First row
|
|
664
765
|
print(result)
|
|
665
766
|
tp.why(df, col="email", where={"user_id": "U123"})
|
|
666
767
|
"""
|
|
@@ -676,12 +777,30 @@ def why(
|
|
|
676
777
|
)
|
|
677
778
|
|
|
678
779
|
# Resolve row IDs
|
|
679
|
-
if
|
|
680
|
-
|
|
780
|
+
if row_id is not None:
|
|
781
|
+
# Direct row ID specified - use as-is
|
|
782
|
+
row_ids = [row_id]
|
|
783
|
+
elif row is not None:
|
|
784
|
+
# row= is a DataFrame index position (0-based), not a row ID
|
|
785
|
+
# Convert to actual row ID using the DataFrame's registered IDs
|
|
786
|
+
rids = ctx.row_manager.get_ids_array(df)
|
|
787
|
+
if rids is not None:
|
|
788
|
+
# Handle negative indexing
|
|
789
|
+
if row < 0:
|
|
790
|
+
row = len(rids) + row
|
|
791
|
+
if 0 <= row < len(rids):
|
|
792
|
+
row_ids = [int(rids[row])]
|
|
793
|
+
else:
|
|
794
|
+
raise ValueError(
|
|
795
|
+
f"Row index {row} out of bounds for DataFrame with {len(rids)} rows"
|
|
796
|
+
)
|
|
797
|
+
else:
|
|
798
|
+
# DataFrame not tracked - use row as-is (legacy behavior)
|
|
799
|
+
row_ids = [row]
|
|
681
800
|
elif where is not None:
|
|
682
801
|
row_ids = _resolve_where(df, where, ctx)
|
|
683
802
|
else:
|
|
684
|
-
raise ValueError("Must provide 'row' or 'where'")
|
|
803
|
+
raise ValueError("Must provide 'row', 'row_id', or 'where'")
|
|
685
804
|
|
|
686
805
|
results = []
|
|
687
806
|
for rid in row_ids:
|
|
@@ -179,6 +179,46 @@ class DebugInspector:
|
|
|
179
179
|
ctx = get_context()
|
|
180
180
|
return ctx.row_manager.get_ghost_rows(limit=limit)
|
|
181
181
|
|
|
182
|
+
def get_ghost_values(self, row_id: int) -> dict[str, Any] | None:
|
|
183
|
+
"""
|
|
184
|
+
Get last-known values for a specific dropped row (DEBUG mode only).
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
row_id: The row ID to look up
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
Dict mapping column names to their last known values,
|
|
191
|
+
or None if the row was not found in ghost storage.
|
|
192
|
+
|
|
193
|
+
Example:
|
|
194
|
+
dbg = tp.debug.inspect()
|
|
195
|
+
dropped_rid = list(dbg.dropped_rows())[0]
|
|
196
|
+
ghost = dbg.get_ghost_values(dropped_rid)
|
|
197
|
+
print(f"Last known values: {ghost}")
|
|
198
|
+
"""
|
|
199
|
+
ctx = get_context()
|
|
200
|
+
ghost_df = ctx.row_manager.get_ghost_rows(limit=100000)
|
|
201
|
+
|
|
202
|
+
if ghost_df.empty or "__tp_row_id__" not in ghost_df.columns:
|
|
203
|
+
return None
|
|
204
|
+
|
|
205
|
+
row_match = ghost_df[ghost_df["__tp_row_id__"] == row_id]
|
|
206
|
+
if row_match.empty:
|
|
207
|
+
return None
|
|
208
|
+
|
|
209
|
+
# Convert to dict and remove internal columns
|
|
210
|
+
result = row_match.iloc[0].to_dict()
|
|
211
|
+
internal_cols = [
|
|
212
|
+
"__tp_row_id__",
|
|
213
|
+
"__tp_dropped_by__",
|
|
214
|
+
"__tp_dropped_step__",
|
|
215
|
+
"__tp_original_position__",
|
|
216
|
+
]
|
|
217
|
+
for col in internal_cols:
|
|
218
|
+
result.pop(col, None)
|
|
219
|
+
|
|
220
|
+
return result
|
|
221
|
+
|
|
182
222
|
def stats(self) -> dict:
|
|
183
223
|
"""Get comprehensive tracking statistics."""
|
|
184
224
|
ctx = get_context()
|