tracepipe 0.3.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. {tracepipe-0.3.0 → tracepipe-0.3.1}/CHANGELOG.md +12 -0
  2. tracepipe-0.3.1/PKG-INFO +308 -0
  3. tracepipe-0.3.1/README.md +239 -0
  4. {tracepipe-0.3.0 → tracepipe-0.3.1}/pyproject.toml +1 -1
  5. tracepipe-0.3.1/tests/test_lineage_through_merge.py +286 -0
  6. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/__init__.py +1 -1
  7. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/api.py +54 -4
  8. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/convenience.py +6 -1
  9. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/storage/lineage_store.py +63 -1
  10. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/value_provenance.py +32 -24
  11. tracepipe-0.3.0/PKG-INFO +0 -575
  12. tracepipe-0.3.0/README.md +0 -506
  13. {tracepipe-0.3.0 → tracepipe-0.3.1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  14. {tracepipe-0.3.0 → tracepipe-0.3.1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  15. {tracepipe-0.3.0 → tracepipe-0.3.1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  16. {tracepipe-0.3.0 → tracepipe-0.3.1}/.github/workflows/ci.yml +0 -0
  17. {tracepipe-0.3.0 → tracepipe-0.3.1}/.github/workflows/docs.yml +0 -0
  18. {tracepipe-0.3.0 → tracepipe-0.3.1}/.github/workflows/release.yml +0 -0
  19. {tracepipe-0.3.0 → tracepipe-0.3.1}/.gitignore +0 -0
  20. {tracepipe-0.3.0 → tracepipe-0.3.1}/.pre-commit-config.yaml +0 -0
  21. {tracepipe-0.3.0 → tracepipe-0.3.1}/CONTRIBUTING.md +0 -0
  22. {tracepipe-0.3.0 → tracepipe-0.3.1}/LICENSE +0 -0
  23. {tracepipe-0.3.0 → tracepipe-0.3.1}/benchmarks/README.md +0 -0
  24. {tracepipe-0.3.0 → tracepipe-0.3.1}/benchmarks/bench_memory.py +0 -0
  25. {tracepipe-0.3.0 → tracepipe-0.3.1}/benchmarks/bench_overhead.py +0 -0
  26. {tracepipe-0.3.0 → tracepipe-0.3.1}/benchmarks/bench_scale.py +0 -0
  27. {tracepipe-0.3.0 → tracepipe-0.3.1}/benchmarks/run_all.py +0 -0
  28. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/api/contracts.md +0 -0
  29. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/api/core.md +0 -0
  30. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/api/debug.md +0 -0
  31. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/api/index.md +0 -0
  32. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/changelog.md +0 -0
  33. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/contributing.md +0 -0
  34. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/examples/data-validation.md +0 -0
  35. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/examples/ml-pipeline.md +0 -0
  36. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/getting-started/installation.md +0 -0
  37. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/getting-started/modes.md +0 -0
  38. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/getting-started/quickstart.md +0 -0
  39. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/guide/cell-provenance.md +0 -0
  40. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/guide/concepts.md +0 -0
  41. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/guide/contracts.md +0 -0
  42. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/guide/health-checks.md +0 -0
  43. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/guide/reports.md +0 -0
  44. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/guide/row-tracing.md +0 -0
  45. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/guide/snapshots.md +0 -0
  46. {tracepipe-0.3.0 → tracepipe-0.3.1}/docs/index.md +0 -0
  47. {tracepipe-0.3.0 → tracepipe-0.3.1}/examples/comprehensive_demo.py +0 -0
  48. {tracepipe-0.3.0 → tracepipe-0.3.1}/examples/demo.py +0 -0
  49. {tracepipe-0.3.0 → tracepipe-0.3.1}/examples/ml_pipeline_demo.py +0 -0
  50. {tracepipe-0.3.0 → tracepipe-0.3.1}/examples/red_team_test.py +0 -0
  51. {tracepipe-0.3.0 → tracepipe-0.3.1}/mkdocs.yml +0 -0
  52. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/404.html +0 -0
  53. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/api/contracts/index.html +0 -0
  54. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/api/core/index.html +0 -0
  55. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/api/debug/index.html +0 -0
  56. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/api/index.html +0 -0
  57. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/_mkdocstrings.css +0 -0
  58. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/images/favicon.png +0 -0
  59. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/bundle.79ae519e.min.js +0 -0
  60. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/bundle.79ae519e.min.js.map +0 -0
  61. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.ar.min.js +0 -0
  62. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.da.min.js +0 -0
  63. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.de.min.js +0 -0
  64. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.du.min.js +0 -0
  65. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.el.min.js +0 -0
  66. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.es.min.js +0 -0
  67. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.fi.min.js +0 -0
  68. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.fr.min.js +0 -0
  69. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.he.min.js +0 -0
  70. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.hi.min.js +0 -0
  71. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.hu.min.js +0 -0
  72. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.hy.min.js +0 -0
  73. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.it.min.js +0 -0
  74. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.ja.min.js +0 -0
  75. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.jp.min.js +0 -0
  76. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.kn.min.js +0 -0
  77. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.ko.min.js +0 -0
  78. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.multi.min.js +0 -0
  79. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.nl.min.js +0 -0
  80. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.no.min.js +0 -0
  81. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.pt.min.js +0 -0
  82. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.ro.min.js +0 -0
  83. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.ru.min.js +0 -0
  84. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.sa.min.js +0 -0
  85. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +0 -0
  86. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.sv.min.js +0 -0
  87. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.ta.min.js +0 -0
  88. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.te.min.js +0 -0
  89. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.th.min.js +0 -0
  90. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.tr.min.js +0 -0
  91. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.vi.min.js +0 -0
  92. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/min/lunr.zh.min.js +0 -0
  93. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/tinyseg.js +0 -0
  94. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/lunr/wordcut.js +0 -0
  95. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/workers/search.2c215733.min.js +0 -0
  96. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/javascripts/workers/search.2c215733.min.js.map +0 -0
  97. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/stylesheets/main.484c7ddc.min.css +0 -0
  98. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/stylesheets/main.484c7ddc.min.css.map +0 -0
  99. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/stylesheets/palette.ab4e12ef.min.css +0 -0
  100. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/assets/stylesheets/palette.ab4e12ef.min.css.map +0 -0
  101. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/changelog/index.html +0 -0
  102. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/contributing/index.html +0 -0
  103. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/examples/data-validation/index.html +0 -0
  104. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/examples/ml-pipeline/index.html +0 -0
  105. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/getting-started/installation/index.html +0 -0
  106. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/getting-started/modes/index.html +0 -0
  107. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/getting-started/quickstart/index.html +0 -0
  108. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/guide/cell-provenance/index.html +0 -0
  109. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/guide/concepts/index.html +0 -0
  110. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/guide/contracts/index.html +0 -0
  111. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/guide/health-checks/index.html +0 -0
  112. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/guide/reports/index.html +0 -0
  113. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/guide/row-tracing/index.html +0 -0
  114. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/guide/snapshots/index.html +0 -0
  115. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/index.html +0 -0
  116. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/objects.inv +0 -0
  117. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/search/search_index.json +0 -0
  118. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/sitemap.xml +0 -0
  119. {tracepipe-0.3.0 → tracepipe-0.3.1}/site/sitemap.xml.gz +0 -0
  120. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/__init__.py +0 -0
  121. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/conftest.py +0 -0
  122. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/test_api.py +0 -0
  123. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/test_concurrency.py +0 -0
  124. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/test_contracts.py +0 -0
  125. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/test_convenience_debug.py +0 -0
  126. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/test_edge_cases.py +0 -0
  127. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/test_integration.py +0 -0
  128. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/test_io_operations.py +0 -0
  129. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/test_pandas_inst.py +0 -0
  130. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/test_public_api.py +0 -0
  131. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/test_snapshot.py +0 -0
  132. {tracepipe-0.3.0 → tracepipe-0.3.1}/tests/test_version_matrix.py +0 -0
  133. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/context.py +0 -0
  134. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/contracts.py +0 -0
  135. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/core.py +0 -0
  136. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/debug.py +0 -0
  137. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/instrumentation/__init__.py +0 -0
  138. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/instrumentation/apply_capture.py +0 -0
  139. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/instrumentation/filter_capture.py +0 -0
  140. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/instrumentation/indexer_capture.py +0 -0
  141. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/instrumentation/merge_capture.py +0 -0
  142. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/instrumentation/pandas_inst.py +0 -0
  143. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/instrumentation/series_capture.py +0 -0
  144. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/safety.py +0 -0
  145. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/snapshot.py +0 -0
  146. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/storage/__init__.py +0 -0
  147. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/storage/base.py +0 -0
  148. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/storage/row_identity.py +0 -0
  149. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/utils/__init__.py +0 -0
  150. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/utils/value_capture.py +0 -0
  151. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/visualization/__init__.py +0 -0
  152. {tracepipe-0.3.0 → tracepipe-0.3.1}/tracepipe/visualization/html_export.py +0 -0
  153. {tracepipe-0.3.0 → tracepipe-0.3.1}/uv.lock +0 -0
@@ -5,6 +5,18 @@ All notable changes to TracePipe will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## 0.3.1 - 2026-02-03
9
+
10
+ ### Fixed
11
+ - Cell history now correctly chains through merge operations via lineage traversal
12
+ - `tp.why()` and `tp.trace()` show pre-merge changes for post-merge rows
13
+ - `enable()` resets accumulated state when called multiple times (fixes duplicate warnings in notebooks/IDEs)
14
+
15
+ ### Added
16
+ - `get_row_history_with_lineage()` and `get_cell_history_with_lineage()` methods for lineage-aware queries
17
+ - `follow_lineage` parameter in `explain_value()` to opt out of lineage traversal
18
+ - Integration tests for cell provenance through merge operations
19
+
8
20
  ## 0.3.0 - 2026-02-03
9
21
 
10
22
  ### Added
@@ -0,0 +1,308 @@
1
+ Metadata-Version: 2.4
2
+ Name: tracepipe
3
+ Version: 0.3.1
4
+ Summary: Row-level data lineage tracking for pandas pipelines
5
+ Project-URL: Homepage, https://github.com/tracepipe/tracepipe
6
+ Project-URL: Documentation, https://tracepipe.github.io/tracepipe/
7
+ Project-URL: Repository, https://github.com/tracepipe/tracepipe.git
8
+ Project-URL: Issues, https://github.com/tracepipe/tracepipe/issues
9
+ Project-URL: Changelog, https://tracepipe.github.io/tracepipe/changelog/
10
+ Author: Gauthier Piarrette
11
+ License: MIT License
12
+
13
+ Copyright (c) 2026 Gauthier Piarrette
14
+
15
+ Permission is hereby granted, free of charge, to any person obtaining a copy
16
+ of this software and associated documentation files (the "Software"), to deal
17
+ in the Software without restriction, including without limitation the rights
18
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
19
+ copies of the Software, and to permit persons to whom the Software is
20
+ furnished to do so, subject to the following conditions:
21
+
22
+ The above copyright notice and this permission notice shall be included in all
23
+ copies or substantial portions of the Software.
24
+
25
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31
+ SOFTWARE.
32
+ License-File: LICENSE
33
+ Keywords: data-engineering,data-lineage,data-quality,debugging,observability,pandas
34
+ Classifier: Development Status :: 4 - Beta
35
+ Classifier: Intended Audience :: Developers
36
+ Classifier: Intended Audience :: Science/Research
37
+ Classifier: License :: OSI Approved :: MIT License
38
+ Classifier: Operating System :: OS Independent
39
+ Classifier: Programming Language :: Python :: 3
40
+ Classifier: Programming Language :: Python :: 3.9
41
+ Classifier: Programming Language :: Python :: 3.10
42
+ Classifier: Programming Language :: Python :: 3.11
43
+ Classifier: Programming Language :: Python :: 3.12
44
+ Classifier: Topic :: Scientific/Engineering
45
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
46
+ Requires-Python: >=3.9
47
+ Requires-Dist: numpy>=1.20.0
48
+ Requires-Dist: pandas>=1.5.0
49
+ Provides-Extra: all
50
+ Requires-Dist: psutil>=5.9.0; extra == 'all'
51
+ Requires-Dist: pyarrow>=10.0.0; extra == 'all'
52
+ Provides-Extra: arrow
53
+ Requires-Dist: pyarrow>=10.0.0; extra == 'arrow'
54
+ Provides-Extra: dev
55
+ Requires-Dist: black>=23.0.0; extra == 'dev'
56
+ Requires-Dist: pre-commit>=3.5.0; extra == 'dev'
57
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
58
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
59
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
60
+ Requires-Dist: taskipy>=1.12.0; extra == 'dev'
61
+ Provides-Extra: docs
62
+ Requires-Dist: mkdocs-material>=9.5.0; extra == 'docs'
63
+ Requires-Dist: mkdocs>=1.5.0; extra == 'docs'
64
+ Requires-Dist: mkdocstrings[python]>=0.24.0; extra == 'docs'
65
+ Requires-Dist: pymdown-extensions>=10.0.0; extra == 'docs'
66
+ Provides-Extra: memory
67
+ Requires-Dist: psutil>=5.9.0; extra == 'memory'
68
+ Description-Content-Type: text/markdown
69
+
70
+ <div align="center">
71
+
72
+ # TracePipe
73
+
74
+ ### Row-level data lineage for pandas pipelines
75
+
76
+ **Know exactly where every row went, why values changed, and how your data transformed.**
77
+
78
+ [![PyPI version](https://img.shields.io/pypi/v/tracepipe.svg)](https://pypi.org/project/tracepipe/)
79
+ [![Python 3.9+](https://img.shields.io/pypi/pyversions/tracepipe.svg)](https://pypi.org/project/tracepipe/)
80
+ [![CI](https://github.com/gauthierpiarrette/tracepipe/actions/workflows/ci.yml/badge.svg)](https://github.com/gauthierpiarrette/tracepipe/actions/workflows/ci.yml)
81
+ [![codecov](https://codecov.io/gh/gauthierpiarrette/tracepipe/branch/main/graph/badge.svg)](https://codecov.io/gh/gauthierpiarrette/tracepipe)
82
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
83
+ [![Docs](https://img.shields.io/badge/docs-mkdocs-blue.svg)](https://gauthierpiarrette.github.io/tracepipe/)
84
+
85
+ [Getting Started](#getting-started) · [Documentation](https://gauthierpiarrette.github.io/tracepipe/) · [Examples](#real-world-example)
86
+
87
+ </div>
88
+
89
+ ---
90
+
91
+ ## Why TracePipe?
92
+
93
+ Data pipelines are black boxes. Rows vanish. Values change. You're left guessing.
94
+
95
+ ```python
96
+ df = pd.read_csv("customers.csv")
97
+ df = df.dropna() # Some rows disappear
98
+ df = df.merge(regions, on="zip") # New rows appear, some vanish
99
+ df["income"] = df["income"].fillna(0) # Values change silently
100
+ df = df[df["age"] >= 18] # More rows gone
101
+ # What happened to customer C-789? 🤷
102
+ ```
103
+
104
+ **TracePipe gives you the complete audit trail — with no changes to your existing pandas code.**
105
+
106
+ ---
107
+
108
+ ## Getting Started
109
+
110
+ ```bash
111
+ pip install tracepipe
112
+ ```
113
+
114
+ ```python
115
+ import tracepipe as tp
116
+ import pandas as pd
117
+
118
+ tp.enable(mode="debug", watch=["income"])
119
+
120
+ df = pd.read_csv("customers.csv")
121
+ df = df.dropna()
122
+ df["income"] = df["income"].fillna(0)
123
+ df = df[df["age"] >= 18]
124
+
125
+ tp.check(df) # See what happened
126
+ ```
127
+
128
+ ```
129
+ TracePipe Check: [OK] Pipeline healthy
130
+
131
+ Retention: 847/1000 (84.7%)
132
+ Dropped: 153 rows
133
+ • DataFrame.dropna: 42
134
+ • DataFrame.__getitem__[mask]: 111
135
+
136
+ Value changes: 23 cells modified
137
+ • DataFrame.fillna: 23 (income)
138
+ ```
139
+
140
+ That's it. **One import, full visibility.**
141
+
142
+ ---
143
+
144
+ ## Core API
145
+
146
+ | Function | What it does |
147
+ |----------|--------------|
148
+ | `tp.enable()` | Start tracking |
149
+ | `tp.check(df)` | Health check — retention, drops, changes |
150
+ | `tp.trace(df, where={"id": "C-789"})` | Follow a row's complete journey |
151
+ | `tp.why(df, col="income", row=5)` | Explain why a cell has its current value |
152
+ | `tp.report(df, "audit.html")` | Export interactive HTML report |
153
+
154
+ ---
155
+
156
+ ## Key Features
157
+
158
+ <table>
159
+ <tr>
160
+ <td width="50%">
161
+
162
+ ### 🔍 Zero-Code Instrumentation
163
+ TracePipe patches pandas at runtime. Your existing code works unchanged.
164
+
165
+ ### 📊 Complete Provenance
166
+ Track drops, transforms, merges, and cell-level changes with before/after values.
167
+
168
+ </td>
169
+ <td width="50%">
170
+
171
+ ### 🎯 Business-Key Lookups
172
+ Find rows by their values: `tp.trace(df, where={"email": "alice@example.com"})`
173
+
174
+ ### ⚡ Production-Ready
175
+ 1.0-2.8x overhead (varies by operation). Tested on DataFrames up to 1M rows.
176
+
177
+ </td>
178
+ </tr>
179
+ </table>
180
+
181
+ ---
182
+
183
+ ## Real-World Example
184
+
185
+ ```python
186
+ import tracepipe as tp
187
+ import pandas as pd
188
+
189
+ tp.enable(mode="debug", watch=["age", "income", "label"])
190
+
191
+ # Load and clean
192
+ df = pd.read_csv("training_data.csv")
193
+ df = df.dropna(subset=["label"])
194
+ df["income"] = df["income"].fillna(df["income"].median())
195
+ df = df[df["age"] >= 18]
196
+
197
+ # Audit
198
+ print(tp.check(df))
199
+ ```
200
+
201
+ ```
202
+ Retention: 8234/10000 (82.3%)
203
+ Dropped: 1766 rows
204
+ • DataFrame.dropna: 423
205
+ • DataFrame.__getitem__[mask]: 1343
206
+
207
+ Value changes: 892 cells
208
+ • DataFrame.fillna: 892 (income)
209
+ ```
210
+
211
+ ```python
212
+ # Why does this customer have a filled income?
213
+ tp.why(df, col="income", where={"customer_id": "C-789"})
214
+ ```
215
+
216
+ ```
217
+ Cell History: row 156, column 'income'
218
+ Current value: 45000.0
219
+ [i] Was null at step 1 (later recovered)
220
+
221
+ History (1 change):
222
+ None -> 45000.0
223
+ by: DataFrame.fillna
224
+ ```
225
+
226
+ ---
227
+
228
+ ## Two Modes
229
+
230
+ | Mode | Use Case | What's Tracked |
231
+ |------|----------|----------------|
232
+ | **CI** (default) | Production pipelines | Step counts, retention rates, merge warnings |
233
+ | **Debug** | Development | Full row history, cell diffs, merge parents, group membership |
234
+
235
+ ```python
236
+ tp.enable(mode="ci") # Lightweight
237
+ tp.enable(mode="debug") # Full lineage
238
+ ```
239
+
240
+ ---
241
+
242
+ ## What's Tracked
243
+
244
+ | Operation | Coverage |
245
+ |-----------|----------|
246
+ | `dropna`, `drop_duplicates`, `query`, `df[mask]` | ✅ Full |
247
+ | `fillna`, `replace`, `loc[]=`, `iloc[]=` | ✅ Full (cell diffs) |
248
+ | `merge`, `join` | ✅ Full (parent tracking) |
249
+ | `groupby().agg()` | ✅ Full (group membership) |
250
+ | `sort_values`, `head`, `tail`, `sample` | ✅ Full |
251
+ | `apply`, `pipe` | ⚠️ Partial |
252
+
253
+ ---
254
+
255
+ ## Data Quality Contracts
256
+
257
+ ```python
258
+ (tp.contract()
259
+ .expect_unique("customer_id")
260
+ .expect_no_nulls("email")
261
+ .expect_retention(min_rate=0.9)
262
+ .check(df)
263
+ .raise_if_failed())
264
+ ```
265
+
266
+ ---
267
+
268
+ ## Documentation
269
+
270
+ 📚 **[Full Documentation](https://gauthierpiarrette.github.io/tracepipe/)**
271
+
272
+ - [Quickstart](https://gauthierpiarrette.github.io/tracepipe/getting-started/quickstart/)
273
+ - [User Guide](https://gauthierpiarrette.github.io/tracepipe/guide/concepts/)
274
+ - [API Reference](https://gauthierpiarrette.github.io/tracepipe/api/)
275
+ - [Examples](https://gauthierpiarrette.github.io/tracepipe/examples/ml-pipeline/)
276
+
277
+ ---
278
+
279
+ ## Contributing
280
+
281
+ ```bash
282
+ git clone https://github.com/gauthierpiarrette/tracepipe.git
283
+ cd tracepipe
284
+ pip install -e ".[dev]"
285
+ pytest tests/ -v
286
+ ```
287
+
288
+ See [CONTRIBUTING](https://gauthierpiarrette.github.io/tracepipe/contributing/) for guidelines.
289
+
290
+ ---
291
+
292
+ ## License
293
+
294
+ MIT License. See [LICENSE](LICENSE).
295
+
296
+ ---
297
+
298
+ <div align="center">
299
+
300
+ **Stop guessing where your rows went.**
301
+
302
+ ```bash
303
+ pip install tracepipe
304
+ ```
305
+
306
+ ⭐ Star us on GitHub if TracePipe helps your data work!
307
+
308
+ </div>
@@ -0,0 +1,239 @@
1
+ <div align="center">
2
+
3
+ # TracePipe
4
+
5
+ ### Row-level data lineage for pandas pipelines
6
+
7
+ **Know exactly where every row went, why values changed, and how your data transformed.**
8
+
9
+ [![PyPI version](https://img.shields.io/pypi/v/tracepipe.svg)](https://pypi.org/project/tracepipe/)
10
+ [![Python 3.9+](https://img.shields.io/pypi/pyversions/tracepipe.svg)](https://pypi.org/project/tracepipe/)
11
+ [![CI](https://github.com/gauthierpiarrette/tracepipe/actions/workflows/ci.yml/badge.svg)](https://github.com/gauthierpiarrette/tracepipe/actions/workflows/ci.yml)
12
+ [![codecov](https://codecov.io/gh/gauthierpiarrette/tracepipe/branch/main/graph/badge.svg)](https://codecov.io/gh/gauthierpiarrette/tracepipe)
13
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
14
+ [![Docs](https://img.shields.io/badge/docs-mkdocs-blue.svg)](https://gauthierpiarrette.github.io/tracepipe/)
15
+
16
+ [Getting Started](#getting-started) · [Documentation](https://gauthierpiarrette.github.io/tracepipe/) · [Examples](#real-world-example)
17
+
18
+ </div>
19
+
20
+ ---
21
+
22
+ ## Why TracePipe?
23
+
24
+ Data pipelines are black boxes. Rows vanish. Values change. You're left guessing.
25
+
26
+ ```python
27
+ df = pd.read_csv("customers.csv")
28
+ df = df.dropna() # Some rows disappear
29
+ df = df.merge(regions, on="zip") # New rows appear, some vanish
30
+ df["income"] = df["income"].fillna(0) # Values change silently
31
+ df = df[df["age"] >= 18] # More rows gone
32
+ # What happened to customer C-789? 🤷
33
+ ```
34
+
35
+ **TracePipe gives you the complete audit trail — with no changes to your existing pandas code.**
36
+
37
+ ---
38
+
39
+ ## Getting Started
40
+
41
+ ```bash
42
+ pip install tracepipe
43
+ ```
44
+
45
+ ```python
46
+ import tracepipe as tp
47
+ import pandas as pd
48
+
49
+ tp.enable(mode="debug", watch=["income"])
50
+
51
+ df = pd.read_csv("customers.csv")
52
+ df = df.dropna()
53
+ df["income"] = df["income"].fillna(0)
54
+ df = df[df["age"] >= 18]
55
+
56
+ tp.check(df) # See what happened
57
+ ```
58
+
59
+ ```
60
+ TracePipe Check: [OK] Pipeline healthy
61
+
62
+ Retention: 847/1000 (84.7%)
63
+ Dropped: 153 rows
64
+ • DataFrame.dropna: 42
65
+ • DataFrame.__getitem__[mask]: 111
66
+
67
+ Value changes: 23 cells modified
68
+ • DataFrame.fillna: 23 (income)
69
+ ```
70
+
71
+ That's it. **One import, full visibility.**
72
+
73
+ ---
74
+
75
+ ## Core API
76
+
77
+ | Function | What it does |
78
+ |----------|--------------|
79
+ | `tp.enable()` | Start tracking |
80
+ | `tp.check(df)` | Health check — retention, drops, changes |
81
+ | `tp.trace(df, where={"id": "C-789"})` | Follow a row's complete journey |
82
+ | `tp.why(df, col="income", row=5)` | Explain why a cell has its current value |
83
+ | `tp.report(df, "audit.html")` | Export interactive HTML report |
84
+
85
+ ---
86
+
87
+ ## Key Features
88
+
89
+ <table>
90
+ <tr>
91
+ <td width="50%">
92
+
93
+ ### 🔍 Zero-Code Instrumentation
94
+ TracePipe patches pandas at runtime. Your existing code works unchanged.
95
+
96
+ ### 📊 Complete Provenance
97
+ Track drops, transforms, merges, and cell-level changes with before/after values.
98
+
99
+ </td>
100
+ <td width="50%">
101
+
102
+ ### 🎯 Business-Key Lookups
103
+ Find rows by their values: `tp.trace(df, where={"email": "alice@example.com"})`
104
+
105
+ ### ⚡ Production-Ready
106
+ 1.0-2.8x overhead (varies by operation). Tested on DataFrames up to 1M rows.
107
+
108
+ </td>
109
+ </tr>
110
+ </table>
111
+
112
+ ---
113
+
114
+ ## Real-World Example
115
+
116
+ ```python
117
+ import tracepipe as tp
118
+ import pandas as pd
119
+
120
+ tp.enable(mode="debug", watch=["age", "income", "label"])
121
+
122
+ # Load and clean
123
+ df = pd.read_csv("training_data.csv")
124
+ df = df.dropna(subset=["label"])
125
+ df["income"] = df["income"].fillna(df["income"].median())
126
+ df = df[df["age"] >= 18]
127
+
128
+ # Audit
129
+ print(tp.check(df))
130
+ ```
131
+
132
+ ```
133
+ Retention: 8234/10000 (82.3%)
134
+ Dropped: 1766 rows
135
+ • DataFrame.dropna: 423
136
+ • DataFrame.__getitem__[mask]: 1343
137
+
138
+ Value changes: 892 cells
139
+ • DataFrame.fillna: 892 (income)
140
+ ```
141
+
142
+ ```python
143
+ # Why does this customer have a filled income?
144
+ tp.why(df, col="income", where={"customer_id": "C-789"})
145
+ ```
146
+
147
+ ```
148
+ Cell History: row 156, column 'income'
149
+ Current value: 45000.0
150
+ [i] Was null at step 1 (later recovered)
151
+
152
+ History (1 change):
153
+ None -> 45000.0
154
+ by: DataFrame.fillna
155
+ ```
156
+
157
+ ---
158
+
159
+ ## Two Modes
160
+
161
+ | Mode | Use Case | What's Tracked |
162
+ |------|----------|----------------|
163
+ | **CI** (default) | Production pipelines | Step counts, retention rates, merge warnings |
164
+ | **Debug** | Development | Full row history, cell diffs, merge parents, group membership |
165
+
166
+ ```python
167
+ tp.enable(mode="ci") # Lightweight
168
+ tp.enable(mode="debug") # Full lineage
169
+ ```
170
+
171
+ ---
172
+
173
+ ## What's Tracked
174
+
175
+ | Operation | Coverage |
176
+ |-----------|----------|
177
+ | `dropna`, `drop_duplicates`, `query`, `df[mask]` | ✅ Full |
178
+ | `fillna`, `replace`, `loc[]=`, `iloc[]=` | ✅ Full (cell diffs) |
179
+ | `merge`, `join` | ✅ Full (parent tracking) |
180
+ | `groupby().agg()` | ✅ Full (group membership) |
181
+ | `sort_values`, `head`, `tail`, `sample` | ✅ Full |
182
+ | `apply`, `pipe` | ⚠️ Partial |
183
+
184
+ ---
185
+
186
+ ## Data Quality Contracts
187
+
188
+ ```python
189
+ (tp.contract()
190
+ .expect_unique("customer_id")
191
+ .expect_no_nulls("email")
192
+ .expect_retention(min_rate=0.9)
193
+ .check(df)
194
+ .raise_if_failed())
195
+ ```
196
+
197
+ ---
198
+
199
+ ## Documentation
200
+
201
+ 📚 **[Full Documentation](https://gauthierpiarrette.github.io/tracepipe/)**
202
+
203
+ - [Quickstart](https://gauthierpiarrette.github.io/tracepipe/getting-started/quickstart/)
204
+ - [User Guide](https://gauthierpiarrette.github.io/tracepipe/guide/concepts/)
205
+ - [API Reference](https://gauthierpiarrette.github.io/tracepipe/api/)
206
+ - [Examples](https://gauthierpiarrette.github.io/tracepipe/examples/ml-pipeline/)
207
+
208
+ ---
209
+
210
+ ## Contributing
211
+
212
+ ```bash
213
+ git clone https://github.com/gauthierpiarrette/tracepipe.git
214
+ cd tracepipe
215
+ pip install -e ".[dev]"
216
+ pytest tests/ -v
217
+ ```
218
+
219
+ See [CONTRIBUTING](https://gauthierpiarrette.github.io/tracepipe/contributing/) for guidelines.
220
+
221
+ ---
222
+
223
+ ## License
224
+
225
+ MIT License. See [LICENSE](LICENSE).
226
+
227
+ ---
228
+
229
+ <div align="center">
230
+
231
+ **Stop guessing where your rows went.**
232
+
233
+ ```bash
234
+ pip install tracepipe
235
+ ```
236
+
237
+ ⭐ Star us on GitHub if TracePipe helps your data work!
238
+
239
+ </div>
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "tracepipe"
7
- version = "0.3.0"
7
+ version = "0.3.1"
8
8
  description = "Row-level data lineage tracking for pandas pipelines"
9
9
  readme = "README.md"
10
10
  license = {file = "LICENSE"}