tracepipe 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tracepipe/__init__.py CHANGED
@@ -5,106 +5,145 @@ TracePipe: Row-Level Data Lineage Tracking
5
5
  Track every row, every change, every step in your pandas pipelines.
6
6
 
7
7
  Quick Start:
8
- import tracepipe
8
+ import tracepipe as tp
9
9
  import pandas as pd
10
10
 
11
- tracepipe.enable()
12
- tracepipe.watch("age", "salary") # Watch specific columns
11
+ tp.enable(mode="debug", watch=["age", "salary"])
13
12
 
14
13
  df = pd.DataFrame({"age": [25, None, 35], "salary": [50000, 60000, None]})
15
14
  df = df.dropna()
16
15
  df["salary"] = df["salary"] * 1.1
17
16
 
18
- # Query lineage
19
- row = tracepipe.explain(0) # What happened to row 0?
20
- print(row.history())
17
+ # Health audit
18
+ result = tp.check(df)
19
+ print(result)
21
20
 
22
- dropped = tracepipe.dropped_rows() # Which rows were dropped?
23
- print(dropped)
21
+ # Row journey
22
+ trace = tp.trace(df, row=0)
23
+ print(trace)
24
24
 
25
- Features:
26
- - Row-level tracking: Know exactly which rows were dropped and why
27
- - Cell-level diffs: See before/after values for watched columns
28
- - Aggregation lineage: Trace back from grouped results to source rows
29
- - Zero-copy design: Minimal overhead on your pipelines
30
- - Safe instrumentation: Never crashes your code
25
+ # Cell provenance
26
+ why = tp.why(df, col="salary", row=0)
27
+ print(why)
31
28
 
32
- See IMPLEMENTATION_PLAN_v5.md for full documentation.
29
+ # Generate report
30
+ tp.report(df, "audit.html")
31
+
32
+ Modes:
33
+ - CI mode (default): Step stats, retention rates, merge mismatch detection.
34
+ No per-row provenance. Fast for production.
35
+ - DEBUG mode: Full per-row provenance, cell history, ghost values.
36
+
37
+ API Summary:
38
+ Core (5 functions for 90% of use cases):
39
+ tp.enable() - Start tracking
40
+ tp.check() - Health audit → CheckResult
41
+ tp.trace() - Row journey → TraceResult
42
+ tp.why() - Cell provenance → WhyResult
43
+ tp.report() - HTML export
44
+
45
+ Power features (namespaces and top-level helpers):
46
+ tp.debug.inspect() - Raw lineage access
47
+ tp.contracts.contract() - Data quality contracts
48
+ tp.snapshot(), tp.diff() - Pipeline state comparison
49
+
50
+ All functions return structured result objects.
51
+ Use print(result) for pretty output, result.to_dict() for data.
33
52
  """
34
53
 
35
- from .api import (
36
- GroupLineageResult,
37
- # Result classes
38
- RowLineageResult,
39
- aggregation_groups,
40
- # Convenience functions
41
- alive_rows,
42
- clear_watch,
43
- configure,
44
- disable,
45
- dropped_rows,
46
- # Core control
47
- enable,
48
- # Query API
49
- explain,
50
- explain_group,
51
- explain_many,
52
- export_arrow,
53
- # Export
54
- export_json,
55
- mass_updates,
56
- register,
57
- reset,
58
- stage,
59
- stats,
60
- steps,
61
- unwatch,
62
- # Column watching
63
- watch,
64
- watch_all,
54
+ # === CORE API (6 functions, imported from .api below) ===
55
+ # === NAMESPACES ===
56
+ from . import contracts, debug
57
+ from .api import configure, disable, enable, register, reset, stage
58
+
59
+ # Re-export contract() at top level for convenience
60
+ from .contracts import contract
61
+
62
+ # === CONVENIENCE API (user-facing) ===
63
+ from .convenience import (
64
+ CheckFailed,
65
+ # Result types
66
+ CheckResult,
67
+ CheckWarning,
68
+ TraceResult,
69
+ WhyResult,
70
+ check,
71
+ find,
72
+ report,
73
+ trace,
74
+ why,
65
75
  )
66
- from .core import TracePipeConfig
67
76
 
68
- # Export protocols for custom backend implementers
69
- from .storage.base import LineageBackend, RowIdentityStrategy
70
- from .visualization.html_export import save
77
+ # === CONFIGURATION ===
78
+ from .core import TracePipeConfig, TracePipeMode
71
79
 
72
- __version__ = "0.2.0"
80
+ # === SNAPSHOTS (top-level for convenience) ===
81
+ from .snapshot import DiffResult, Snapshot, diff, snapshot
73
82
 
83
+ # === VERSION ===
84
+ __version__ = "0.3.0"
85
+
86
+ # === MINIMAL __all__ ===
74
87
  __all__ = [
75
- # Core API
88
+ # Core control (6)
76
89
  "enable",
77
90
  "disable",
78
91
  "reset",
79
- "configure",
80
- "watch",
81
- "watch_all",
82
- "unwatch",
83
- "clear_watch",
84
92
  "register",
85
93
  "stage",
86
- # Query API
87
- "explain",
88
- "explain_many",
89
- "explain_group",
90
- "dropped_rows",
91
- "alive_rows",
92
- "mass_updates",
93
- "steps",
94
- "aggregation_groups",
95
- # Export
96
- "export_json",
97
- "export_arrow",
98
- "stats",
99
- "save",
100
- # Configuration
94
+ "configure",
95
+ # Convenience API (5)
96
+ "check",
97
+ "find",
98
+ "trace",
99
+ "why",
100
+ "report",
101
+ # Result types (5)
102
+ "CheckResult",
103
+ "CheckWarning",
104
+ "CheckFailed",
105
+ "TraceResult",
106
+ "WhyResult",
107
+ # Snapshots (4)
108
+ "snapshot",
109
+ "diff",
110
+ "Snapshot",
111
+ "DiffResult",
112
+ # Contracts (1)
113
+ "contract",
114
+ # Namespaces (2)
115
+ "debug",
116
+ "contracts",
117
+ # Config (2)
101
118
  "TracePipeConfig",
102
- # Result classes
103
- "RowLineageResult",
104
- "GroupLineageResult",
105
- # Protocols (for custom backends)
106
- "LineageBackend",
107
- "RowIdentityStrategy",
108
- # Version
119
+ "TracePipeMode",
120
+ # Version (1)
109
121
  "__version__",
110
122
  ]
123
+
124
+
125
+ def __dir__():
126
+ """Control what shows up in IDE autocomplete - only essential functions."""
127
+ return [
128
+ # Primary API
129
+ "enable",
130
+ "disable",
131
+ "reset",
132
+ "register",
133
+ "configure",
134
+ "check",
135
+ "find",
136
+ "trace",
137
+ "why",
138
+ "report",
139
+ # Snapshots
140
+ "snapshot",
141
+ "diff",
142
+ # Contract
143
+ "contract",
144
+ # Namespaces
145
+ "debug",
146
+ "contracts",
147
+ # Config
148
+ "TracePipeConfig",
149
+ ]