temporal-leaks 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 PRAKUL HIREMATH
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: temporal-leaks
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Valgrind for Time-Series ML — automatically detect look-ahead bias in data science pipelines.
|
|
5
|
+
Project-URL: Homepage, https://github.com/prakulhiremath/temporal-leaks
|
|
6
|
+
Project-URL: Repository, https://github.com/prakulhiremath/temporal-leaks
|
|
7
|
+
Project-URL: Issues, https://github.com/prakulhiremath/temporal-leaks/issues
|
|
8
|
+
Author: Prakul Hiremath
|
|
9
|
+
Maintainer: Prakul Hiremath
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: auditing,data-science,feature-engineering,forecasting,leakage,look-ahead-bias,machine-learning,quant-finance,time-series
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
25
|
+
Classifier: Topic :: Software Development :: Testing
|
|
26
|
+
Classifier: Typing :: Typed
|
|
27
|
+
Requires-Python: >=3.9
|
|
28
|
+
Requires-Dist: jinja2>=3.1.0
|
|
29
|
+
Requires-Dist: numpy>=1.23.0
|
|
30
|
+
Requires-Dist: pandas>=1.5.0
|
|
31
|
+
Requires-Dist: polars>=0.19.0
|
|
32
|
+
Provides-Extra: all
|
|
33
|
+
Requires-Dist: pandas>=1.5.0; extra == 'all'
|
|
34
|
+
Requires-Dist: polars>=0.19.0; extra == 'all'
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: mypy>=1.5.0; extra == 'dev'
|
|
37
|
+
Requires-Dist: pandas-stubs>=2.0.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: pytest>=7.4.0; extra == 'dev'
|
|
40
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
41
|
+
Provides-Extra: pandas
|
|
42
|
+
Requires-Dist: pandas>=1.5.0; extra == 'pandas'
|
|
43
|
+
Provides-Extra: polars
|
|
44
|
+
Requires-Dist: polars>=0.19.0; extra == 'polars'
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
# 🕵️ Temporal Leaks: Valgrind for Time-Series ML
|
|
48
|
+
|
|
49
|
+
<p align="center">
|
|
50
|
+
<img src="https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue?style=flat-square" />
|
|
51
|
+
<img src="https://img.shields.io/badge/license-MIT-green?style=flat-square" />
|
|
52
|
+
<img src="https://img.shields.io/badge/pandas-%E2%9C%93-150458?style=flat-square&logo=pandas" />
|
|
53
|
+
<img src="https://img.shields.io/badge/polars-%E2%9C%93-CD792C?style=flat-square" />
|
|
54
|
+
<img src="https://img.shields.io/badge/mypy-strict-blue?style=flat-square" />
|
|
55
|
+
<img src="https://img.shields.io/badge/ruff-lint-red?style=flat-square" />
|
|
56
|
+
<img src="https://img.shields.io/badge/CI-passing-brightgreen?style=flat-square&logo=github-actions" />
|
|
57
|
+
</p>
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
> **Look-ahead bias** is the silent killer of quant strategies and forecasting models.
|
|
62
|
+
> Your backtest shows 40% annual returns. You deploy. You lose money.
|
|
63
|
+
> Somewhere in your feature pipeline, a rolling average peeked at tomorrow's prices.
|
|
64
|
+
|
|
65
|
+
`temporal-leaks` catches this automatically — before it costs you.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## The Problem: Future Data in Your Past Features
|
|
70
|
+
|
|
71
|
+
In time-series machine learning, look-ahead bias (also called *data leakage* or *future leakage*) occurs when a feature computed for timestamp `t` inadvertently uses data from timestamps `t+1`, `t+2`, … `t+n`.
|
|
72
|
+
|
|
73
|
+
This is devastatingly easy to introduce:
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
# BUG: center=True means the window is centred — it looks forward AND backward
|
|
77
|
+
df["roll_mean"] = df["price"].rolling(window=5, center=True).mean()
|
|
78
|
+
|
|
79
|
+
# BUG: shift(-1) reads the NEXT row's value
|
|
80
|
+
df["next_return"] = df["return"].shift(-1)
|
|
81
|
+
|
|
82
|
+
# BUG: global z-score uses future data to compute mean/std
|
|
83
|
+
df["znorm"] = (df["price"] - df["price"].mean()) / df["price"].std()
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
None of these will raise an error.
|
|
87
|
+
Your tests will pass.
|
|
88
|
+
Your backtests will look amazing.
|
|
89
|
+
**And then reality hits.**
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## How It Works: The Temporal Perturbation Test
|
|
94
|
+
|
|
95
|
+
```
|
|
96
|
+
Timeline: ──────────────────────────────────────────────────▶
|
|
97
|
+
T (midpoint)
|
|
98
|
+
│
|
|
99
|
+
Past ◀──────────────────────┤──────────────────────────────▶ Future
|
|
100
|
+
│
|
|
101
|
+
Step 1: Run pipeline on original data
|
|
102
|
+
baseline_features = pipeline(df)
|
|
103
|
+
|
|
104
|
+
Step 2: MUTATE the future
|
|
105
|
+
df_perturbed[t > T] = 🔥 (noise / sign flip / NaN)
|
|
106
|
+
|
|
107
|
+
Step 3: Re-run pipeline on perturbed data
|
|
108
|
+
perturbed_features = pipeline(df_perturbed)
|
|
109
|
+
|
|
110
|
+
Step 4: Compare features for PAST rows only (t ≤ T)
|
|
111
|
+
If baseline_features[t≤T] ≠ perturbed_features[t≤T]
|
|
112
|
+
then the past features DEPEND on future data → LEAK! 🚨
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
The key insight: **if your past features are truly causal, mutating the future should not change them.** If they change, future data crept in.
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Installation
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
pip install temporal-leaks
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
Or from source:
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
git clone https://github.com/prakulhiremath/temporal-leaks
|
|
129
|
+
cd temporal-leaks
|
|
130
|
+
pip install -e ".[dev]"
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## Quick Start
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
import pandas as pd
|
|
139
|
+
import numpy as np
|
|
140
|
+
from temporal_leaks import TemporalAudit, TemporalLeakageError
|
|
141
|
+
|
|
142
|
+
# Build a sample time-series dataset
|
|
143
|
+
df = pd.DataFrame({
|
|
144
|
+
"ts": np.arange(500),
|
|
145
|
+
"price": np.random.default_rng(42).normal(100, 5, size=500),
|
|
146
|
+
})
|
|
147
|
+
|
|
148
|
+
# ─── ✓ CLEAN PIPELINE ────────────────────────────────────────────────────────
|
|
149
|
+
def causal_features(df: pd.DataFrame) -> pd.DataFrame:
|
|
150
|
+
out = df.copy()
|
|
151
|
+
# Expanding window only looks at past — safe!
|
|
152
|
+
out["expanding_mean"] = out["price"].expanding(min_periods=1).mean()
|
|
153
|
+
# shift(+1) looks at the previous row — safe!
|
|
154
|
+
out["lag1"] = out["price"].shift(1)
|
|
155
|
+
return out
|
|
156
|
+
|
|
157
|
+
auditor = TemporalAudit(mode="nullify", random_seed=42)
|
|
158
|
+
report = auditor.check(df, timestamp_col="ts", pipeline_fn=causal_features)
|
|
159
|
+
print(report)
|
|
160
|
+
# ✓ CLEAN — leakage_score=0.0000
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# ─── ✗ LEAKING PIPELINE ──────────────────────────────────────────────────────
|
|
164
|
+
def leaking_features(df: pd.DataFrame) -> pd.DataFrame:
|
|
165
|
+
out = df.copy()
|
|
166
|
+
# center=True peeks at future rows — LEAKS!
|
|
167
|
+
out["centred_roll"] = out["price"].rolling(11, center=True, min_periods=1).mean()
|
|
168
|
+
return out
|
|
169
|
+
|
|
170
|
+
try:
|
|
171
|
+
auditor.check(df, timestamp_col="ts", pipeline_fn=leaking_features)
|
|
172
|
+
except TemporalLeakageError as exc:
|
|
173
|
+
print(exc)
|
|
174
|
+
# TemporalLeakageError: leakage_score=0.4812
|
|
175
|
+
# Breached columns (1):
|
|
176
|
+
# • [HIGH] column='centred_roll' effect_size=0.4812 ...
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## Decorator API
|
|
182
|
+
|
|
183
|
+
```python
|
|
184
|
+
from temporal_leaks import temporal_audit
|
|
185
|
+
|
|
186
|
+
@temporal_audit(timestamp_col="ts", mode="noise", random_seed=42)
|
|
187
|
+
def build_features(df: pd.DataFrame) -> pd.DataFrame:
|
|
188
|
+
df = df.copy()
|
|
189
|
+
df["expanding_mean"] = df["price"].expanding(min_periods=1).mean()
|
|
190
|
+
return df
|
|
191
|
+
|
|
192
|
+
# The audit runs automatically on every call.
|
|
193
|
+
# TemporalLeakageError is raised if leakage is detected.
|
|
194
|
+
result = build_features(df)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## HTML Audit Reports
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
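report = TemporalAudit(mode="nullify", leakage_threshold=1.1).check(df, "ts", leaking_features)  # threshold > 1.0 returns the report instead of raising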
|
|
203
|
+
|
|
204
|
+
# Write a beautiful standalone HTML report
|
|
205
|
+
with open("audit_report.html", "w") as f:
|
|
206
|
+
f.write(report.to_html())
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
The HTML report includes:
|
|
210
|
+
- Leakage score with a visual progress bar
|
|
211
|
+
- Per-column severity badges (LOW / MEDIUM / HIGH / CRITICAL)
|
|
212
|
+
- Effect size, mean |Δ|, max |Δ|, % rows changed
|
|
213
|
+
- First timestamp where each leak was observed
|
|
214
|
+
- Provenance hints describing likely causes
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
## API Reference
|
|
219
|
+
|
|
220
|
+
### `TemporalAudit`
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
TemporalAudit(
|
|
224
|
+
mode: Literal["noise", "sign_flip", "nullify"] = "noise",
|
|
225
|
+
random_seed: int = 42,
|
|
226
|
+
delta_threshold: float = 1e-8,
|
|
227
|
+
leakage_threshold: float = 0.0,
|
|
228
|
+
ignore_columns: list[str] | None = None,
|
|
229
|
+
)
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
| Parameter | Description |
|
|
233
|
+
|---|---|
|
|
234
|
+
| `mode` | Perturbation strategy: **noise** adds Gaussian noise, **sign_flip** multiplies by -1, **nullify** sets NaN |
|
|
235
|
+
| `random_seed` | Integer seed — fully deterministic, reproducible across runs |
|
|
236
|
+
| `delta_threshold` | Minimum cell-level change to count as "different" (suppresses float noise) |
|
|
237
|
+
| `leakage_threshold` | If `leakage_score > leakage_threshold`, raise `TemporalLeakageError`. Set to `1.1` to always return report |
|
|
238
|
+
| `ignore_columns` | List of output columns to skip during comparison |
|
|
239
|
+
|
|
240
|
+
### `AuditReport`
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
@dataclass
|
|
244
|
+
class AuditReport:
|
|
245
|
+
leakage_score: float # 0.0 = clean, 1.0 = fully compromised
|
|
246
|
+
breached_columns: list[ColumnLeakMeta]
|
|
247
|
+
clean_columns: list[str]
|
|
248
|
+
perturbation_mode: str
|
|
249
|
+
evaluation_time: Any
|
|
250
|
+
random_seed: int
|
|
251
|
+
provenance_hints: dict[str, str]
|
|
252
|
+
|
|
253
|
+
def to_html(self) -> str: ... # standalone HTML report
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
### `ColumnLeakMeta`
|
|
257
|
+
|
|
258
|
+
```python
|
|
259
|
+
@dataclass(frozen=True)
|
|
260
|
+
class ColumnLeakMeta:
|
|
261
|
+
column_name: str
|
|
262
|
+
first_leaky_timestamp: Any
|
|
263
|
+
mean_absolute_delta: float
|
|
264
|
+
max_delta: float
|
|
265
|
+
pct_rows_changed: float
|
|
266
|
+
effect_size: float # normalised, 0–1
|
|
267
|
+
severity: str # LOW | MEDIUM | HIGH | CRITICAL
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Severity Classification
|
|
271
|
+
|
|
272
|
+
| Severity | Effect Size |
|
|
273
|
+
|---|---|
|
|
274
|
+
| 🟦 LOW | `effect_size < 0.15` |
|
|
275
|
+
| 🟨 MEDIUM | `0.15 ≤ effect_size < 0.40` |
|
|
276
|
+
| 🟧 HIGH | `0.40 ≤ effect_size < 0.75` |
|
|
277
|
+
| 🟥 CRITICAL | `effect_size ≥ 0.75` |
|
|
278
|
+
|
|
279
|
+
---
|
|
280
|
+
|
|
281
|
+
## Perturbation Modes
|
|
282
|
+
|
|
283
|
+
```
|
|
284
|
+
┌────────────────┬──────────────────────────────────────────────────────┐
|
|
285
|
+
│ Mode │ What it does to future rows │
|
|
286
|
+
├────────────────┼──────────────────────────────────────────────────────┤
|
|
287
|
+
│ noise │ Adds Gaussian noise: μ=0, σ=2×column_std │
|
|
288
|
+
│ sign_flip │ Multiplies all numeric values by −1 │
|
|
289
|
+
│ nullify │ Replaces all values with NaN / null │
|
|
290
|
+
└────────────────┴──────────────────────────────────────────────────────┘
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
Use **nullify** for the strictest test.
|
|
294
|
+
Use **noise** for pipelines that impute or otherwise fill in NaN (e.g., imputers), where nullified future values could be masked.
|
|
295
|
+
Use **sign_flip** to test pipelines sensitive to sign changes (e.g., momentum factors).
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
## Polars Support
|
|
300
|
+
|
|
301
|
+
```python
|
|
302
|
+
import polars as pl
|
|
303
|
+
from temporal_leaks import TemporalAudit
|
|
304
|
+
|
|
305
|
+
df = pl.DataFrame({"ts": range(200), "value": [float(i) for i in range(200)]})
|
|
306
|
+
|
|
307
|
+
auditor = TemporalAudit(mode="nullify", random_seed=42)
|
|
308
|
+
report = auditor.check(df, "ts", my_polars_pipeline)
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
`temporal-leaks` handles Polars DataFrames transparently — pass them in, get results back in the same type.
|
|
312
|
+
|
|
313
|
+
---
|
|
314
|
+
|
|
315
|
+
## Benchmarks
|
|
316
|
+
|
|
317
|
+
| Dataset | Rows | Columns | Backend | Mode | Time |
|
|
318
|
+
|---|---|---|---|---|---|
|
|
319
|
+
| Synthetic prices | 1,000,000 | 5 | Polars | nullify | ~1.1 s |
|
|
320
|
+
| Synthetic prices | 10,000,000 | 5 | Polars | nullify | ~3.2 s |
|
|
321
|
+
| Equity features | 500,000 | 20 | Pandas | noise | ~2.8 s |
|
|
322
|
+
|
|
323
|
+
> Benchmarks run on Apple M2 Pro, 16 GB RAM. Polars backend strongly recommended for large frames.
|
|
324
|
+
|
|
325
|
+
---
|
|
326
|
+
|
|
327
|
+
## Running Tests
|
|
328
|
+
|
|
329
|
+
```bash
|
|
330
|
+
# Install dev extras
|
|
331
|
+
pip install -e ".[dev]"
|
|
332
|
+
|
|
333
|
+
# Run the full suite
|
|
334
|
+
pytest tests/ -v
|
|
335
|
+
|
|
336
|
+
# With coverage
|
|
337
|
+
pytest tests/ --cov=temporal_leaks --cov-report=term-missing
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
---
|
|
341
|
+
|
|
342
|
+
## Contributing
|
|
343
|
+
|
|
344
|
+
Pull requests are welcome. For major changes, please open an issue first.
|
|
345
|
+
|
|
346
|
+
1. Fork the repo
|
|
347
|
+
2. Create your feature branch: `git checkout -b feat/my-feature`
|
|
348
|
+
3. Commit your changes: `git commit -m 'feat: add my feature'`
|
|
349
|
+
4. Push and open a PR
|
|
350
|
+
|
|
351
|
+
Please make sure `ruff check .` and `mypy temporal_leaks/` pass before submitting.
|
|
352
|
+
|
|
353
|
+
---
|
|
354
|
+
|
|
355
|
+
## License
|
|
356
|
+
|
|
357
|
+
MIT © temporal-leaks contributors
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
# 🕵️ Temporal Leaks: Valgrind for Time-Series ML
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<img src="https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue?style=flat-square" />
|
|
5
|
+
<img src="https://img.shields.io/badge/license-MIT-green?style=flat-square" />
|
|
6
|
+
<img src="https://img.shields.io/badge/pandas-%E2%9C%93-150458?style=flat-square&logo=pandas" />
|
|
7
|
+
<img src="https://img.shields.io/badge/polars-%E2%9C%93-CD792C?style=flat-square" />
|
|
8
|
+
<img src="https://img.shields.io/badge/mypy-strict-blue?style=flat-square" />
|
|
9
|
+
<img src="https://img.shields.io/badge/ruff-lint-red?style=flat-square" />
|
|
10
|
+
<img src="https://img.shields.io/badge/CI-passing-brightgreen?style=flat-square&logo=github-actions" />
|
|
11
|
+
</p>
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
> **Look-ahead bias** is the silent killer of quant strategies and forecasting models.
|
|
16
|
+
> Your backtest shows 40% annual returns. You deploy. You lose money.
|
|
17
|
+
> Somewhere in your feature pipeline, a rolling average peeked at tomorrow's prices.
|
|
18
|
+
|
|
19
|
+
`temporal-leaks` catches this automatically — before it costs you.
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## The Problem: Future Data in Your Past Features
|
|
24
|
+
|
|
25
|
+
In time-series machine learning, look-ahead bias (also called *data leakage* or *future leakage*) occurs when a feature computed for timestamp `t` inadvertently uses data from timestamps `t+1`, `t+2`, … `t+n`.
|
|
26
|
+
|
|
27
|
+
This is devastatingly easy to introduce:
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
# BUG: center=True means the window is centred — it looks forward AND backward
|
|
31
|
+
df["roll_mean"] = df["price"].rolling(window=5, center=True).mean()
|
|
32
|
+
|
|
33
|
+
# BUG: shift(-1) reads the NEXT row's value
|
|
34
|
+
df["next_return"] = df["return"].shift(-1)
|
|
35
|
+
|
|
36
|
+
# BUG: global z-score uses future data to compute mean/std
|
|
37
|
+
df["znorm"] = (df["price"] - df["price"].mean()) / df["price"].std()
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
None of these will raise an error.
|
|
41
|
+
Your tests will pass.
|
|
42
|
+
Your backtests will look amazing.
|
|
43
|
+
**And then reality hits.**
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## How It Works: The Temporal Perturbation Test
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
Timeline: ──────────────────────────────────────────────────▶
|
|
51
|
+
T (midpoint)
|
|
52
|
+
│
|
|
53
|
+
Past ◀──────────────────────┤──────────────────────────────▶ Future
|
|
54
|
+
│
|
|
55
|
+
Step 1: Run pipeline on original data
|
|
56
|
+
baseline_features = pipeline(df)
|
|
57
|
+
|
|
58
|
+
Step 2: MUTATE the future
|
|
59
|
+
df_perturbed[t > T] = 🔥 (noise / sign flip / NaN)
|
|
60
|
+
|
|
61
|
+
Step 3: Re-run pipeline on perturbed data
|
|
62
|
+
perturbed_features = pipeline(df_perturbed)
|
|
63
|
+
|
|
64
|
+
Step 4: Compare features for PAST rows only (t ≤ T)
|
|
65
|
+
If baseline_features[t≤T] ≠ perturbed_features[t≤T]
|
|
66
|
+
then the past features DEPEND on future data → LEAK! 🚨
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
The key insight: **if your past features are truly causal, mutating the future should not change them.** If they change, future data crept in.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## Installation
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install temporal-leaks
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Or from source:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
git clone https://github.com/prakulhiremath/temporal-leaks
|
|
83
|
+
cd temporal-leaks
|
|
84
|
+
pip install -e ".[dev]"
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Quick Start
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
import pandas as pd
|
|
93
|
+
import numpy as np
|
|
94
|
+
from temporal_leaks import TemporalAudit, TemporalLeakageError
|
|
95
|
+
|
|
96
|
+
# Build a sample time-series dataset
|
|
97
|
+
df = pd.DataFrame({
|
|
98
|
+
"ts": np.arange(500),
|
|
99
|
+
"price": np.random.default_rng(42).normal(100, 5, size=500),
|
|
100
|
+
})
|
|
101
|
+
|
|
102
|
+
# ─── ✓ CLEAN PIPELINE ────────────────────────────────────────────────────────
|
|
103
|
+
def causal_features(df: pd.DataFrame) -> pd.DataFrame:
|
|
104
|
+
out = df.copy()
|
|
105
|
+
# Expanding window only looks at past — safe!
|
|
106
|
+
out["expanding_mean"] = out["price"].expanding(min_periods=1).mean()
|
|
107
|
+
# shift(+1) looks at the previous row — safe!
|
|
108
|
+
out["lag1"] = out["price"].shift(1)
|
|
109
|
+
return out
|
|
110
|
+
|
|
111
|
+
auditor = TemporalAudit(mode="nullify", random_seed=42)
|
|
112
|
+
report = auditor.check(df, timestamp_col="ts", pipeline_fn=causal_features)
|
|
113
|
+
print(report)
|
|
114
|
+
# ✓ CLEAN — leakage_score=0.0000
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# ─── ✗ LEAKING PIPELINE ──────────────────────────────────────────────────────
|
|
118
|
+
def leaking_features(df: pd.DataFrame) -> pd.DataFrame:
|
|
119
|
+
out = df.copy()
|
|
120
|
+
# center=True peeks at future rows — LEAKS!
|
|
121
|
+
out["centred_roll"] = out["price"].rolling(11, center=True, min_periods=1).mean()
|
|
122
|
+
return out
|
|
123
|
+
|
|
124
|
+
try:
|
|
125
|
+
auditor.check(df, timestamp_col="ts", pipeline_fn=leaking_features)
|
|
126
|
+
except TemporalLeakageError as exc:
|
|
127
|
+
print(exc)
|
|
128
|
+
# TemporalLeakageError: leakage_score=0.4812
|
|
129
|
+
# Breached columns (1):
|
|
130
|
+
# • [HIGH] column='centred_roll' effect_size=0.4812 ...
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## Decorator API
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from temporal_leaks import temporal_audit
|
|
139
|
+
|
|
140
|
+
@temporal_audit(timestamp_col="ts", mode="noise", random_seed=42)
|
|
141
|
+
def build_features(df: pd.DataFrame) -> pd.DataFrame:
|
|
142
|
+
df = df.copy()
|
|
143
|
+
df["expanding_mean"] = df["price"].expanding(min_periods=1).mean()
|
|
144
|
+
return df
|
|
145
|
+
|
|
146
|
+
# The audit runs automatically on every call.
|
|
147
|
+
# TemporalLeakageError is raised if leakage is detected.
|
|
148
|
+
result = build_features(df)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## HTML Audit Reports
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
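report = TemporalAudit(mode="nullify", leakage_threshold=1.1).check(df, "ts", leaking_features)  # threshold > 1.0 returns the report instead of raising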
|
|
157
|
+
|
|
158
|
+
# Write a beautiful standalone HTML report
|
|
159
|
+
with open("audit_report.html", "w") as f:
|
|
160
|
+
f.write(report.to_html())
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
The HTML report includes:
|
|
164
|
+
- Leakage score with a visual progress bar
|
|
165
|
+
- Per-column severity badges (LOW / MEDIUM / HIGH / CRITICAL)
|
|
166
|
+
- Effect size, mean |Δ|, max |Δ|, % rows changed
|
|
167
|
+
- First timestamp where each leak was observed
|
|
168
|
+
- Provenance hints describing likely causes
|
|
169
|
+
|
|
170
|
+
---
|
|
171
|
+
|
|
172
|
+
## API Reference
|
|
173
|
+
|
|
174
|
+
### `TemporalAudit`
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
TemporalAudit(
|
|
178
|
+
mode: Literal["noise", "sign_flip", "nullify"] = "noise",
|
|
179
|
+
random_seed: int = 42,
|
|
180
|
+
delta_threshold: float = 1e-8,
|
|
181
|
+
leakage_threshold: float = 0.0,
|
|
182
|
+
ignore_columns: list[str] | None = None,
|
|
183
|
+
)
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
| Parameter | Description |
|
|
187
|
+
|---|---|
|
|
188
|
+
| `mode` | Perturbation strategy: **noise** adds Gaussian noise, **sign_flip** multiplies by -1, **nullify** sets NaN |
|
|
189
|
+
| `random_seed` | Integer seed — fully deterministic, reproducible across runs |
|
|
190
|
+
| `delta_threshold` | Minimum cell-level change to count as "different" (suppresses float noise) |
|
|
191
|
+
| `leakage_threshold` | If `leakage_score > leakage_threshold`, raise `TemporalLeakageError`. Set to `1.1` to always return report |
|
|
192
|
+
| `ignore_columns` | List of output columns to skip during comparison |
|
|
193
|
+
|
|
194
|
+
### `AuditReport`
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
@dataclass
|
|
198
|
+
class AuditReport:
|
|
199
|
+
leakage_score: float # 0.0 = clean, 1.0 = fully compromised
|
|
200
|
+
breached_columns: list[ColumnLeakMeta]
|
|
201
|
+
clean_columns: list[str]
|
|
202
|
+
perturbation_mode: str
|
|
203
|
+
evaluation_time: Any
|
|
204
|
+
random_seed: int
|
|
205
|
+
provenance_hints: dict[str, str]
|
|
206
|
+
|
|
207
|
+
def to_html(self) -> str: ... # standalone HTML report
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### `ColumnLeakMeta`
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
@dataclass(frozen=True)
|
|
214
|
+
class ColumnLeakMeta:
|
|
215
|
+
column_name: str
|
|
216
|
+
first_leaky_timestamp: Any
|
|
217
|
+
mean_absolute_delta: float
|
|
218
|
+
max_delta: float
|
|
219
|
+
pct_rows_changed: float
|
|
220
|
+
effect_size: float # normalised, 0–1
|
|
221
|
+
severity: str # LOW | MEDIUM | HIGH | CRITICAL
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### Severity Classification
|
|
225
|
+
|
|
226
|
+
| Severity | Effect Size |
|
|
227
|
+
|---|---|
|
|
228
|
+
| 🟦 LOW | `effect_size < 0.15` |
|
|
229
|
+
| 🟨 MEDIUM | `0.15 ≤ effect_size < 0.40` |
|
|
230
|
+
| 🟧 HIGH | `0.40 ≤ effect_size < 0.75` |
|
|
231
|
+
| 🟥 CRITICAL | `effect_size ≥ 0.75` |
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
## Perturbation Modes
|
|
236
|
+
|
|
237
|
+
```
|
|
238
|
+
┌────────────────┬──────────────────────────────────────────────────────┐
|
|
239
|
+
│ Mode │ What it does to future rows │
|
|
240
|
+
├────────────────┼──────────────────────────────────────────────────────┤
|
|
241
|
+
│ noise │ Adds Gaussian noise: μ=0, σ=2×column_std │
|
|
242
|
+
│ sign_flip │ Multiplies all numeric values by −1 │
|
|
243
|
+
│ nullify │ Replaces all values with NaN / null │
|
|
244
|
+
└────────────────┴──────────────────────────────────────────────────────┘
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
Use **nullify** for the strictest test.
|
|
248
|
+
Use **noise** for pipelines that impute or otherwise fill in NaN (e.g., imputers), where nullified future values could be masked.
|
|
249
|
+
Use **sign_flip** to test pipelines sensitive to sign changes (e.g., momentum factors).
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
## Polars Support
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
import polars as pl
|
|
257
|
+
from temporal_leaks import TemporalAudit
|
|
258
|
+
|
|
259
|
+
df = pl.DataFrame({"ts": range(200), "value": [float(i) for i in range(200)]})
|
|
260
|
+
|
|
261
|
+
auditor = TemporalAudit(mode="nullify", random_seed=42)
|
|
262
|
+
report = auditor.check(df, "ts", my_polars_pipeline)
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
`temporal-leaks` handles Polars DataFrames transparently — pass them in, get results back in the same type.
|
|
266
|
+
|
|
267
|
+
---
|
|
268
|
+
|
|
269
|
+
## Benchmarks
|
|
270
|
+
|
|
271
|
+
| Dataset | Rows | Columns | Backend | Mode | Time |
|
|
272
|
+
|---|---|---|---|---|---|
|
|
273
|
+
| Synthetic prices | 1,000,000 | 5 | Polars | nullify | ~1.1 s |
|
|
274
|
+
| Synthetic prices | 10,000,000 | 5 | Polars | nullify | ~3.2 s |
|
|
275
|
+
| Equity features | 500,000 | 20 | Pandas | noise | ~2.8 s |
|
|
276
|
+
|
|
277
|
+
> Benchmarks run on Apple M2 Pro, 16 GB RAM. Polars backend strongly recommended for large frames.
|
|
278
|
+
|
|
279
|
+
---
|
|
280
|
+
|
|
281
|
+
## Running Tests
|
|
282
|
+
|
|
283
|
+
```bash
|
|
284
|
+
# Install dev extras
|
|
285
|
+
pip install -e ".[dev]"
|
|
286
|
+
|
|
287
|
+
# Run the full suite
|
|
288
|
+
pytest tests/ -v
|
|
289
|
+
|
|
290
|
+
# With coverage
|
|
291
|
+
pytest tests/ --cov=temporal_leaks --cov-report=term-missing
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
---
|
|
295
|
+
|
|
296
|
+
## Contributing
|
|
297
|
+
|
|
298
|
+
Pull requests are welcome. For major changes, please open an issue first.
|
|
299
|
+
|
|
300
|
+
1. Fork the repo
|
|
301
|
+
2. Create your feature branch: `git checkout -b feat/my-feature`
|
|
302
|
+
3. Commit your changes: `git commit -m 'feat: add my feature'`
|
|
303
|
+
4. Push and open a PR
|
|
304
|
+
|
|
305
|
+
Please make sure `ruff check .` and `mypy temporal_leaks/` pass before submitting.
|
|
306
|
+
|
|
307
|
+
---
|
|
308
|
+
|
|
309
|
+
## License
|
|
310
|
+
|
|
311
|
+
MIT © temporal-leaks contributors
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "temporal-leaks"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Valgrind for Time-Series ML — automatically detect look-ahead bias in data science pipelines."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
|
|
13
|
+
authors = [
|
|
14
|
+
{ name = "Prakul Hiremath" },
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
maintainers = [
|
|
18
|
+
{ name = "Prakul Hiremath" },
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
keywords = [
|
|
22
|
+
"time-series",
|
|
23
|
+
"leakage",
|
|
24
|
+
"look-ahead-bias",
|
|
25
|
+
"machine-learning",
|
|
26
|
+
"data-science",
|
|
27
|
+
"auditing",
|
|
28
|
+
"quant-finance",
|
|
29
|
+
"forecasting",
|
|
30
|
+
"feature-engineering",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
classifiers = [
|
|
34
|
+
"Development Status :: 4 - Beta",
|
|
35
|
+
"Intended Audience :: Developers",
|
|
36
|
+
"Intended Audience :: Science/Research",
|
|
37
|
+
"License :: OSI Approved :: MIT License",
|
|
38
|
+
"Programming Language :: Python :: 3",
|
|
39
|
+
"Programming Language :: Python :: 3.9",
|
|
40
|
+
"Programming Language :: Python :: 3.10",
|
|
41
|
+
"Programming Language :: Python :: 3.11",
|
|
42
|
+
"Programming Language :: Python :: 3.12",
|
|
43
|
+
"Programming Language :: Python :: 3.13",
|
|
44
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
45
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
46
|
+
"Topic :: Software Development :: Testing",
|
|
47
|
+
"Typing :: Typed",
|
|
48
|
+
]
|
|
49
|
+
|
|
50
|
+
dependencies = [
|
|
51
|
+
"pandas>=1.5.0",
|
|
52
|
+
"polars>=0.19.0",
|
|
53
|
+
"numpy>=1.23.0",
|
|
54
|
+
"Jinja2>=3.1.0",
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
[project.optional-dependencies]
|
|
58
|
+
dev = [
|
|
59
|
+
"pytest>=7.4.0",
|
|
60
|
+
"pytest-cov>=4.1.0",
|
|
61
|
+
"ruff>=0.1.0",
|
|
62
|
+
"mypy>=1.5.0",
|
|
63
|
+
"pandas-stubs>=2.0.0",
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
pandas = [
|
|
67
|
+
"pandas>=1.5.0",
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
polars = [
|
|
71
|
+
"polars>=0.19.0",
|
|
72
|
+
]
|
|
73
|
+
|
|
74
|
+
all = [
|
|
75
|
+
"pandas>=1.5.0",
|
|
76
|
+
"polars>=0.19.0",
|
|
77
|
+
]
|
|
78
|
+
|
|
79
|
+
[project.urls]
|
|
80
|
+
Homepage = "https://github.com/prakulhiremath/temporal-leaks"
|
|
81
|
+
Repository = "https://github.com/prakulhiremath/temporal-leaks"
|
|
82
|
+
Issues = "https://github.com/prakulhiremath/temporal-leaks/issues"
|
|
83
|
+
|
|
84
|
+
[tool.hatch.build]
|
|
85
|
+
include = [
|
|
86
|
+
"/temporal_leaks",  # ship the whole package (py.typed included), not just the marker file
|
|
87
|
+
]
|
|
88
|
+
|
|
89
|
+
[tool.hatch.build.targets.wheel]
|
|
90
|
+
packages = ["temporal_leaks"]
|
|
91
|
+
|
|
92
|
+
[tool.ruff]
|
|
93
|
+
line-length = 100
|
|
94
|
+
target-version = "py39"
|
|
95
|
+
|
|
96
|
+
[tool.ruff.lint]
|
|
97
|
+
select = [
|
|
98
|
+
"E",
|
|
99
|
+
"F",
|
|
100
|
+
"I",
|
|
101
|
+
"N",
|
|
102
|
+
"W",
|
|
103
|
+
"UP",
|
|
104
|
+
"ANN",
|
|
105
|
+
"B",
|
|
106
|
+
"SIM",
|
|
107
|
+
"TCH",
|
|
108
|
+
]
|
|
109
|
+
|
|
110
|
+
ignore = [
|
|
111
|
+
"ANN101",
|
|
112
|
+
"ANN102",
|
|
113
|
+
]
|
|
114
|
+
|
|
115
|
+
[tool.ruff.lint.per-file-ignores]
|
|
116
|
+
"tests/*" = ["ANN"]
|
|
117
|
+
|
|
118
|
+
[tool.mypy]
|
|
119
|
+
python_version = "3.9"
|
|
120
|
+
strict = true
|
|
121
|
+
warn_return_any = true
|
|
122
|
+
warn_unused_configs = true
|
|
123
|
+
ignore_missing_imports = true
|
|
124
|
+
|
|
125
|
+
[tool.pytest.ini_options]
|
|
126
|
+
testpaths = ["tests"]
|
|
127
|
+
addopts = "--tb=short -v"
|
|
128
|
+
|
|
129
|
+
[tool.coverage.run]
|
|
130
|
+
source = ["temporal_leaks"]
|
|
131
|
+
omit = ["tests/*"]
|
|
File without changes
|