tracepipe 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,813 @@
1
+ # tracepipe/instrumentation/indexer_capture.py
2
+ """
3
+ loc/iloc instrumentation for TracePipe.
4
+
5
+ Operations tracked:
6
+ | Pattern | Type | Completeness |
7
+ |------------------------------|------------|--------------|
8
+ | df.loc[mask] | Filter | FULL |
9
+ | df.loc[mask, 'col'] | Filter | FULL |
10
+ | df.iloc[0:5] | Filter | FULL |
11
+ | df.iloc[[1,3,5]] | Filter | FULL |
12
+ | df.loc[mask, 'col'] = val | Transform | FULL |
13
+ | df.iloc[0:5, 0] = val | Transform | FULL |
14
+ | df.loc[mask] = other_df | Transform | PARTIAL |
15
+
16
+ Key insight: We wrap the indexer's __getitem__ and __setitem__, not DataFrame.loc itself.
17
+ """
18
+
19
+ import warnings
20
+ from typing import Any, Optional, Union
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ from ..context import get_context
26
+ from ..core import ChangeType, CompletenessLevel
27
+ from ..safety import TracePipeWarning, get_caller_info
28
+
29
+
class _CallableLocIndexer:
    """
    Wrapper for the indexer returned by ``df.loc(axis=...)``.

    pandas uses the callable form internally (e.g. ``dropna`` calls
    ``self.loc(axis=axis)[mask]``).  Row IDs are always propagated to a
    DataFrame result, but a step is only recorded when
    ``ctx._filter_op_depth == 0``; when the depth is positive the enclosing
    operation is expected to record the drops, and recording here as well
    would double-count them.
    """

    def __init__(self, indexer, parent_df: pd.DataFrame):
        self._indexer = indexer
        self._parent_df = parent_df

    def __getattr__(self, name):
        """Proxy any other attribute access to the underlying indexer."""
        # __getattr__ only runs when normal lookup fails.  If our own slots
        # are missing (instance built via __new__, e.g. by copy/pickle),
        # delegating through self._indexer would recurse forever.
        if name in ("_indexer", "_parent_df"):
            raise AttributeError(name)
        return getattr(self._indexer, name)

    def _row_mask(self, key, result: pd.DataFrame) -> Optional[np.ndarray]:
        """
        Return the boolean mask of source rows kept in ``result``.

        Returns None when the key cannot be expressed as a row mask over the
        parent frame, in which case the caller falls back to registering the
        result from scratch.
        """
        source_len = len(self._parent_df)

        # loc(axis=1)[...] selects columns: the key (even a boolean one)
        # describes columns, and every source row survives.
        if getattr(self._indexer, "axis", None) == 1:
            if len(result) == source_len:
                return np.ones(source_len, dtype=bool)
            return None

        if hasattr(key, "dtype") and key.dtype == bool:
            mask = key.values if isinstance(key, pd.Series) else np.asarray(key)
            # A mask of the wrong length cannot describe the parent's rows.
            if len(mask) == source_len:
                return mask
        return None

    def __getitem__(self, key):
        """
        Pass through to the underlying indexer and propagate RIDs.

        The pandas call always runs first and its result is always returned.
        Capture failures are re-raised only when ``ctx.config.strict_mode``
        is set; otherwise they are reported as a ``TracePipeWarning``.
        """
        ctx = get_context()
        result = self._indexer[key]

        if not ctx.enabled or not isinstance(result, pd.DataFrame):
            return result

        try:
            row_mgr = ctx.row_manager
            source_df = self._parent_df

            source_rids = row_mgr.get_ids_array(source_df)
            if source_rids is None:
                row_mgr.register(source_df)
                source_rids = row_mgr.get_ids_array(source_df)

            # Propagate RIDs to the result even when no step is recorded.
            mask = self._row_mask(key, result)
            if mask is not None:
                row_mgr.propagate_by_mask(source_df, result, mask)
            else:
                row_mgr.register(result)

            # Only create a step if NOT inside a filter operation.
            if ctx._filter_op_depth == 0:
                store = ctx.store
                if mask is not None:
                    dropped_rids, _ = row_mgr.compute_dropped_with_positions(source_rids, mask)
                    completeness = CompletenessLevel.FULL
                else:
                    result_rids = row_mgr.get_ids_array(result)
                    dropped_rids = row_mgr.compute_dropped_ids(source_rids, result_rids)
                    completeness = CompletenessLevel.PARTIAL

                code_file, code_line = get_caller_info(skip_frames=4)
                step_id = store.append_step(
                    operation="DataFrame.loc(axis)[]",
                    stage=ctx.current_stage,
                    code_file=code_file,
                    code_line=code_line,
                    params={},
                    input_shape=source_df.shape,
                    output_shape=result.shape,
                    completeness=completeness,
                )

                if len(dropped_rids) > 0:
                    store.append_bulk_drops(step_id, dropped_rids)
        except Exception as e:
            if ctx.config.strict_mode:
                raise
            warnings.warn(f"TracePipe: loc(axis)[] capture failed: {e}", TracePipeWarning)

        return result

    def __setitem__(self, key, value):
        """Pass through setitem (assignments are not tracked here)."""
        self._indexer[key] = value
106
+
107
+
class TrackedLocIndexer:
    """
    Wrapper around pandas _LocIndexer that captures lineage.

    Usage (internal - user never sees this):
        df.loc  # Returns TrackedLocIndexer wrapping the real _LocIndexer

    The pandas operation always runs; lineage capture is best-effort.
    Capture errors are re-raised only when ``ctx.config.strict_mode`` is set
    and are otherwise reported as a ``TracePipeWarning``.
    """

    def __init__(self, indexer, parent_df: pd.DataFrame):
        self._indexer = indexer
        self._parent_df = parent_df

    def __getattr__(self, name):
        """Proxy any other attribute access to the underlying indexer."""
        # __getattr__ only runs when normal lookup fails.  If our own slots
        # are missing (instance built via __new__, e.g. by copy/pickle),
        # delegating through self._indexer would recurse forever.
        if name in ("_indexer", "_parent_df"):
            raise AttributeError(name)
        return getattr(self._indexer, name)

    def __call__(self, axis=None):
        """
        Support for df.loc(axis=...) callable form used internally by pandas.

        This is used by dropna and other methods that call self.loc(axis=axis)[mask].
        """
        # Return a callable-aware indexer that wraps the result of calling the original
        return _CallableLocIndexer(self._indexer(axis), self._parent_df)

    def __getitem__(self, key) -> Union[pd.DataFrame, pd.Series, Any]:
        """
        Capture filter operations via loc[].

        Only DataFrame results are treated as filters; Series and scalar
        results are returned without any lineage bookkeeping.
        """
        ctx = get_context()

        # Always run original first
        result = self._indexer[key]

        if not ctx.enabled:
            return result

        try:
            self._capture_loc_getitem(key, result, ctx)
        except Exception as e:
            if ctx.config.strict_mode:
                raise
            warnings.warn(f"TracePipe: loc[] capture failed: {e}", TracePipeWarning)

        return result

    def __setitem__(self, key, value) -> None:
        """
        Capture transform operations via loc[] = value.

        Values of watched columns are snapshotted before the assignment and
        compared afterwards.  Nothing is recorded when no columns are
        watched or when the affected rows cannot be derived from ``key``.
        """
        ctx = get_context()

        # Capture before state for watched columns.  This is best-effort too:
        # a failure here must not prevent the user's assignment from running.
        before_values = None
        affected_cols = None
        if ctx.enabled and ctx.watched_columns:
            try:
                before_values, affected_cols = self._capture_before_state(key, ctx)
            except Exception as e:
                if ctx.config.strict_mode:
                    raise
                warnings.warn(f"TracePipe: loc[]= capture failed: {e}", TracePipeWarning)
                before_values, affected_cols = None, None

        # Always run original
        self._indexer[key] = value

        if not ctx.enabled:
            return

        try:
            self._capture_loc_setitem(key, value, before_values, affected_cols, ctx)
        except Exception as e:
            if ctx.config.strict_mode:
                raise
            warnings.warn(f"TracePipe: loc[]= capture failed: {e}", TracePipeWarning)

    @staticmethod
    def _mask_describes_result(mask: np.ndarray, source_df: pd.DataFrame, result: pd.DataFrame) -> bool:
        """Return True if applying ``mask`` to the source index reproduces the result index."""
        if len(mask) != len(source_df) or int(mask.sum()) != len(result):
            return False
        return bool(result.index.equals(source_df.index[mask]))

    def _capture_loc_getitem(self, key, result, ctx) -> None:
        """Propagate RIDs to a DataFrame result and record the filter step."""
        if not isinstance(result, pd.DataFrame):
            return  # Series or scalar - not a filter operation

        row_mgr = ctx.row_manager
        source_df = self._parent_df

        source_rids = row_mgr.get_ids_array(source_df)
        if source_rids is None:
            row_mgr.register(source_df)
            source_rids = row_mgr.get_ids_array(source_df)

        # Derive kept mask from key
        kept_mask, completeness = self._derive_loc_mask(key, source_df)

        # A mask is an unordered row set.  Label lists may reorder or repeat
        # rows (df.loc[[2, 0, 0]]), so only trust the mask when it reproduces
        # the result's index exactly; otherwise fall back to PARTIAL.
        if kept_mask is not None and not self._mask_describes_result(kept_mask, source_df, result):
            kept_mask = None

        # Always propagate RIDs
        if kept_mask is not None:
            row_mgr.propagate_by_mask(source_df, result, kept_mask)
        else:
            row_mgr.register(result)

        # Skip step tracking if we're inside a filter operation (parent will track)
        if ctx._filter_op_depth > 0:
            return

        store = ctx.store

        if kept_mask is not None:
            dropped_rids, _ = row_mgr.compute_dropped_with_positions(source_rids, kept_mask)
        else:
            completeness = CompletenessLevel.PARTIAL
            result_rids = row_mgr.get_ids_array(result)
            dropped_rids = row_mgr.compute_dropped_ids(source_rids, result_rids)

        code_file, code_line = get_caller_info(skip_frames=4)
        step_id = store.append_step(
            operation="DataFrame.loc[]",
            stage=ctx.current_stage,
            code_file=code_file,
            code_line=code_line,
            params={"key_type": type(key).__name__},
            input_shape=source_df.shape,
            output_shape=result.shape,
            completeness=completeness,
        )

        if len(dropped_rids) > 0:
            store.append_bulk_drops(step_id, dropped_rids)

    @staticmethod
    def _is_bool_key(row_key) -> bool:
        """Return True if ``row_key`` is a boolean indexer (bool-dtype array-like or list of bools)."""
        if isinstance(row_key, list):
            return bool(row_key) and all(isinstance(v, (bool, np.bool_)) for v in row_key)
        dtype = getattr(row_key, "dtype", None)
        return dtype is not None and bool(pd.api.types.is_bool_dtype(dtype))

    @staticmethod
    def _bool_key_to_mask(row_key, df: pd.DataFrame) -> Optional[np.ndarray]:
        """
        Convert a boolean indexer into a positional bool array over ``df``.

        A boolean Series is first aligned to ``df.index`` by label.  Missing
        values count as False.  Returns None when the key cannot be mapped
        one-to-one onto the rows of ``df``.
        """
        if isinstance(row_key, pd.Series):
            aligned = row_key
            if not aligned.index.equals(df.index):
                take = aligned.index.get_indexer_for(df.index)
                if len(take) != len(df) or (take < 0).any():
                    return None
                aligned = aligned.take(take)
        else:
            if getattr(row_key, "ndim", 1) != 1:
                return None
            aligned = pd.Series(row_key)

        if len(aligned) != len(df):
            return None
        return aligned.to_numpy(dtype=bool, na_value=False)

    def _derive_loc_mask(
        self, key, df: pd.DataFrame
    ) -> "tuple[Optional[np.ndarray], CompletenessLevel]":
        """
        Derive boolean mask from loc key.

        Key types:
        - Boolean array/Series/list -> mask (Series aligned on index)
        - List/array/Index/Series of labels -> index.isin()
        - Slice -> positional mask via Index.slice_indexer (honours step)
        - Single label -> single row mask
        - Tuple (row_key, col_key) -> handle row_key

        Returns ``(mask, FULL)`` or ``(None, PARTIAL)`` when no mask can be
        derived.  The mask is the *set* of touched rows; it carries no
        ordering or multiplicity information.

        NOTE(review): a tuple key is always split as (row_key, col_key), so a
        tuple label on a MultiIndex is presumably mis-read - confirm.
        """
        row_key = key[0] if isinstance(key, tuple) else key
        n_rows = len(df)
        unknown = (None, CompletenessLevel.PARTIAL)

        # Boolean mask
        if self._is_bool_key(row_key):
            mask = self._bool_key_to_mask(row_key, df)
            if mask is None:
                return unknown
            return mask, CompletenessLevel.FULL

        # Collection of labels
        if isinstance(row_key, (list, np.ndarray, pd.Index, pd.Series)):
            try:
                mask = np.asarray(df.index.isin(row_key), dtype=bool)
            except (TypeError, ValueError):
                return unknown
            return mask, CompletenessLevel.FULL

        # Label slice (inclusive on both ends; slice_indexer handles that)
        if isinstance(row_key, slice):
            try:
                bounds = df.index.slice_indexer(row_key.start, row_key.stop, row_key.step)
            except (KeyError, TypeError, ValueError):
                return unknown
            mask = np.zeros(n_rows, dtype=bool)
            mask[bounds] = True
            return mask, CompletenessLevel.FULL

        # Callables are resolved by pandas against the frame; re-invoking
        # user code here could repeat side effects, so do not guess.
        if callable(row_key):
            return unknown

        # Single label
        try:
            mask = np.asarray(df.index == row_key)
        except Exception:
            return unknown
        if mask.dtype != bool or mask.shape != (n_rows,):
            return unknown
        return mask, CompletenessLevel.FULL

    def _capture_before_state(self, key, ctx) -> "tuple[Optional[dict], Optional[list]]":
        """
        Snapshot watched-column values before an assignment.

        Returns ``(before, affected_cols)`` where ``before`` maps each
        affected column to its ``rids``, ``values`` and row ``positions``.
        Returns ``(None, None)`` when no watched column is affected and
        ``(None, affected_cols)`` when the affected rows are unknown.
        """
        col_key = key[1] if isinstance(key, tuple) and len(key) > 1 else None
        columns = self._parent_df.columns
        watched = ctx.watched_columns

        # Determine the columns named by the key
        if col_key is None:
            selected = list(columns)
        elif isinstance(col_key, str):
            selected = [col_key]
        elif isinstance(col_key, (list, tuple, np.ndarray, pd.Index)):
            selected = list(col_key)
        elif isinstance(col_key, slice):
            try:
                selected = list(columns[columns.slice_indexer(col_key.start, col_key.stop, col_key.step)])
            except (KeyError, TypeError, ValueError):
                selected = []
        else:
            selected = []

        # Keep watched columns that already exist.  A column being created by
        # this assignment has no "before" values to read.
        affected_cols = []
        for col in selected:
            try:
                if col in watched and col in columns and col not in affected_cols:
                    affected_cols.append(col)
            except TypeError:
                continue  # unhashable entry cannot be a watched column name

        if not affected_cols:
            return None, None

        # Derive affected rows
        mask, _ = self._derive_loc_mask(key, self._parent_df)
        if mask is None:
            return None, affected_cols

        rids = ctx.row_manager.get_ids_array(self._parent_df)
        if rids is None:
            return None, affected_cols

        affected_positions = np.where(mask)[0]
        before = {}
        for col in affected_cols:
            before[col] = {
                "rids": rids[affected_positions].copy(),
                "values": self._parent_df[col].values[affected_positions].copy(),
                "positions": affected_positions,
            }

        return before, affected_cols

    def _capture_loc_setitem(self, key, value, before_values, affected_cols, ctx) -> None:
        """Record a transform step and one diff per changed watched cell."""
        if before_values is None or not affected_cols:
            return

        store = ctx.store

        code_file, code_line = get_caller_info(skip_frames=4)
        step_id = store.append_step(
            operation="DataFrame.loc[]=",
            stage=ctx.current_stage,
            code_file=code_file,
            code_line=code_line,
            params={"columns": affected_cols[:3]},
            input_shape=self._parent_df.shape,
            output_shape=self._parent_df.shape,
            completeness=CompletenessLevel.FULL,
        )

        from ..utils.value_capture import values_equal

        for col in affected_cols:
            snapshot = before_values.get(col)
            if snapshot is None:
                continue

            # Read the same positions that were snapshotted, so old and new
            # values stay paired even if the assignment enlarged the frame.
            new_vals = self._parent_df[col].values[snapshot["positions"]]

            for rid, old_val, new_val in zip(snapshot["rids"], snapshot["values"], new_vals):
                if not values_equal(old_val, new_val):
                    store.append_diff(
                        step_id=step_id,
                        row_id=int(rid),
                        col=col,
                        old_val=old_val,
                        new_val=new_val,
                        change_type=ChangeType.MODIFIED,
                    )
382
+
383
+
class TrackedILocIndexer:
    """
    Wrapper around pandas _iLocIndexer that captures lineage.

    Similar to TrackedLocIndexer but uses positional indexing.  The pandas
    operation always runs; capture errors are re-raised only when
    ``ctx.config.strict_mode`` is set and are otherwise reported as a
    ``TracePipeWarning``.
    """

    def __init__(self, indexer, parent_df: pd.DataFrame):
        self._indexer = indexer
        self._parent_df = parent_df

    def __getattr__(self, name):
        """Proxy any other attribute access to the underlying indexer."""
        # __getattr__ only runs when normal lookup fails.  If our own slots
        # are missing (instance built via __new__, e.g. by copy/pickle),
        # delegating through self._indexer would recurse forever.
        if name in ("_indexer", "_parent_df"):
            raise AttributeError(name)
        return getattr(self._indexer, name)

    def __getitem__(self, key) -> Union[pd.DataFrame, pd.Series, Any]:
        """Capture filter via iloc[]; the pandas result is always returned."""
        ctx = get_context()
        result = self._indexer[key]

        if not ctx.enabled:
            return result

        try:
            self._capture_iloc_getitem(key, result, ctx)
        except Exception as e:
            if ctx.config.strict_mode:
                raise
            warnings.warn(f"TracePipe: iloc[] capture failed: {e}", TracePipeWarning)

        return result

    def __setitem__(self, key, value) -> None:
        """Capture transform via iloc[] = value for watched columns."""
        ctx = get_context()

        # Snapshot is best-effort: a failure here must not prevent the
        # user's assignment from running.
        before_values = None
        affected_cols = None
        if ctx.enabled and ctx.watched_columns:
            try:
                before_values, affected_cols = self._capture_before_state(key, ctx)
            except Exception as e:
                if ctx.config.strict_mode:
                    raise
                warnings.warn(f"TracePipe: iloc[]= capture failed: {e}", TracePipeWarning)
                before_values, affected_cols = None, None

        self._indexer[key] = value

        if not ctx.enabled:
            return

        try:
            self._capture_iloc_setitem(key, value, before_values, affected_cols, ctx)
        except Exception as e:
            if ctx.config.strict_mode:
                raise
            warnings.warn(f"TracePipe: iloc[]= capture failed: {e}", TracePipeWarning)

    def _capture_iloc_getitem(self, key, result, ctx) -> None:
        """Propagate RIDs to a DataFrame result and record the filter step."""
        if not isinstance(result, pd.DataFrame):
            return

        row_mgr = ctx.row_manager
        source_df = self._parent_df

        source_rids = row_mgr.get_ids_array(source_df)
        if source_rids is None:
            row_mgr.register(source_df)
            source_rids = row_mgr.get_ids_array(source_df)

        # Derive positions from key
        positions = self._derive_iloc_positions(key, source_df)

        # Always propagate RIDs
        if positions is not None:
            row_mgr.propagate_by_positions(source_df, result, positions)
        else:
            row_mgr.register(result)

        # Skip step tracking if we're inside a filter operation (parent will track)
        if ctx._filter_op_depth > 0:
            return

        store = ctx.store

        if positions is not None:
            # Rows never selected by any position are the dropped ones.
            kept_mask = np.zeros(len(source_df), dtype=bool)
            kept_mask[positions] = True
            dropped_rids, _ = row_mgr.compute_dropped_with_positions(source_rids, kept_mask)
            completeness = CompletenessLevel.FULL
        else:
            completeness = CompletenessLevel.PARTIAL
            result_rids = row_mgr.get_ids_array(result)
            dropped_rids = row_mgr.compute_dropped_ids(source_rids, result_rids)

        code_file, code_line = get_caller_info(skip_frames=4)
        step_id = store.append_step(
            operation="DataFrame.iloc[]",
            stage=ctx.current_stage,
            code_file=code_file,
            code_line=code_line,
            params={"key_type": type(key).__name__},
            input_shape=source_df.shape,
            output_shape=result.shape,
            completeness=completeness,
        )

        if len(dropped_rids) > 0:
            store.append_bulk_drops(step_id, dropped_rids)

    def _derive_iloc_positions(self, key, df: pd.DataFrame) -> Optional[np.ndarray]:
        """
        Derive the non-negative int64 row positions selected by an iloc key.

        For a tuple key only the first element (the row selector) is used.
        Returns None for key types that are not understood.
        """
        row_key = key[0] if isinstance(key, tuple) else key
        n = len(df)

        # Integer (Python or NumPy).  bool is an int subclass but is not a
        # positional key, so it is excluded.
        if isinstance(row_key, (int, np.integer)) and not isinstance(row_key, bool):
            pos = int(row_key)
            if pos < 0:
                pos += n
            return np.array([pos], dtype=np.int64)

        # Slice
        if isinstance(row_key, slice):
            return np.arange(*row_key.indices(n), dtype=np.int64)

        # List/array: boolean masks must be recognised BEFORE the integer
        # cast, otherwise [True, False, True] turns into positions [1, 0, 1].
        if isinstance(row_key, (list, np.ndarray, pd.Index, pd.Series)):
            arr = np.asarray(row_key)
            if arr.dtype == bool:
                return np.flatnonzero(arr).astype(np.int64)
            if arr.size == 0:
                return np.empty(0, dtype=np.int64)
            arr = arr.astype(np.int64)
            # Handle negative indices
            return np.where(arr < 0, n + arr, arr)

        # Other boolean array-likes (e.g. extension arrays)
        if hasattr(row_key, "dtype") and row_key.dtype == bool:
            return np.flatnonzero(np.asarray(row_key, dtype=bool)).astype(np.int64)

        return None

    def _capture_before_state(self, key, ctx):
        """
        Snapshot watched-column values before an assignment.

        Returns ``(before, affected_cols)`` where ``before`` maps each
        affected column to its ``rids``, ``values`` and row ``positions``.
        Returns ``(None, None)`` when no watched column is affected and
        ``(None, affected_cols)`` when the affected rows are unknown.
        """
        col_key = key[1] if isinstance(key, tuple) and len(key) > 1 else None
        columns = self._parent_df.columns

        # Determine affected columns by position
        if col_key is None:
            selected = list(columns)
        elif isinstance(col_key, (int, np.integer)) and not isinstance(col_key, bool):
            selected = [columns[col_key]]
        elif isinstance(col_key, slice):
            selected = columns[col_key].tolist()
        elif isinstance(col_key, (list, np.ndarray)):
            # Index fancy-indexing accepts both integer positions and a
            # boolean mask, matching what iloc accepts for columns.
            selector = np.asarray(col_key)
            selected = columns[selector].tolist() if selector.size else []
        else:
            selected = []

        watched = ctx.watched_columns
        affected_cols = [c for c in selected if c in watched]

        if not affected_cols:
            return None, None

        positions = self._derive_iloc_positions(key, self._parent_df)
        if positions is None:
            return None, affected_cols

        rids = ctx.row_manager.get_ids_array(self._parent_df)
        if rids is None:
            return None, affected_cols

        before = {}
        for col in affected_cols:
            before[col] = {
                "rids": rids[positions].copy(),
                "values": self._parent_df[col].values[positions].copy(),
                "positions": positions.copy(),
            }

        return before, affected_cols

    def _capture_iloc_setitem(self, key, value, before_values, affected_cols, ctx) -> None:
        """Record a transform step and one diff per changed watched cell."""
        if before_values is None or not affected_cols:
            return

        store = ctx.store

        code_file, code_line = get_caller_info(skip_frames=4)
        step_id = store.append_step(
            operation="DataFrame.iloc[]=",
            stage=ctx.current_stage,
            code_file=code_file,
            code_line=code_line,
            params={"columns": affected_cols[:3]},
            input_shape=self._parent_df.shape,
            output_shape=self._parent_df.shape,
            completeness=CompletenessLevel.FULL,
        )

        from ..utils.value_capture import values_equal

        for col in affected_cols:
            snapshot = before_values.get(col)
            if snapshot is None:
                continue

            # Re-read the exact positions that were snapshotted.
            new_vals = self._parent_df[col].values[snapshot["positions"]]

            for rid, old_val, new_val in zip(snapshot["rids"], snapshot["values"], new_vals):
                if not values_equal(old_val, new_val):
                    store.append_diff(
                        step_id=step_id,
                        row_id=int(rid),
                        col=col,
                        old_val=old_val,
                        new_val=new_val,
                        change_type=ChangeType.MODIFIED,
                    )
598
+
599
+
# Getter functions of the pristine DataFrame.loc/iloc/at/iat properties.
# All four stay None until instrument_indexers() saves the originals, and
# are reset to None again by uninstrument_indexers().
_original_loc = _original_iloc = _original_at = _original_iat = None
605
+
606
+
class TrackedAtIndexer:
    """
    Wrapper around pandas _AtIndexer that captures scalar assignments.

    .at is optimized for scalar access by label.  Reads pass straight
    through; writes to a watched column record a step plus one diff.
    """

    def __init__(self, indexer, df):
        self._indexer = indexer
        self._df = df

    def __getitem__(self, key):
        """Pass reads through untouched."""
        return self._indexer[key]

    def __setitem__(self, key, value) -> None:
        """
        Capture scalar assignment via at[row, col] = value.

        The pandas assignment always runs.  Capture errors are re-raised
        only when ``ctx.config.strict_mode`` is set and are otherwise
        reported as a ``TracePipeWarning``, matching the loc/iloc wrappers.
        """
        ctx = get_context()
        if not ctx or not ctx.enabled:
            self._indexer[key] = value
            return

        # A key that is not (row, col) is the caller's mistake: hand it to
        # pandas so the user sees pandas' own error, not an unpacking error.
        if not (isinstance(key, tuple) and len(key) == 2):
            self._indexer[key] = value
            return

        row_label, col = key
        col_str = str(col)

        # Check if column is watched
        should_track = col_str in ctx.watched_columns if ctx.watched_columns else False

        # Capture before state
        old_val = None
        if should_track:
            try:
                old_val = self._indexer[key]
            except (KeyError, IndexError):
                pass  # cell does not exist yet; old value stays None

        # Execute original
        self._indexer[key] = value

        if not (should_track and ctx.store):
            return

        # Capture after state
        try:
            from .pandas_inst import get_caller_info

            code_file, code_line = get_caller_info(skip_frames=2)
            step_id = ctx.store.append_step(
                operation="DataFrame.at[]=",
                stage=ctx.current_stage,
                code_file=code_file,
                code_line=code_line,
                params={"row": str(row_label), "col": col_str},
                input_shape=self._df.shape,
                output_shape=self._df.shape,
            )

            # Get row_id for this position
            try:
                row_pos = self._df.index.get_loc(row_label)
            except (KeyError, TypeError):
                return
            # get_loc yields a slice or mask for non-unique labels; only a
            # single integer position maps to exactly one row id.
            if not isinstance(row_pos, (int, np.integer)):
                return

            rids = ctx.row_manager.get_ids_array(self._df)
            if rids is None:
                ctx.row_manager.register(self._df)
                rids = ctx.row_manager.get_ids_array(self._df)
            if rids is not None and row_pos < len(rids):
                ctx.store.append_diff(
                    step_id=step_id,
                    row_id=int(rids[row_pos]),
                    col=col_str,
                    old_val=old_val,
                    new_val=value,
                    change_type=ChangeType.MODIFIED,
                )
        except Exception as e:
            if ctx.config.strict_mode:
                raise
            warnings.warn(f"TracePipe: at[]= capture failed: {e}", TracePipeWarning)
682
+
683
+
class TrackedIAtIndexer:
    """
    Wrapper around pandas _iAtIndexer that captures scalar assignments.

    .iat is optimized for scalar access by integer position.  Reads pass
    straight through; writes to a watched column record a step plus one diff.
    """

    def __init__(self, indexer, df):
        self._indexer = indexer
        self._df = df

    def __getitem__(self, key):
        """Pass reads through untouched."""
        return self._indexer[key]

    def __setitem__(self, key, value) -> None:
        """
        Capture scalar assignment via iat[row, col] = value.

        The pandas assignment always runs.  Capture errors are re-raised
        only when ``ctx.config.strict_mode`` is set and are otherwise
        reported as a ``TracePipeWarning``, matching the loc/iloc wrappers.
        """
        ctx = get_context()
        if not ctx or not ctx.enabled:
            self._indexer[key] = value
            return

        # A key that is not (row, col) is the caller's mistake: hand it to
        # pandas so the user sees pandas' own error, not an unpacking error.
        if not (isinstance(key, tuple) and len(key) == 2):
            self._indexer[key] = value
            return

        row_pos, col_pos = key

        # Resolve the column label and check if it is watched.  If the
        # position cannot be resolved, skip tracking and let pandas report
        # the bad key when the assignment runs.
        col_str = None
        should_track = False
        try:
            columns = self._df.columns
            col_str = columns[col_pos] if col_pos < len(columns) else str(col_pos)
            should_track = col_str in ctx.watched_columns if ctx.watched_columns else False
        except (IndexError, TypeError):
            should_track = False

        # Capture before state
        old_val = None
        if should_track:
            try:
                old_val = self._indexer[key]
            except (KeyError, IndexError):
                pass

        # Execute original
        self._indexer[key] = value

        if not (should_track and ctx.store):
            return

        # Capture after state
        try:
            from .pandas_inst import get_caller_info

            code_file, code_line = get_caller_info(skip_frames=2)
            step_id = ctx.store.append_step(
                operation="DataFrame.iat[]=",
                stage=ctx.current_stage,
                code_file=code_file,
                code_line=code_line,
                params={"row": row_pos, "col": col_str},
                input_shape=self._df.shape,
                output_shape=self._df.shape,
            )

            # Get row_id for this position
            try:
                rids = ctx.row_manager.get_ids_array(self._df)
                if rids is None:
                    ctx.row_manager.register(self._df)
                    rids = ctx.row_manager.get_ids_array(self._df)
                if rids is not None and row_pos < len(rids):
                    ctx.store.append_diff(
                        step_id=step_id,
                        row_id=int(rids[row_pos]),
                        col=col_str,
                        old_val=old_val,
                        new_val=value,
                        change_type=ChangeType.MODIFIED,
                    )
            except (KeyError, TypeError):
                pass  # row id could not be resolved; the step is kept without a diff
        except Exception as e:
            if ctx.config.strict_mode:
                raise
            warnings.warn(f"TracePipe: iat[]= capture failed: {e}", TracePipeWarning)
757
+
758
+
def instrument_indexers():
    """
    Install tracked indexers for loc, iloc, at, iat.

    Saves the getter of each original ``pd.DataFrame`` indexer property in
    the module-level ``_original_*`` globals, then replaces the property
    with one that wraps the original indexer in its Tracked* counterpart.
    Does nothing if ``_original_loc`` is already set.
    """
    global _original_loc, _original_iloc, _original_at, _original_iat

    already_instrumented = _original_loc is not None
    if already_instrumented:
        return

    frame_cls = pd.DataFrame

    # Remember the pristine getters so they can be restored later.
    _original_loc = frame_cls.loc.fget
    _original_iloc = frame_cls.iloc.fget
    _original_at = frame_cls.at.fget
    _original_iat = frame_cls.iat.fget

    def tracked_loc(self):
        return TrackedLocIndexer(_original_loc(self), self)

    def tracked_iloc(self):
        return TrackedILocIndexer(_original_iloc(self), self)

    def tracked_at(self):
        return TrackedAtIndexer(_original_at(self), self)

    def tracked_iat(self):
        return TrackedIAtIndexer(_original_iat(self), self)

    frame_cls.loc = property(tracked_loc)
    frame_cls.iloc = property(tracked_iloc)
    frame_cls.at = property(tracked_at)
    frame_cls.iat = property(tracked_iat)
796
+
797
+
def uninstrument_indexers():
    """
    Restore original loc/iloc/at/iat.

    Each saved getter is re-wrapped in a fresh ``property`` on
    ``pd.DataFrame`` and its ``_original_*`` global is reset to None.
    Indexers with nothing saved are left untouched.
    """
    global _original_loc, _original_iloc, _original_at, _original_iat

    frame_cls = pd.DataFrame

    # The right-hand side is evaluated before either target is assigned, so
    # the saved getter is consumed before its global is cleared.
    if _original_loc is not None:
        frame_cls.loc, _original_loc = property(_original_loc), None
    if _original_iloc is not None:
        frame_cls.iloc, _original_iloc = property(_original_iloc), None
    if _original_at is not None:
        frame_cls.at, _original_at = property(_original_at), None
    if _original_iat is not None:
        frame_cls.iat, _original_iat = property(_original_iat), None