storage-leak-diff-detector 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sldd/__init__.py ADDED
@@ -0,0 +1,6 @@
1
"""sldd — Storage Leak Diff Detector.

Cross-platform tool for detecting abnormal filesystem growth.
"""

# Version string of the sldd package.
__version__ = "0.1.2"
sldd/adaptive.py ADDED
@@ -0,0 +1,326 @@
1
+ """Adaptive scan engine — start shallow, focus on what changes, discard the rest.
2
+
3
+ Modes:
4
+ auto Smart default. Shallow discovery → focus on growers → compact stable.
5
+ full Always scan at max depth (legacy behavior).
6
+ disabled Skip adaptive logic entirely, use raw ScanConfig as-is.
7
+
8
+ Lifecycle per scan cycle:
9
+ 1. plan_scan() → decide depth, focus paths, skip paths
10
+ 2. (caller runs snapshot with the plan)
11
+ 3. update_tracking() → analyze diff, update path_status table
12
+ 4. compact() → collapse stable subtrees, prune old snapshots
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ from dataclasses import replace
19
+
20
+ from sldd.diff import compute_diff
21
+ from sldd.models import (
22
+ AdaptiveConfig,
23
+ CompactResult,
24
+ ScanConfig,
25
+ ScanPlan,
26
+ Snapshot,
27
+ SnapshotDiff,
28
+ )
29
+ from sldd.storage import SnapshotStore
30
+
31
# Module-level logger ("sldd.adaptive") used by the functions in this module
# for their info and warning messages.
_log = logging.getLogger("sldd.adaptive")
32
+
33
+
34
def plan_scan(
    store: SnapshotStore,
    config: AdaptiveConfig,
    scan_config: ScanConfig,
) -> tuple[ScanConfig, ScanPlan]:
    """Decide what to scan based on path tracking history.

    Three outcomes are possible:

    * ``config.mode == "disabled"``: ``scan_config`` is returned untouched
      together with a plan whose strategy is ``"full"``.
    * Discovery scan: chosen for scan number 0, for every
      ``config.rediscovery_every``-th scan, and always in ``"full"`` mode.
      In ``"auto"`` mode the depth is ``config.initial_depth``; otherwise it
      is ``scan_config.max_depth``, falling back to ``config.initial_depth``
      when ``max_depth`` is ``None``.
    * Focused scan: paths tracked as ``"focus"`` are reported in the plan and
      paths tracked as ``"stable"`` are appended to the excludes. When there
      are no focus paths yet, the scan runs at ``config.initial_depth`` with
      the caller's original excludes.

    Args:
        store: Snapshot store providing the scan counter and path statuses.
        config: Adaptive settings (mode, depths, rediscovery interval).
        scan_config: Base scan configuration; it is never mutated, a modified
            copy is returned instead.

    Returns:
        A ``(scan_config, plan)`` tuple: the ScanConfig to run the snapshot
        with and a ScanPlan describing the chosen strategy.
    """
    scan_num = store.get_scan_number()

    if config.mode == "disabled":
        return scan_config, ScanPlan(
            strategy="full",
            scan_depth=scan_config.max_depth,
            focus_paths=[],
            skip_paths=[],
            scan_number=scan_num,
            reason="adaptive disabled",
        )

    # A falsy interval (0 / None) means "no periodic rediscovery". Testing it
    # before the modulo avoids a ZeroDivisionError on such a configuration.
    rediscovery_every = config.rediscovery_every
    is_discovery = (
        scan_num == 0
        or config.mode == "full"
        or bool(rediscovery_every and scan_num % rediscovery_every == 0)
    )

    if is_discovery:
        if config.mode == "auto" or scan_config.max_depth is None:
            depth = config.initial_depth
        else:
            depth = scan_config.max_depth
        return (
            replace(scan_config, max_depth=depth),
            ScanPlan(
                strategy="discovery",
                scan_depth=depth,
                focus_paths=[],
                skip_paths=[],
                scan_number=scan_num,
                reason=f"discovery scan #{scan_num} at depth {depth}",
            ),
        )

    focus_paths = [r["path"] for r in store.get_path_statuses(status="focus")]
    skip_paths = [r["path"] for r in store.get_path_statuses(status="stable")]

    excludes = list(scan_config.excludes)

    if not focus_paths:
        # Nothing to focus on yet: stay shallow and do not skip anything
        # beyond what the caller already excluded.
        depth = config.initial_depth
        reason = f"no focus paths yet, scanning at depth {depth}"
    else:
        depth = config.focus_depth
        # Set lookup keeps the merge linear in the number of stable paths.
        already_excluded = set(excludes)
        excludes += [p for p in skip_paths if p not in already_excluded]
        reason = (
            f"focused scan: {len(focus_paths)} focus paths, "
            f"skipping {len(skip_paths)} stable"
        )

    return (
        replace(scan_config, max_depth=depth, excludes=excludes),
        ScanPlan(
            strategy="focused",
            scan_depth=depth,
            focus_paths=focus_paths,
            skip_paths=skip_paths,
            scan_number=scan_num,
            reason=reason,
        ),
    )
111
+
112
+
113
def update_tracking(
    store: SnapshotStore,
    config: AdaptiveConfig,
    diff: SnapshotDiff,
) -> dict[str, int]:
    """Refresh the path_status table from a diff and advance the scan counter.

    Every diff entry whose size changed (grew or shrank) is written as
    "focus" with its stable streak reset to 0. An entry with zero growth has
    its streak incremented; once the streak reaches
    ``config.stability_scans`` it is written as "stable", otherwise as
    "active".

    Returns summary counts: {"promoted": N, "stabilized": N, "unchanged": N}.
    In "disabled" mode nothing is written and all counts are 0.
    """
    counts = {"promoted": 0, "stabilized": 0, "unchanged": 0}
    if config.mode == "disabled":
        return counts

    known = {str(row["path"]): row for row in store.get_path_statuses()}

    pending: list[tuple[str, str, int, int, int, int, int]] = []
    for entry in diff.entries:
        previous = known.get(entry.path)
        streak = int(previous["consecutive_stable"]) if previous else 0

        if entry.growth_bytes != 0:
            # Any size change, in either direction, puts the path in focus.
            status, bucket = "focus", "promoted"
            streak = 0
            growth = entry.growth_bytes
        else:
            streak += 1
            growth = 0
            if streak >= config.stability_scans:
                status, bucket = "stable", "stabilized"
            else:
                status, bucket = "active", "unchanged"

        pending.append((
            entry.path, status,
            entry.bytes_after, entry.files_after, entry.depth,
            streak, growth,
        ))
        counts[bucket] += 1

    if pending:
        store.bulk_upsert_path_status(pending)

    store.set_scan_number(store.get_scan_number() + 1)

    _log.info(
        "Tracking updated: %d promoted, %d stabilized, %d unchanged",
        counts["promoted"], counts["stabilized"], counts["unchanged"],
    )
    return counts
179
+
180
+
181
def compact(
    store: SnapshotStore,
    config: AdaptiveConfig,
) -> CompactResult:
    """Run compaction: collapse stable subtrees and prune old snapshots.

    This is the key storage saver. For every path marked "stable", we delete
    all its deeper child entries from the DB. The parent aggregate row stays,
    so diffs still work at that level — we just lose the ability to drill into
    the subtree of a path we've confirmed doesn't change.

    Args:
        store: Snapshot store to compact.
        config: Adaptive settings; compaction is skipped entirely when the
            mode is "disabled" or ``auto_compact`` is off.

    Returns:
        A CompactResult with the number of entries removed, an estimate of
        the bytes saved, the number of stable paths handed to the collapse
        step, and the number of snapshots pruned. All fields are 0 when
        compaction is skipped.
    """
    if config.mode == "disabled" or not config.auto_compact:
        return CompactResult(
            entries_removed=0, bytes_saved_estimate=0,
            paths_collapsed=0, snapshots_pruned=0,
        )

    # Rough per-entry size used only for the "bytes saved" estimate.
    approx_bytes_per_entry = 300

    stable_paths = [
        str(row["path"]) for row in store.get_path_statuses(status="stable")
    ]

    entries_before = store.total_entry_count()
    if stable_paths:
        store.collapse_stable_children(stable_paths)

    # The baseline id is passed to the retention step alongside the
    # configured number of snapshots to retain.
    baseline_id = store.get_baseline_snapshot_id()
    pruned = store.smart_retain(config.retain_snapshots, baseline_id)

    removed = entries_before - store.total_entry_count()
    est_saved = removed * approx_bytes_per_entry

    if removed > 0:
        try:
            store.vacuum()
        except Exception:
            # Vacuum is best-effort: keep going, but record the traceback so
            # the failure can actually be diagnosed from the logs.
            _log.warning("Vacuum failed (non-fatal)", exc_info=True)

    _log.info(
        "Compaction: %d entries removed (~%d MB saved), "
        "%d subtrees collapsed, %d snapshots pruned",
        removed, est_saved // (1024 * 1024), len(stable_paths), pruned,
    )
    return CompactResult(
        entries_removed=removed,
        bytes_saved_estimate=est_saved,
        paths_collapsed=len(stable_paths),
        snapshots_pruned=pruned,
    )
229
+
230
+
231
def ensure_baseline(store: SnapshotStore, snap: Snapshot) -> None:
    """Register ``snap`` as the baseline snapshot if the store has none yet.

    The baseline is the anchor — the first snapshot, or the last snapshot
    before diffs were detected. We never delete it during compaction.

    Nothing happens when a baseline is already recorded or when ``snap`` has
    no id.
    """
    if store.get_baseline_snapshot_id() is not None:
        return

    snapshot_id = snap.id
    if snapshot_id is None:
        return

    store.set_baseline_snapshot_id(snapshot_id)
    _log.info("Baseline set to snapshot #%d", snapshot_id)
240
+
241
+
242
def smart_baseline_update(
    store: SnapshotStore,
    diff: SnapshotDiff,
) -> None:
    """Move the baseline to the newest snapshot when the diff shows no growth.

    This ensures we don't accumulate snapshots during quiet periods. The
    baseline is left alone when total growth is non-zero or when the new
    snapshot has no id.
    """
    if diff.total_growth_bytes != 0:
        return

    latest_id = diff.snapshot_new.id
    if latest_id is not None:
        store.set_baseline_snapshot_id(latest_id)
252
+
253
+
254
def adaptive_cycle(
    store: SnapshotStore,
    adaptive_config: AdaptiveConfig,
    old_snap: Snapshot,
    new_snap: Snapshot,
) -> tuple[SnapshotDiff, dict[str, int], CompactResult]:
    """Run the post-scan adaptive steps: diff → track → compact.

    Call this after taking a new snapshot.

    When ``compute_diff`` yields ``None`` (logged here as incompatible scan
    depths) the cycle is skipped and an empty diff, zeroed tracking counts
    and a zeroed CompactResult are returned. Otherwise the baseline is
    ensured, tracking is updated, the baseline is refreshed if nothing grew,
    and compaction runs when ``auto_compact`` is on and the scan number is
    greater than 1 and divisible by 3.

    Returns (diff, tracking_summary, compact_result).
    """
    diff = compute_diff(store, old_snap, new_snap)
    if diff is None:
        _log.info(
            "Skipping adaptive cycle: incompatible scan depths "
            "(old=%s, new=%s)",
            old_snap.scan_depth,
            new_snap.scan_depth,
        )
        empty_diff = SnapshotDiff(
            snapshot_old=old_snap,
            snapshot_new=new_snap,
            elapsed_seconds=0.0,
            entries=[],
            total_growth_bytes=0,
        )
        no_tracking = {"promoted": 0, "stabilized": 0, "unchanged": 0}
        no_compaction = CompactResult(
            entries_removed=0, bytes_saved_estimate=0,
            paths_collapsed=0, snapshots_pruned=0,
        )
        return empty_diff, no_tracking, no_compaction

    ensure_baseline(store, old_snap)
    tracking = update_tracking(store, adaptive_config, diff)
    smart_baseline_update(store, diff)

    # Read the counter after update_tracking, which is called just above.
    scan_num = store.get_scan_number()
    if adaptive_config.auto_compact and scan_num > 1 and scan_num % 3 == 0:
        compact_result = compact(store, adaptive_config)
    else:
        compact_result = CompactResult(
            entries_removed=0, bytes_saved_estimate=0,
            paths_collapsed=0, snapshots_pruned=0,
        )

    return diff, tracking, compact_result
308
+
309
+
310
def get_adaptive_stats(store: SnapshotStore) -> dict[str, object]:
    """Summarise the adaptive scan state for the UI.

    Counts tracked paths per status and reports them together with the
    current scan number, the baseline snapshot id, the number of tracked
    paths and the store's total entry count.
    """
    rows = store.get_path_statuses()

    tally: dict[str, int] = dict.fromkeys(("active", "stable", "focus"), 0)
    for record in rows:
        label = str(record["status"])
        tally[label] = tally.get(label, 0) + 1

    scan_number = store.get_scan_number()
    baseline_id = store.get_baseline_snapshot_id()
    tracked = len(rows)
    total_entries = store.total_entry_count()

    return {
        "scan_number": scan_number,
        "baseline_snapshot_id": baseline_id,
        "total_tracked_paths": tracked,
        "active_paths": tally.get("active", 0),
        "stable_paths": tally.get("stable", 0),
        "focus_paths": tally.get("focus", 0),
        "total_entries": total_entries,
    }