splitsmith 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. splitsmith/__init__.py +3 -0
  2. splitsmith/audit.py +87 -0
  3. splitsmith/automation.py +238 -0
  4. splitsmith/backup.py +298 -0
  5. splitsmith/beep_calibration.py +324 -0
  6. splitsmith/beep_detect.py +371 -0
  7. splitsmith/cleanup.py +327 -0
  8. splitsmith/cli.py +1281 -0
  9. splitsmith/coach.py +253 -0
  10. splitsmith/coach_distributions.py +348 -0
  11. splitsmith/compare/__init__.py +7 -0
  12. splitsmith/compare/cli.py +153 -0
  13. splitsmith/compare/emitter.py +456 -0
  14. splitsmith/compare/filler.py +98 -0
  15. splitsmith/compare/layout.py +164 -0
  16. splitsmith/compare/manifest.py +91 -0
  17. splitsmith/compare/project_loader.py +195 -0
  18. splitsmith/composition.py +606 -0
  19. splitsmith/config.py +442 -0
  20. splitsmith/cross_align.py +210 -0
  21. splitsmith/csv_gen.py +66 -0
  22. splitsmith/data/ensemble_calibration.json +248 -0
  23. splitsmith/data/fonts/Antonio-OFL.txt +93 -0
  24. splitsmith/data/fonts/Antonio-VariableFont.ttf +0 -0
  25. splitsmith/data/fonts/JetBrainsMono-Bold.ttf +0 -0
  26. splitsmith/data/fonts/JetBrainsMono-OFL.txt +93 -0
  27. splitsmith/data/overlay_theme.json +40 -0
  28. splitsmith/data/templates/action-cut.yaml +19 -0
  29. splitsmith/data/templates/match-recap.yaml +20 -0
  30. splitsmith/data/voter_c_gbdt.joblib +0 -0
  31. splitsmith/data/voter_e_visual_probe.joblib +0 -0
  32. splitsmith/ensemble/__init__.py +67 -0
  33. splitsmith/ensemble/agc_state.py +165 -0
  34. splitsmith/ensemble/api.py +419 -0
  35. splitsmith/ensemble/backend.py +89 -0
  36. splitsmith/ensemble/calibration.py +367 -0
  37. splitsmith/ensemble/clap_mel.py +138 -0
  38. splitsmith/ensemble/features.py +680 -0
  39. splitsmith/ensemble/fixtures.py +222 -0
  40. splitsmith/ensemble/tta.py +115 -0
  41. splitsmith/ensemble/visual.py +294 -0
  42. splitsmith/ensemble/voters.py +202 -0
  43. splitsmith/fcp7xml_render.py +558 -0
  44. splitsmith/fcpxml_gen.py +1721 -0
  45. splitsmith/fixture_schema.py +482 -0
  46. splitsmith/lab/__init__.py +79 -0
  47. splitsmith/lab/core.py +1118 -0
  48. splitsmith/lab/promote.py +555 -0
  49. splitsmith/lab/snap_window.py +331 -0
  50. splitsmith/lab/sweeps.py +231 -0
  51. splitsmith/lab_cli.py +750 -0
  52. splitsmith/match_cli.py +315 -0
  53. splitsmith/match_model.py +793 -0
  54. splitsmith/match_registry.py +131 -0
  55. splitsmith/mcp/__init__.py +23 -0
  56. splitsmith/mcp/__main__.py +20 -0
  57. splitsmith/mcp/detect_tools.py +476 -0
  58. splitsmith/mcp/export_tools.py +356 -0
  59. splitsmith/mcp/sandbox.py +77 -0
  60. splitsmith/mcp/server.py +393 -0
  61. splitsmith/mcp/tools.py +207 -0
  62. splitsmith/mcp/write_tools.py +268 -0
  63. splitsmith/model_cli.py +153 -0
  64. splitsmith/models/__init__.py +40 -0
  65. splitsmith/models/cache.py +139 -0
  66. splitsmith/models/download.py +95 -0
  67. splitsmith/models/errors.py +50 -0
  68. splitsmith/models/manifest.py +68 -0
  69. splitsmith/models/registry.py +256 -0
  70. splitsmith/mp4_render.py +513 -0
  71. splitsmith/overlay_render.py +817 -0
  72. splitsmith/overlay_theme.py +146 -0
  73. splitsmith/relink.py +245 -0
  74. splitsmith/report.py +258 -0
  75. splitsmith/runtime.py +268 -0
  76. splitsmith/shot_detect.py +506 -0
  77. splitsmith/shot_refine.py +252 -0
  78. splitsmith/system_check.py +162 -0
  79. splitsmith/templates.py +188 -0
  80. splitsmith/thumbnail.py +230 -0
  81. splitsmith/trim.py +211 -0
  82. splitsmith/ui/__init__.py +10 -0
  83. splitsmith/ui/audio.py +536 -0
  84. splitsmith/ui/embedded.py +312 -0
  85. splitsmith/ui/exports.py +533 -0
  86. splitsmith/ui/jobs.py +652 -0
  87. splitsmith/ui/logging_setup.py +108 -0
  88. splitsmith/ui/match_exports.py +500 -0
  89. splitsmith/ui/project.py +1734 -0
  90. splitsmith/ui/scoreboard/__init__.py +77 -0
  91. splitsmith/ui/scoreboard/cache.py +237 -0
  92. splitsmith/ui/scoreboard/http.py +206 -0
  93. splitsmith/ui/scoreboard/local.py +377 -0
  94. splitsmith/ui/scoreboard/models.py +301 -0
  95. splitsmith/ui/scoreboard/protocol.py +51 -0
  96. splitsmith/ui/server.py +9178 -0
  97. splitsmith/ui_static/package-lock.json +3062 -0
  98. splitsmith/ui_static/tsconfig.app.tsbuildinfo +1 -0
  99. splitsmith/ui_static/tsconfig.node.tsbuildinfo +1 -0
  100. splitsmith/user_config.py +380 -0
  101. splitsmith/video_match.py +159 -0
  102. splitsmith/video_probe.py +143 -0
  103. splitsmith/waveform.py +121 -0
  104. splitsmith/youtube_sidecar.py +293 -0
  105. splitsmith-0.2.0.dist-info/METADATA +301 -0
  106. splitsmith-0.2.0.dist-info/RECORD +109 -0
  107. splitsmith-0.2.0.dist-info/WHEEL +4 -0
  108. splitsmith-0.2.0.dist-info/entry_points.txt +3 -0
  109. splitsmith-0.2.0.dist-info/licenses/LICENSE +21 -0
splitsmith/cleanup.py ADDED
@@ -0,0 +1,327 @@
1
+ """Tiered project cleanup -- plan + apply (issue: reclaim disk space).
2
+
3
+ The disk footprint of a project grows fast: rendered overlays and lossless
4
+ trims are hundreds of MB to multi-GB each, audit-mode trims and extracted
5
+ audio are similar order. Most of these are recreatable from the source
6
+ video + audit JSON, but recomputing them costs minutes of ffmpeg time, so
7
+ the user picks which categories to drop.
8
+
9
+ Two-phase API:
10
+
11
+ - :func:`plan_cleanup` walks the project's resolved directories and returns
12
+ a :class:`CleanupPlan` (file list + per-category totals). Pure: no
13
+ deletion happens here. Callers can preview the plan, render it, decide.
14
+ - :func:`apply_cleanup` walks the plan, unlinks each file, and returns a
15
+ :class:`CleanupResult`. Records to ``<root>/.cleanup.log`` (JSONL) when
16
+ ``root`` is given so the user has an audit trail of what was reclaimed.
17
+
18
+ Categories are independent toggles, NOT a strict hierarchy. The CLI and
19
+ SPA both build the requested set from per-category flags / checkboxes.
20
+
21
+ What is NEVER touched:
22
+
23
+ - ``project.json`` -- contains user's video assignments and beep times.
24
+ - ``raw/`` -- the symlinks that point at the user's original sources.
25
+ - The original source video files themselves.
26
+
27
+ The :class:`CleanupCategory.AUDIT_DATA` bucket *is* destructive (drops
28
+ the user's audit work). It is excluded from the convenience ``--all`` /
29
+ "select all" affordance and gated by an explicit opt-in.
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ import json
35
+ from collections.abc import Iterable
36
+ from datetime import UTC, datetime
37
+ from enum import StrEnum
38
+ from pathlib import Path
39
+
40
+ from pydantic import BaseModel, Field
41
+
42
+ from .ui.project import MatchProject
43
+
44
# Name of the per-project cleanup audit trail, written directly under the
# project root. The format is JSONL so each cleanup run appends one
# self-contained line; the leading dot keeps it hidden in file browsers.
CLEANUP_LOG_FILENAME = ".cleanup.log"
47
+
48
+
49
+ class CleanupCategory(StrEnum):
50
+ """Logical buckets the user can independently toggle.
51
+
52
+ The string values are the wire format -- CLI flags use them with the
53
+ ``-`` separator (``exports-light``, ``audit-data``) and the SPA passes
54
+ them through unchanged. Adding a new bucket means: extend this enum,
55
+ extend the glob mapping in :func:`_iter_paths`, and add the SPA
56
+ checkbox + CLI flag.
57
+ """
58
+
59
+ CACHES = "caches"
60
+ EXPORTS_LIGHT = "exports-light"
61
+ EXPORTS_OVERLAYS = "exports-overlays"
62
+ EXPORTS_TRIMS = "exports-trims"
63
+ AUDIT_TRIMS = "audit-trims"
64
+ AUDIO = "audio"
65
+ AUDIT_DATA = "audit-data"
66
+
67
+
68
# Buckets that --all / "select all" may include. AUDIT_DATA is left out on
# purpose: dropping it discards the user's audit work, so it has to be
# requested explicitly (--include-audit).
SAFE_CATEGORIES: frozenset[CleanupCategory] = frozenset(
    set(CleanupCategory) - {CleanupCategory.AUDIT_DATA}
)
73
+
74
+
75
class CleanupItem(BaseModel):
    """A single file scheduled for deletion by a cleanup plan."""

    # Location of the file that would be unlinked.
    path: Path
    # Size recorded when the plan was built (not re-measured on delete).
    size_bytes: int
    # Bucket that selected this file.
    category: CleanupCategory
81
+
82
+
83
class CleanupTotals(BaseModel):
    """Aggregate for one category, shown in the plan and the UI dialog."""

    # Number of files the category contributes to the plan.
    file_count: int = 0
    # Combined size of those files, in bytes.
    bytes: int = 0
88
+
89
+
90
class CleanupPlan(BaseModel):
    """Description of what a cleanup would remove, built by :func:`plan_cleanup`.

    Nothing is deleted while a plan is being produced. The model is
    JSON-serialisable so the SPA can render the totals and the CLI can
    print them via Rich. ``items`` is ordered by (category, path), which
    keeps the CLI listing and the SPA preview in agreement.
    """

    # Every file the plan would unlink.
    items: list[CleanupItem] = Field(default_factory=list)
    # Per-category roll-up; requested categories with no files map to zeros.
    totals_by_category: dict[CleanupCategory, CleanupTotals] = Field(default_factory=dict)
    # Sum of the per-category byte totals.
    total_bytes: int = 0
    # Sum of the per-category file counts.
    total_file_count: int = 0
102
+
103
+
104
class CleanupResult(BaseModel):
    """What :func:`apply_cleanup` actually did."""

    # Paths whose unlink call completed without raising.
    deleted: list[Path] = Field(default_factory=list)
    # (path, error message) pairs for unlink calls that raised OSError.
    failed: list[tuple[Path, str]] = Field(default_factory=list)
    # Sum of the planned sizes of the entries in ``deleted``.
    bytes_freed: int = 0
110
+
111
+
112
+ # ---------------------------------------------------------------------------
113
+ # Internals
114
+ # ---------------------------------------------------------------------------
115
+
116
+
117
def _iter_paths(
    project: MatchProject,
    root: Path,
    category: CleanupCategory,
) -> Iterable[Path]:
    """Yield the files that ``category`` selects for deletion.

    Directories are obtained from the ``MatchProject`` resolvers
    (``thumbs_path``, ``probes_path``, ``audio_path``, ``exports_path``,
    ``trimmed_path``, ``audit_path``) so configured path overrides are
    honoured; the scoreboard cache is addressed as
    ``<root>/scoreboard/cache``.

    All matching is delegated to :func:`_glob`, which means a missing
    directory contributes nothing instead of raising (a fresh project
    reports zero rather than crashing) and symlinks are never yielded.
    A category with no branch below yields nothing.
    """
    if category is CleanupCategory.CACHES:
        # Thumbnails (jpg + small preview MP4s), ffprobe JSONs, the
        # scoreboard API cache, and the waveform peaks JSONs stored
        # alongside the extracted audio.
        yield from _glob(project.thumbs_path(root), "*")
        yield from _glob(project.probes_path(root), "*.json")
        yield from _glob(root / "scoreboard" / "cache", "**/*")
        yield from _glob(project.audio_path(root), "*.peaks-*.json")
        return

    if category is CleanupCategory.EXPORTS_LIGHT:
        exports_dir = project.exports_path(root)
        for pattern in ("*.fcpxml", "*.csv", "*_report.txt"):
            yield from _glob(exports_dir, pattern)
        return

    if category is CleanupCategory.EXPORTS_OVERLAYS:
        yield from _glob(project.exports_path(root), "*_overlay.mov")
        return

    if category is CleanupCategory.EXPORTS_TRIMS:
        # One pattern covers both the primary trim
        # (``stage<N>_<slug>_trimmed.mp4``) and the per-camera trims
        # (``stage<N>_<slug>_cam_<id>_trimmed.mp4``).
        yield from _glob(project.exports_path(root), "*_trimmed.mp4")
        return

    if category is CleanupCategory.AUDIT_TRIMS:
        yield from _glob(project.trimmed_path(root), "*.mp4")
        return

    if category is CleanupCategory.AUDIO:
        # Only the heavyweight WAVs belong here; the small peaks JSONs
        # derived from them are handled by the CACHES bucket.
        yield from _glob(project.audio_path(root), "*.wav")
        return

    if category is CleanupCategory.AUDIT_DATA:
        audit_dir = project.audit_path(root)
        for pattern in ("stage*.json", "stage*.json.bak"):
            yield from _glob(audit_dir, pattern)
178
+
179
+
180
+ def _glob(directory: Path, pattern: str) -> Iterable[Path]:
181
+ """Glob ``directory`` for ``pattern`` while tolerating missing dirs.
182
+
183
+ ``rglob`` is used when the pattern starts with ``**`` so the
184
+ scoreboard cache (which has subdirs by content_type) is fully
185
+ swept. Symlinks and non-files are skipped at the source.
186
+ """
187
+ if not directory.exists():
188
+ return
189
+ if pattern.startswith("**"):
190
+ # rglob('**/*') over a missing dir would have raised; we guarded
191
+ # above. Strip the leading '**/' so rglob does not double-prefix.
192
+ suffix = pattern[3:] or "*"
193
+ iterator = directory.rglob(suffix)
194
+ else:
195
+ iterator = directory.glob(pattern)
196
+ for p in iterator:
197
+ if p.is_symlink():
198
+ continue
199
+ if not p.is_file():
200
+ continue
201
+ yield p
202
+
203
+
204
+ def _safe_under_raw(project: MatchProject, root: Path, candidate: Path) -> bool:
205
+ """Defence-in-depth: refuse any item that resolves under ``raw/``.
206
+
207
+ The cleanup never globs into ``raw/``, so this should never fire,
208
+ but a typo in a future glob (or a symlink we missed) shouldn't be
209
+ able to delete a source-video reference.
210
+ """
211
+ try:
212
+ raw = project.raw_path(root).resolve()
213
+ except OSError:
214
+ return True
215
+ try:
216
+ candidate.resolve().relative_to(raw)
217
+ except (OSError, ValueError):
218
+ return True
219
+ return False
220
+
221
+
222
+ # ---------------------------------------------------------------------------
223
+ # Public API
224
+ # ---------------------------------------------------------------------------
225
+
226
+
227
def plan_cleanup(
    project: MatchProject,
    root: Path,
    categories: Iterable[CleanupCategory],
) -> CleanupPlan:
    """Build a :class:`CleanupPlan` for the given categories.

    Read-only: nothing is deleted and the project is not mutated. An
    empty selection yields an empty plan. Every requested category gets
    an entry in ``totals_by_category`` -- with zeros when it matches no
    files -- so the SPA can show the row without re-checking.

    Args:
        project: Project whose resolvers locate the target directories.
        root: Project root directory.
        categories: Buckets to include; duplicates are collapsed.

    Returns:
        The plan, with ``items`` sorted by (category value, path) and
        ``totals_by_category`` keyed in category-value order.
    """
    # Iterate in a stable order. Walking the raw set would make the key
    # order of ``totals_by_category`` depend on per-process string
    # hashing, so the serialised plan could differ between runs.
    requested: list[CleanupCategory] = sorted(set(categories), key=str)

    items: list[CleanupItem] = []
    totals: dict[CleanupCategory, CleanupTotals] = {c: CleanupTotals() for c in requested}
    # Paths already planned. A file reachable through more than one glob
    # (e.g. when two directory overrides point at the same folder) must be
    # counted once, otherwise the totals -- and later ``bytes_freed`` --
    # would be inflated.
    seen: set[Path] = set()

    for category in requested:
        bucket = totals[category]
        for path in _iter_paths(project, root, category):
            if path in seen:
                continue
            if not _safe_under_raw(project, root, path):
                # Should never happen with the current globs; guard kept
                # so a future bug can't escalate into deleting raw refs.
                continue
            try:
                size = path.lstat().st_size
            except OSError:
                # File vanished or is unreadable: leave it out of the plan.
                continue
            seen.add(path)
            items.append(CleanupItem(path=path, size_bytes=size, category=category))
            bucket.file_count += 1
            bucket.bytes += size

    items.sort(key=lambda it: (it.category.value, str(it.path)))
    return CleanupPlan(
        items=items,
        totals_by_category=totals,
        total_bytes=sum(t.bytes for t in totals.values()),
        total_file_count=sum(t.file_count for t in totals.values()),
    )
267
+
268
+
269
def apply_cleanup(
    plan: CleanupPlan,
    *,
    root: Path | None = None,
) -> CleanupResult:
    """Unlink every file listed in ``plan`` and report the outcome.

    A failure on one file never aborts the run: the ``OSError`` message
    is stored alongside the path in :attr:`CleanupResult.failed` and
    processing moves on. A file that is already gone (for example,
    removed concurrently by another process) counts as deleted, because
    ``unlink(missing_ok=True)`` succeeds silently. ``bytes_freed`` adds
    up the sizes recorded in the plan; files are not re-stat'd.

    If ``root`` is provided, one JSONL summary line is appended to
    ``<root>/.cleanup.log`` (the directory is created when missing).
    Logging is best-effort: an ``OSError`` while writing the log does
    not affect the returned result.
    """
    removed: list[Path] = []
    errors: list[tuple[Path, str]] = []
    reclaimed = 0

    for entry in plan.items:
        target = entry.path
        try:
            target.unlink(missing_ok=True)
        except OSError as err:
            errors.append((target, str(err)))
        else:
            removed.append(target)
            reclaimed += entry.size_bytes

    outcome = CleanupResult(deleted=removed, failed=errors, bytes_freed=reclaimed)

    if root is None:
        return outcome

    try:
        _append_log(root, plan, outcome)
    except OSError:
        # Deliberately swallowed: the audit trail is a convenience and
        # must not turn a completed cleanup into a failure.
        pass

    return outcome
309
+
310
+
311
def _append_log(root: Path, plan: CleanupPlan, result: CleanupResult) -> None:
    """Write one JSONL summary of a cleanup run to ``<root>/.cleanup.log``.

    The record is kept compact because the file is meant for human
    review, not for rehydrating state; adding fields later is safe since
    existing lines remain valid. ``categories`` lists only the categories
    that actually have items in ``plan``. ``OSError`` from creating the
    directory or writing the file propagates to the caller.
    """
    destination = root / CLEANUP_LOG_FILENAME
    destination.parent.mkdir(parents=True, exist_ok=True)

    touched = {entry.category.value for entry in plan.items}
    summary = {
        "ts": datetime.now(UTC).isoformat(),
        "categories": sorted(touched),
        "deleted_count": len(result.deleted),
        "failed_count": len(result.failed),
        "bytes_freed": result.bytes_freed,
    }

    with destination.open("a", encoding="utf-8") as handle:
        handle.write(json.dumps(summary) + "\n")