PyPI - splitsmith - Versions diffs - 0.2.0__py3-none-any.whl - Mend

splitsmith 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

splitsmith/__init__.py +3 -0
splitsmith/audit.py +87 -0
splitsmith/automation.py +238 -0
splitsmith/backup.py +298 -0
splitsmith/beep_calibration.py +324 -0
splitsmith/beep_detect.py +371 -0
splitsmith/cleanup.py +327 -0
splitsmith/cli.py +1281 -0
splitsmith/coach.py +253 -0
splitsmith/coach_distributions.py +348 -0
splitsmith/compare/__init__.py +7 -0
splitsmith/compare/cli.py +153 -0
splitsmith/compare/emitter.py +456 -0
splitsmith/compare/filler.py +98 -0
splitsmith/compare/layout.py +164 -0
splitsmith/compare/manifest.py +91 -0
splitsmith/compare/project_loader.py +195 -0
splitsmith/composition.py +606 -0
splitsmith/config.py +442 -0
splitsmith/cross_align.py +210 -0
splitsmith/csv_gen.py +66 -0
splitsmith/data/ensemble_calibration.json +248 -0
splitsmith/data/fonts/Antonio-OFL.txt +93 -0
splitsmith/data/fonts/Antonio-VariableFont.ttf +0 -0
splitsmith/data/fonts/JetBrainsMono-Bold.ttf +0 -0
splitsmith/data/fonts/JetBrainsMono-OFL.txt +93 -0
splitsmith/data/overlay_theme.json +40 -0
splitsmith/data/templates/action-cut.yaml +19 -0
splitsmith/data/templates/match-recap.yaml +20 -0
splitsmith/data/voter_c_gbdt.joblib +0 -0
splitsmith/data/voter_e_visual_probe.joblib +0 -0
splitsmith/ensemble/__init__.py +67 -0
splitsmith/ensemble/agc_state.py +165 -0
splitsmith/ensemble/api.py +419 -0
splitsmith/ensemble/backend.py +89 -0
splitsmith/ensemble/calibration.py +367 -0
splitsmith/ensemble/clap_mel.py +138 -0
splitsmith/ensemble/features.py +680 -0
splitsmith/ensemble/fixtures.py +222 -0
splitsmith/ensemble/tta.py +115 -0
splitsmith/ensemble/visual.py +294 -0
splitsmith/ensemble/voters.py +202 -0
splitsmith/fcp7xml_render.py +558 -0
splitsmith/fcpxml_gen.py +1721 -0
splitsmith/fixture_schema.py +482 -0
splitsmith/lab/__init__.py +79 -0
splitsmith/lab/core.py +1118 -0
splitsmith/lab/promote.py +555 -0
splitsmith/lab/snap_window.py +331 -0
splitsmith/lab/sweeps.py +231 -0
splitsmith/lab_cli.py +750 -0
splitsmith/match_cli.py +315 -0
splitsmith/match_model.py +793 -0
splitsmith/match_registry.py +131 -0
splitsmith/mcp/__init__.py +23 -0
splitsmith/mcp/__main__.py +20 -0
splitsmith/mcp/detect_tools.py +476 -0
splitsmith/mcp/export_tools.py +356 -0
splitsmith/mcp/sandbox.py +77 -0
splitsmith/mcp/server.py +393 -0
splitsmith/mcp/tools.py +207 -0
splitsmith/mcp/write_tools.py +268 -0
splitsmith/model_cli.py +153 -0
splitsmith/models/__init__.py +40 -0
splitsmith/models/cache.py +139 -0
splitsmith/models/download.py +95 -0
splitsmith/models/errors.py +50 -0
splitsmith/models/manifest.py +68 -0
splitsmith/models/registry.py +256 -0
splitsmith/mp4_render.py +513 -0
splitsmith/overlay_render.py +817 -0
splitsmith/overlay_theme.py +146 -0
splitsmith/relink.py +245 -0
splitsmith/report.py +258 -0
splitsmith/runtime.py +268 -0
splitsmith/shot_detect.py +506 -0
splitsmith/shot_refine.py +252 -0
splitsmith/system_check.py +162 -0
splitsmith/templates.py +188 -0
splitsmith/thumbnail.py +230 -0
splitsmith/trim.py +211 -0
splitsmith/ui/__init__.py +10 -0
splitsmith/ui/audio.py +536 -0
splitsmith/ui/embedded.py +312 -0
splitsmith/ui/exports.py +533 -0
splitsmith/ui/jobs.py +652 -0
splitsmith/ui/logging_setup.py +108 -0
splitsmith/ui/match_exports.py +500 -0
splitsmith/ui/project.py +1734 -0
splitsmith/ui/scoreboard/__init__.py +77 -0
splitsmith/ui/scoreboard/cache.py +237 -0
splitsmith/ui/scoreboard/http.py +206 -0
splitsmith/ui/scoreboard/local.py +377 -0
splitsmith/ui/scoreboard/models.py +301 -0
splitsmith/ui/scoreboard/protocol.py +51 -0
splitsmith/ui/server.py +9178 -0
splitsmith/ui_static/package-lock.json +3062 -0
splitsmith/ui_static/tsconfig.app.tsbuildinfo +1 -0
splitsmith/ui_static/tsconfig.node.tsbuildinfo +1 -0
splitsmith/user_config.py +380 -0
splitsmith/video_match.py +159 -0
splitsmith/video_probe.py +143 -0
splitsmith/waveform.py +121 -0
splitsmith/youtube_sidecar.py +293 -0
splitsmith-0.2.0.dist-info/METADATA +301 -0
splitsmith-0.2.0.dist-info/RECORD +109 -0
splitsmith-0.2.0.dist-info/WHEEL +4 -0
splitsmith-0.2.0.dist-info/entry_points.txt +3 -0
splitsmith-0.2.0.dist-info/licenses/LICENSE +21 -0

splitsmith/cleanup.py ADDED Viewed

@@ -0,0 +1,327 @@
+"""Tiered project cleanup -- plan + apply (issue: reclaim disk space).
+The disk footprint of a project grows fast: rendered overlays and lossless
+trims are hundreds of MB to multi-GB each, audit-mode trims and extracted
+audio are similar order. Most of these are recreatable from the source
+video + audit JSON, but recomputing them costs minutes of ffmpeg time, so
+the user picks which categories to drop.
+Two-phase API:
+- :func:`plan_cleanup` walks the project's resolved directories and returns
+  a :class:`CleanupPlan` (file list + per-category totals). Pure: no
+  deletion happens here. Callers can preview the plan, render it, decide.
+- :func:`apply_cleanup` walks the plan, unlinks each file, and returns a
+  :class:`CleanupResult`. Records to ``<root>/.cleanup.log`` (JSONL) when
+  ``root`` is given so the user has an audit trail of what was reclaimed.
+Categories are independent toggles, NOT a strict hierarchy. The CLI and
+SPA both build the requested set from per-category flags / checkboxes.
+What is NEVER touched:
+- ``project.json`` -- contains user's video assignments and beep times.
+- ``raw/`` -- the symlinks that point at the user's original sources.
+- The original source video files themselves.
+The :class:`CleanupCategory.AUDIT_DATA` bucket *is* destructive (drops
+the user's audit work). It is excluded from the convenience ``--all`` /
+"select all" affordance and gated by an explicit opt-in.
+"""
+from __future__ import annotations
+import json
+from collections.abc import Iterable
+from datetime import UTC, datetime
+from enum import StrEnum
+from pathlib import Path
+from pydantic import BaseModel, Field
+from .ui.project import MatchProject
# Filename of the per-project cleanup audit trail, written directly under
# the project root. The format is JSONL (one JSON object per line) so that
# successive cleanups append cleanly without rewriting earlier records.
# The leading dot keeps the file hidden so it doesn't clutter Finder.
CLEANUP_LOG_FILENAME: str = ".cleanup.log"
+class CleanupCategory(StrEnum):
+    """Logical buckets the user can independently toggle.
+    The string values are the wire format -- CLI flags use them with the
+    ``-`` separator (``exports-light``, ``audit-data``) and the SPA passes
+    them through unchanged. Adding a new bucket means: extend this enum,
+    extend the glob mapping in :func:`_iter_paths`, and add the SPA
+    checkbox + CLI flag.
+    """
+    CACHES = "caches"
+    EXPORTS_LIGHT = "exports-light"
+    EXPORTS_OVERLAYS = "exports-overlays"
+    EXPORTS_TRIMS = "exports-trims"
+    AUDIT_TRIMS = "audit-trims"
+    AUDIO = "audio"
+    AUDIT_DATA = "audit-data"
# The set behind --all / "select all": every category except AUDIT_DATA,
# which users must opt into explicitly via --include-audit.
SAFE_CATEGORIES: frozenset[CleanupCategory] = frozenset(
    set(CleanupCategory) - {CleanupCategory.AUDIT_DATA}
)
class CleanupItem(BaseModel):
    """A single file scheduled for removal by a cleanup plan."""

    # Location of the file to unlink.
    path: Path
    # Size recorded at planning time; used later for the bytes-freed tally.
    size_bytes: int
    # Bucket that selected this file.
    category: CleanupCategory
class CleanupTotals(BaseModel):
    """Aggregate for one category, shown in the plan and the UI dialog."""

    # Number of files planned for this category.
    file_count: int = 0
    # Combined size of those files. The field name is part of the
    # serialised schema, hence the shadowing of the ``bytes`` builtin.
    bytes: int = 0
class CleanupPlan(BaseModel):
    """What :func:`plan_cleanup` proposes to delete -- nothing is removed yet.

    The model serialises to JSON, so the SPA can render the totals while
    the CLI prints them via Rich. :func:`plan_cleanup` orders ``items``
    by (category, path) so both front ends list files identically.
    """

    # Every file that would be unlinked.
    items: list[CleanupItem] = Field(default_factory=list)
    # Per-category roll-up, keyed by the requested categories.
    totals_by_category: dict[CleanupCategory, CleanupTotals] = Field(default_factory=dict)
    # Grand totals across all categories.
    total_bytes: int = 0
    total_file_count: int = 0
class CleanupResult(BaseModel):
    """What :func:`apply_cleanup` actually did."""

    # Paths whose unlink call completed without error.
    deleted: list[Path] = Field(default_factory=list)
    # (path, error message) pairs for unlink calls that raised.
    failed: list[tuple[Path, str]] = Field(default_factory=list)
    # Sum of the planned sizes of the entries in ``deleted``.
    bytes_freed: int = 0
+# ---------------------------------------------------------------------------
+# Internals
+# ---------------------------------------------------------------------------
def _iter_paths(
    project: MatchProject,
    root: Path,
    category: CleanupCategory,
) -> Iterable[Path]:
    """Generate the files that belong to ``category``.

    Directories are obtained from the ``MatchProject`` resolvers so that
    path overrides (audio_dir, exports_dir, ...) are honoured; the one
    exception is the scoreboard cache, addressed as
    ``root/scoreboard/cache``. A directory that does not exist simply
    contributes nothing, so a fresh project that has never run a job
    reports zero files instead of failing.

    Symlinks are never produced: :func:`_glob` filters them out, which
    keeps a user-placed link from leading the cleanup back to an
    original source file.
    """
    if category is CleanupCategory.CACHES:
        # Thumbnails (jpg + small preview MP4s), ffprobe JSONs, the
        # scoreboard API cache, and the waveform peaks JSON that sits
        # next to the audio cache.
        yield from _glob(project.thumbs_path(root), "*")
        yield from _glob(project.probes_path(root), "*.json")
        yield from _glob(root / "scoreboard" / "cache", "**/*")
        yield from _glob(project.audio_path(root), "*.peaks-*.json")
        return

    if category is CleanupCategory.EXPORTS_LIGHT:
        exports_dir = project.exports_path(root)
        for pattern in ("*.fcpxml", "*.csv", "*_report.txt"):
            yield from _glob(exports_dir, pattern)
        return

    if category is CleanupCategory.EXPORTS_OVERLAYS:
        yield from _glob(project.exports_path(root), "*_overlay.mov")
        return

    if category is CleanupCategory.EXPORTS_TRIMS:
        # One pattern covers the primary ``stage<N>_<slug>_trimmed.mp4``
        # and the per-camera ``stage<N>_<slug>_cam_<id>_trimmed.mp4``.
        yield from _glob(project.exports_path(root), "*_trimmed.mp4")
        return

    if category is CleanupCategory.AUDIT_TRIMS:
        yield from _glob(project.trimmed_path(root), "*.mp4")
        return

    if category is CleanupCategory.AUDIO:
        # Only the heavyweight extracted WAVs live here; the small,
        # re-derivable peaks JSONs are handled by the CACHES bucket.
        yield from _glob(project.audio_path(root), "*.wav")
        return

    if category is CleanupCategory.AUDIT_DATA:
        audit_dir = project.audit_path(root)
        for pattern in ("stage*.json", "stage*.json.bak"):
            yield from _glob(audit_dir, pattern)
+def _glob(directory: Path, pattern: str) -> Iterable[Path]:
+    """Glob ``directory`` for ``pattern`` while tolerating missing dirs.
+    ``rglob`` is used when the pattern starts with ``**`` so the
+    scoreboard cache (which has subdirs by content_type) is fully
+    swept. Symlinks and non-files are skipped at the source.
+    """
+    if not directory.exists():
+        return
+    if pattern.startswith("**"):
+        # rglob('**/*') over a missing dir would have raised; we guarded
+        # above. Strip the leading '**/' so rglob does not double-prefix.
+        suffix = pattern[3:] or "*"
+        iterator = directory.rglob(suffix)
+    else:
+        iterator = directory.glob(pattern)
+    for p in iterator:
+        if p.is_symlink():
+            continue
+        if not p.is_file():
+            continue
+        yield p
+def _safe_under_raw(project: MatchProject, root: Path, candidate: Path) -> bool:
+    """Defence-in-depth: refuse any item that resolves under ``raw/``.
+    The cleanup never globs into ``raw/``, so this should never fire,
+    but a typo in a future glob (or a symlink we missed) shouldn't be
+    able to delete a source-video reference.
+    """
+    try:
+        raw = project.raw_path(root).resolve()
+    except OSError:
+        return True
+    try:
+        candidate.resolve().relative_to(raw)
+    except (OSError, ValueError):
+        return True
+    return False
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
def plan_cleanup(
    project: MatchProject,
    root: Path,
    categories: Iterable[CleanupCategory],
) -> CleanupPlan:
    """Survey the project and describe what a cleanup would remove.

    Nothing is deleted and the project is not modified, so the call can
    be repeated freely. An empty selection produces an empty plan. Every
    requested category gets an entry in ``totals_by_category`` -- zeroed
    when its directory is missing or holds no matching files -- so the
    SPA can render the row without checking again.
    """
    selected: set[CleanupCategory] = set(categories)
    per_category: dict[CleanupCategory, CleanupTotals] = {
        cat: CleanupTotals() for cat in selected
    }
    planned: list[CleanupItem] = []

    for cat in selected:
        bucket = per_category[cat]
        for candidate in _iter_paths(project, root, cat):
            # Not expected to trigger with today's globs; kept so a later
            # mistake cannot end up deleting references under raw/.
            if not _safe_under_raw(project, root, candidate):
                continue
            try:
                nbytes = candidate.lstat().st_size
            except OSError:
                # The file could not be stat'd -- leave it out of the plan.
                continue
            planned.append(
                CleanupItem(path=candidate, size_bytes=nbytes, category=cat)
            )
            bucket.file_count += 1
            bucket.bytes += nbytes

    # Stable (category, path) order so every front end lists files alike.
    planned.sort(key=lambda entry: (entry.category.value, str(entry.path)))

    grand_bytes = sum(bucket.bytes for bucket in per_category.values())
    grand_files = sum(bucket.file_count for bucket in per_category.values())
    return CleanupPlan(
        items=planned,
        totals_by_category=per_category,
        total_bytes=grand_bytes,
        total_file_count=grand_files,
    )
def apply_cleanup(
    plan: CleanupPlan,
    *,
    root: Path | None = None,
) -> CleanupResult:
    """Unlink each file listed in ``plan`` and report the outcome.

    A failure on one file never aborts the run: the ``OSError`` is
    stored with its path in :attr:`CleanupResult.failed` for the caller
    to surface. A file that has already disappeared (for instance,
    removed concurrently by another process) counts as deleted, because
    ``unlink(missing_ok=True)`` succeeds silently. ``bytes_freed`` adds
    up the sizes recorded in the plan; nothing is re-stat'd afterwards.

    If ``root`` is supplied, a one-line JSONL summary is appended to
    ``<root>/.cleanup.log``, creating the directory when necessary. The
    log is best-effort: failing to write it does not undo or invalidate
    the cleanup itself.
    """
    removed: list[Path] = []
    errors: list[tuple[Path, str]] = []
    reclaimed = 0

    for entry in plan.items:
        target = entry.path
        try:
            target.unlink(missing_ok=True)
        except OSError as err:
            errors.append((target, str(err)))
        else:
            removed.append(target)
            reclaimed += entry.size_bytes

    outcome = CleanupResult(deleted=removed, failed=errors, bytes_freed=reclaimed)
    if root is None:
        return outcome

    try:
        _append_log(root, plan, outcome)
    except OSError:
        # Deliberately ignored: the audit trail is optional.
        pass
    return outcome
def _append_log(root: Path, plan: CleanupPlan, result: CleanupResult) -> None:
    """Write one summary record for this run to ``<root>/.cleanup.log``.

    The record is a single JSON object on its own line (JSONL). It is
    kept deliberately small because the log is meant for people to read,
    not for reloading state; adding fields later does not invalidate
    lines written earlier.
    """
    destination = root / CLEANUP_LOG_FILENAME
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Only categories that contributed at least one planned item appear.
    touched = {entry.category.value for entry in plan.items}
    summary = {
        "ts": datetime.now(UTC).isoformat(),
        "categories": sorted(touched),
        "deleted_count": len(result.deleted),
        "failed_count": len(result.failed),
        "bytes_freed": result.bytes_freed,
    }

    # Append mode keeps the records of earlier cleanups intact.
    with destination.open("a", encoding="utf-8") as handle:
        handle.write(json.dumps(summary) + "\n")