splitsmith 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- splitsmith/__init__.py +3 -0
- splitsmith/audit.py +87 -0
- splitsmith/automation.py +238 -0
- splitsmith/backup.py +298 -0
- splitsmith/beep_calibration.py +324 -0
- splitsmith/beep_detect.py +371 -0
- splitsmith/cleanup.py +327 -0
- splitsmith/cli.py +1281 -0
- splitsmith/coach.py +253 -0
- splitsmith/coach_distributions.py +348 -0
- splitsmith/compare/__init__.py +7 -0
- splitsmith/compare/cli.py +153 -0
- splitsmith/compare/emitter.py +456 -0
- splitsmith/compare/filler.py +98 -0
- splitsmith/compare/layout.py +164 -0
- splitsmith/compare/manifest.py +91 -0
- splitsmith/compare/project_loader.py +195 -0
- splitsmith/composition.py +606 -0
- splitsmith/config.py +442 -0
- splitsmith/cross_align.py +210 -0
- splitsmith/csv_gen.py +66 -0
- splitsmith/data/ensemble_calibration.json +248 -0
- splitsmith/data/fonts/Antonio-OFL.txt +93 -0
- splitsmith/data/fonts/Antonio-VariableFont.ttf +0 -0
- splitsmith/data/fonts/JetBrainsMono-Bold.ttf +0 -0
- splitsmith/data/fonts/JetBrainsMono-OFL.txt +93 -0
- splitsmith/data/overlay_theme.json +40 -0
- splitsmith/data/templates/action-cut.yaml +19 -0
- splitsmith/data/templates/match-recap.yaml +20 -0
- splitsmith/data/voter_c_gbdt.joblib +0 -0
- splitsmith/data/voter_e_visual_probe.joblib +0 -0
- splitsmith/ensemble/__init__.py +67 -0
- splitsmith/ensemble/agc_state.py +165 -0
- splitsmith/ensemble/api.py +419 -0
- splitsmith/ensemble/backend.py +89 -0
- splitsmith/ensemble/calibration.py +367 -0
- splitsmith/ensemble/clap_mel.py +138 -0
- splitsmith/ensemble/features.py +680 -0
- splitsmith/ensemble/fixtures.py +222 -0
- splitsmith/ensemble/tta.py +115 -0
- splitsmith/ensemble/visual.py +294 -0
- splitsmith/ensemble/voters.py +202 -0
- splitsmith/fcp7xml_render.py +558 -0
- splitsmith/fcpxml_gen.py +1721 -0
- splitsmith/fixture_schema.py +482 -0
- splitsmith/lab/__init__.py +79 -0
- splitsmith/lab/core.py +1118 -0
- splitsmith/lab/promote.py +555 -0
- splitsmith/lab/snap_window.py +331 -0
- splitsmith/lab/sweeps.py +231 -0
- splitsmith/lab_cli.py +750 -0
- splitsmith/match_cli.py +315 -0
- splitsmith/match_model.py +793 -0
- splitsmith/match_registry.py +131 -0
- splitsmith/mcp/__init__.py +23 -0
- splitsmith/mcp/__main__.py +20 -0
- splitsmith/mcp/detect_tools.py +476 -0
- splitsmith/mcp/export_tools.py +356 -0
- splitsmith/mcp/sandbox.py +77 -0
- splitsmith/mcp/server.py +393 -0
- splitsmith/mcp/tools.py +207 -0
- splitsmith/mcp/write_tools.py +268 -0
- splitsmith/model_cli.py +153 -0
- splitsmith/models/__init__.py +40 -0
- splitsmith/models/cache.py +139 -0
- splitsmith/models/download.py +95 -0
- splitsmith/models/errors.py +50 -0
- splitsmith/models/manifest.py +68 -0
- splitsmith/models/registry.py +256 -0
- splitsmith/mp4_render.py +513 -0
- splitsmith/overlay_render.py +817 -0
- splitsmith/overlay_theme.py +146 -0
- splitsmith/relink.py +245 -0
- splitsmith/report.py +258 -0
- splitsmith/runtime.py +268 -0
- splitsmith/shot_detect.py +506 -0
- splitsmith/shot_refine.py +252 -0
- splitsmith/system_check.py +162 -0
- splitsmith/templates.py +188 -0
- splitsmith/thumbnail.py +230 -0
- splitsmith/trim.py +211 -0
- splitsmith/ui/__init__.py +10 -0
- splitsmith/ui/audio.py +536 -0
- splitsmith/ui/embedded.py +312 -0
- splitsmith/ui/exports.py +533 -0
- splitsmith/ui/jobs.py +652 -0
- splitsmith/ui/logging_setup.py +108 -0
- splitsmith/ui/match_exports.py +500 -0
- splitsmith/ui/project.py +1734 -0
- splitsmith/ui/scoreboard/__init__.py +77 -0
- splitsmith/ui/scoreboard/cache.py +237 -0
- splitsmith/ui/scoreboard/http.py +206 -0
- splitsmith/ui/scoreboard/local.py +377 -0
- splitsmith/ui/scoreboard/models.py +301 -0
- splitsmith/ui/scoreboard/protocol.py +51 -0
- splitsmith/ui/server.py +9178 -0
- splitsmith/ui_static/package-lock.json +3062 -0
- splitsmith/ui_static/tsconfig.app.tsbuildinfo +1 -0
- splitsmith/ui_static/tsconfig.node.tsbuildinfo +1 -0
- splitsmith/user_config.py +380 -0
- splitsmith/video_match.py +159 -0
- splitsmith/video_probe.py +143 -0
- splitsmith/waveform.py +121 -0
- splitsmith/youtube_sidecar.py +293 -0
- splitsmith-0.2.0.dist-info/METADATA +301 -0
- splitsmith-0.2.0.dist-info/RECORD +109 -0
- splitsmith-0.2.0.dist-info/WHEEL +4 -0
- splitsmith-0.2.0.dist-info/entry_points.txt +3 -0
- splitsmith-0.2.0.dist-info/licenses/LICENSE +21 -0
splitsmith/__init__.py
ADDED
splitsmith/audit.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Helpers for the hand-audit workflow.
|
|
2
|
+
|
|
3
|
+
Audit flow:
|
|
4
|
+
1. ``audit-prep`` (or the ad-hoc fixture script) emits ``<name>.wav``,
|
|
5
|
+
``<name>.json`` (empty ``shots[]`` ground truth), and
|
|
6
|
+
``<name>-candidates.csv`` with an ``audit_keep`` column.
|
|
7
|
+
2. The user opens the CSV, marks rows they want to keep, saves.
|
|
8
|
+
3. ``audit-apply`` reads the marked CSV and rewrites the JSON's ``shots[]``.
|
|
9
|
+
|
|
10
|
+
This module exposes the read/merge primitives. The CLI wraps them.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import csv
|
|
16
|
+
import json
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
# Anything in this set (case-insensitive, trimmed) marks a candidate as kept.
|
|
20
|
+
_KEEP_TRUTHY: frozenset[str] = frozenset({"y", "yes", "1", "x", "true", "keep", "t"})
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def is_kept(value: str | None) -> bool:
    """Report whether an ``audit_keep`` cell marks its row as kept.

    ``None`` (no cell at all) is never kept. Any other value is trimmed and
    lower-cased, then looked up in ``_KEEP_TRUTHY``.
    """
    if value is not None:
        normalized = value.strip().lower()
        return normalized in _KEEP_TRUTHY
    return False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def read_kept_candidate_numbers(candidates_csv: Path) -> list[int]:
    """Return the candidate_numbers from rows whose ``audit_keep`` cell is truthy.

    Order is preserved from the CSV, which is the natural shot order.

    The file is decoded as ``utf-8-sig`` so that a byte-order mark written by
    a spreadsheet editor is dropped instead of being glued onto the first
    header name (which would make ``audit_keep`` look like a missing column).
    Files without a BOM decode exactly as before.

    Raises:
        ValueError: if the ``audit_keep`` or ``candidate_number`` column is
            missing, or a kept row's ``candidate_number`` is not an integer.
    """
    with candidates_csv.open("r", newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        fieldnames = reader.fieldnames
        if fieldnames is None or "audit_keep" not in fieldnames:
            raise ValueError(
                f"{candidates_csv}: missing 'audit_keep' column. Expected the audit-prep "
                f"format (audit_keep, candidate_number, ...). Got: {fieldnames}"
            )
        if "candidate_number" not in fieldnames:
            raise ValueError(f"{candidates_csv}: missing 'candidate_number' column")

        kept: list[int] = []
        for row in reader:
            if not is_kept(row.get("audit_keep")):
                continue
            raw_number = row["candidate_number"]
            try:
                kept.append(int(raw_number))
            except (TypeError, ValueError) as exc:
                # A short row yields None (TypeError); junk text yields
                # ValueError. Report both with the file and line so the user
                # can find the offending cell.
                raise ValueError(
                    f"{candidates_csv}: line {reader.line_num}: candidate_number "
                    f"{raw_number!r} is not an integer"
                ) from exc
    return kept
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def apply_audit_to_fixture(candidates_csv: Path, fixture_json: Path) -> int:
    """Merge marked-keep candidates into ``fixture_json``'s ``shots[]`` array.

    Returns the number of shots written. The JSON is rewritten in place; the
    ``_candidates_pending_audit`` block is preserved unchanged so the audit can
    be re-run from the same source if the user changes their mind.

    The fixture is read and written as UTF-8 explicitly, matching how the
    candidates CSV is read, instead of relying on the platform default.

    Raises:
        ValueError: if the fixture has no (or an empty / ``null``)
            ``_candidates_pending_audit.candidates`` list, or the CSV marks a
            candidate_number that the fixture does not contain.
    """
    kept_numbers = read_kept_candidate_numbers(candidates_csv)
    data = json.loads(fixture_json.read_text(encoding="utf-8"))

    # ``or {}`` also covers an explicit ``null`` block, which would otherwise
    # surface as an AttributeError rather than the intended ValueError.
    pending = data.get("_candidates_pending_audit") or {}
    candidates = pending.get("candidates") if isinstance(pending, dict) else None
    if not candidates:
        raise ValueError(
            f"{fixture_json}: no candidates block found. Expected " f"'_candidates_pending_audit.candidates'."
        )
    by_num = {c["candidate_number"]: c for c in candidates}

    missing = sorted(n for n in kept_numbers if n not in by_num)
    if missing:
        raise ValueError(
            f"{candidates_csv}: candidate_number(s) marked keep but not present in "
            f"{fixture_json}: {missing}"
        )

    # shot_number is the 1-based position among the kept rows, in CSV order.
    shots: list[dict] = [
        {
            "shot_number": kept_idx,
            "candidate_number": cand_num,
            "time": by_num[cand_num]["time"],
            "ms_after_beep": by_num[cand_num]["ms_after_beep"],
        }
        for kept_idx, cand_num in enumerate(kept_numbers, start=1)
    ]
    data["shots"] = shots
    fixture_json.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")
    return len(shots)
|
splitsmith/automation.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"""Layered automation settings (issue #215).
|
|
2
|
+
|
|
3
|
+
Splitsmith chains certain follow-up jobs automatically -- the canonical
|
|
4
|
+
example is "user marks beep reviewed -> shot detection fires." Some
|
|
5
|
+
users want the rich auto-pipeline; others (CLI / agent flows) want
|
|
6
|
+
manual gates so they can decide each step. This module provides the
|
|
7
|
+
layered settings stack that lets a global default, a project-level
|
|
8
|
+
override, and a per-invocation CLI flag resolve cleanly into one
|
|
9
|
+
effective set of toggles -- with provenance so the UI can show *why*
|
|
10
|
+
a value is what it is.
|
|
11
|
+
|
|
12
|
+
Resolution order, top wins: **CLI > project > global > field default**.
|
|
13
|
+
|
|
14
|
+
The schema is one ``automation`` sub-object so siblings (for example
|
|
15
|
+
``overlay_render_on_audit``, ``beep_detect_on_ingest``) can land later
|
|
16
|
+
without re-shaping the on-disk format.
|
|
17
|
+
|
|
18
|
+
Backed by ``pydantic-settings`` so the global layer composes cleanly
|
|
19
|
+
with the rest of the codebase: YAML loading, env-var overrides
|
|
20
|
+
(``SPLITSMITH_AUTOMATION_*``), and the standard pydantic validation
|
|
21
|
+
story all come for free. The override types and the resolver are
|
|
22
|
+
plain pydantic / dataclasses because they aren't loaded from any
|
|
23
|
+
single source -- they're computed merges.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import logging
|
|
29
|
+
from dataclasses import dataclass
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
from typing import Any, Literal
|
|
32
|
+
|
|
33
|
+
import yaml
|
|
34
|
+
from pydantic import BaseModel
|
|
35
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
36
|
+
|
|
37
|
+
from . import user_config
|
|
38
|
+
|
|
39
|
+
logger = logging.getLogger(__name__)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Field name in the YAML and on the project model. Centralised so a
|
|
43
|
+
# rename touches one place.
|
|
44
|
+
AUTOMATION_KEY = "automation"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class AutomationSettings(BaseSettings):
    """The effective set of automation toggles.

    Default values are the *global fallback* applied when nothing
    else weighs in. The same model is used as the resolved object
    after layering -- both shapes are identical (every field has a
    concrete value at the end), which keeps callers from juggling
    two near-duplicate types.

    Loaded from the user-config ``config.yaml`` (under the
    ``automation`` block) and from ``SPLITSMITH_AUTOMATION_*`` env
    vars; init args win over both.
    """

    # Env vars are matched via the ``SPLITSMITH_AUTOMATION_`` prefix, and
    # ``extra="ignore"`` means unknown keys are dropped rather than rejected.
    model_config = SettingsConfigDict(
        env_prefix="SPLITSMITH_AUTOMATION_",
        extra="ignore",
        # We do the YAML load explicitly via :func:`load_global` so the
        # settings_customise_sources hook stays simple and the test
        # suite can construct AutomationSettings without a disk read.
    )

    # Toggle: chain shot detection off the "beep reviewed" action.
    shot_detect_on_beep_verified: bool = True
    """When the user marks a beep reviewed, fire shot detection."""

    # NOTE(review): the [0, 1] range described below is documentation only;
    # no validator on this field enforces it.
    beep_low_confidence_threshold: float = 0.95
    """Minimum auto-detector confidence in [0, 1] required to auto-trust a beep.

    Joins issue #219: an auto-detected beep with confidence at or above
    this threshold pre-sets ``beep_reviewed=True`` (the user is offered
    a one-click confirm but the chain doesn't wait), so shot detection
    fires immediately. Below the threshold the beep lands in the HITL
    queue and the user (or an agent driving the workflow) is asked to
    pick the right candidate from the ranked list.

    Manual entries always clamp confidence to 1.0 and bypass this gate.

    History: the original default was 0.6, calibrated against the
    labelled fixture set (~95% top-1 precision in the ``>= 0.7`` band).
    Field use surfaced too many incorrectly-auto-trusted beeps on
    recursive-scan ingests, so the default was raised to 0.95 pending
    a re-calibration with more data.
    """
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class AutomationOverride(BaseModel):
    """Layer override -- ``None`` on a field means "inherit from below".

    Used for the project-state and CLI-flag layers. Not loaded from
    disk by pydantic-settings: project overrides ride on the project
    JSON; CLI overrides come from typer.

    The field names mirror :class:`AutomationSettings`; the resolver looks
    each settings field up on the override by name.
    """

    # Both default to None, i.e. "this layer has no opinion".
    shot_detect_on_beep_verified: bool | None = None
    beep_low_confidence_threshold: float | None = None
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
# Where each resolved field came from. The UI uses this for the
|
|
105
|
+
# provenance badge (#216).
|
|
106
|
+
ProvenanceSource = Literal["cli", "project", "global", "default"]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass(frozen=True)
class FieldProvenance:
    """One resolved field's source + the layer values that fed in.

    ``source`` is the layer that supplied the effective value.
    ``cli_value`` / ``project_value`` are ``None`` when that layer
    stayed silent. ``global_value`` is always present because the
    global settings always have a default.

    Field types are union-typed (``bool | float``) because the
    automation block is now mixed (toggles + thresholds). The UI
    provenance widget renders the values via JSON.stringify so the
    union is invisible on the wire.
    """

    # Which layer won for this field.
    source: ProvenanceSource
    # Raw value from the CLI layer, or None if it did not set the field.
    cli_value: bool | float | None
    # Raw value from the project layer, or None if it did not set the field.
    project_value: bool | float | None
    # Value carried by the global settings object (never None).
    global_value: bool | float
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass(frozen=True)
class ResolvedAutomation:
    """Resolved settings + per-field provenance."""

    # Effective values after layering CLI > project > global.
    settings: AutomationSettings
    # Keyed by the AutomationSettings field name.
    provenance: dict[str, FieldProvenance]
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _yaml_block(path: Path) -> dict[str, Any]:
    """Read the ``automation`` block from ``path``.

    Returns an empty dict on missing file / unreadable or mis-encoded file /
    parse error / missing block -- callers fall back to the field defaults.
    A missing file is silent; every other failure is logged as a warning.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
    except (FileNotFoundError, NotADirectoryError):
        # No config file yet is the normal first-run state; nothing to report.
        return {}
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError/YAMLError, so it
        # must be listed explicitly or a mis-encoded config would crash the
        # caller instead of falling back to defaults.
        logger.warning("Ignoring unreadable automation block at %s: %s", path, exc)
        return {}
    block = data.get(AUTOMATION_KEY) if isinstance(data, dict) else None
    return block if isinstance(block, dict) else {}
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def load_global(config_path: Path | None = None) -> AutomationSettings:
    """Build the global-layer :class:`AutomationSettings`.

    ``config_path`` defaults to ``config.yaml`` in the user-config directory;
    tests pass an explicit path. When user config is disabled and no explicit
    path was given, no file is read at all.

    The YAML ``automation`` block is handed to the model as init kwargs, so
    ``SPLITSMITH_AUTOMATION_*`` env vars only supply fields the YAML leaves
    out. A block the model rejects is logged and discarded in favour of a
    freshly constructed ``AutomationSettings()``.
    """
    config_disabled = user_config.is_disabled()
    if config_disabled and config_path is None:
        return AutomationSettings()

    if config_path:
        path = config_path
    else:
        path = user_config.user_config_dir() / user_config.CONFIG_FILENAME

    yaml_values = _yaml_block(path)
    try:
        # Init kwargs carry the YAML values; keys absent from the YAML can
        # still be filled in by pydantic-settings from the environment.
        settings = AutomationSettings(**yaml_values)
    except Exception as exc:  # noqa: BLE001 -- log then fall back so a malformed config doesn't break the app
        logger.warning("Discarding malformed automation block at %s: %s", path, exc)
        settings = AutomationSettings()
    return settings
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def resolve_automation(
    *,
    global_settings: AutomationSettings | None = None,
    project_override: AutomationOverride | None = None,
    cli_override: AutomationOverride | None = None,
) -> ResolvedAutomation:
    """Collapse the CLI, project and global layers into one effective set.

    For every field on :class:`AutomationSettings` the first layer with a
    non-``None`` value wins, checked in this order:

    1. ``cli_override``
    2. ``project_override``
    3. ``global_settings`` (loaded via :func:`load_global` when omitted)

    The result bundles the effective settings with a per-field
    :class:`FieldProvenance` recording the winning layer and the raw value
    each layer contributed.
    """
    base = load_global() if global_settings is None else global_settings

    effective_values: dict[str, Any] = {}
    origins: dict[str, FieldProvenance] = {}
    for name in AutomationSettings.model_fields:
        from_cli = None if cli_override is None else getattr(cli_override, name)
        from_project = None if project_override is None else getattr(project_override, name)
        from_global = getattr(base, name)

        # NOTE(review): a value falling through to the global layer is always
        # labelled "global" here; the "default" member of ProvenanceSource is
        # never produced by this function.
        winner: ProvenanceSource
        if from_cli is not None:
            winner, chosen = "cli", from_cli
        elif from_project is not None:
            winner, chosen = "project", from_project
        else:
            winner, chosen = "global", from_global

        effective_values[name] = chosen
        origins[name] = FieldProvenance(
            source=winner,
            cli_value=from_cli,
            project_value=from_project,
            global_value=from_global,
        )

    resolved_settings = AutomationSettings(**effective_values)
    return ResolvedAutomation(settings=resolved_settings, provenance=origins)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
__all__ = [
|
|
230
|
+
"AUTOMATION_KEY",
|
|
231
|
+
"AutomationOverride",
|
|
232
|
+
"AutomationSettings",
|
|
233
|
+
"FieldProvenance",
|
|
234
|
+
"ProvenanceSource",
|
|
235
|
+
"ResolvedAutomation",
|
|
236
|
+
"load_global",
|
|
237
|
+
"resolve_automation",
|
|
238
|
+
]
|
splitsmith/backup.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""Project export/import.
|
|
2
|
+
|
|
3
|
+
Tars the non-regeneratable parts of a :class:`MatchProject` into a single
|
|
4
|
+
``.tar.gz`` so a project can be moved between machines or stashed as a
|
|
5
|
+
disaster-recovery copy.
|
|
6
|
+
|
|
7
|
+
Inclusion policy:
|
|
8
|
+
|
|
9
|
+
* Always: ``project.json``.
|
|
10
|
+
* Default: ``audit/`` (hand-labeled shot corrections) and ``scoreboard/``
|
|
11
|
+
(cached SSI data). Together these are the only artefacts that are
|
|
12
|
+
*truly* irreplaceable.
|
|
13
|
+
* Optional via flags: ``trimmed/``, ``exports/``, ``raw/``, ``audio/`` -- all
|
|
14
|
+
regeneratable from the raw footage, so the user opts in when they
|
|
15
|
+
actually need them in the archive.
|
|
16
|
+
* Never: ``probes/`` and ``thumbs/`` -- pure caches, trivially regenerated.
|
|
17
|
+
|
|
18
|
+
Subdirectories whose path resolves outside the project root (via the absolute
|
|
19
|
+
override fields on :class:`MatchProject`) are skipped: the archive intentionally
|
|
20
|
+
only captures project-local data, so an import on a different machine has a
|
|
21
|
+
chance of working.
|
|
22
|
+
|
|
23
|
+
The archive contains a single top-level directory whose name matches the
|
|
24
|
+
project directory's name, plus a ``BACKUP_MANIFEST.json`` describing what was
|
|
25
|
+
included and which splitsmith version produced the archive.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import io
|
|
31
|
+
import json
|
|
32
|
+
import shutil
|
|
33
|
+
import tarfile
|
|
34
|
+
import tempfile
|
|
35
|
+
from collections.abc import Iterable
|
|
36
|
+
from datetime import UTC, datetime
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
|
|
39
|
+
from pydantic import BaseModel, Field
|
|
40
|
+
|
|
41
|
+
from . import __version__
|
|
42
|
+
from .ui.project import PROJECT_FILE, MatchProject
|
|
43
|
+
|
|
44
|
+
DEFAULT_DIRS: tuple[str, ...] = ("audit", "scoreboard")
|
|
45
|
+
OPTIONAL_DIRS: frozenset[str] = frozenset({"raw", "audio", "trimmed", "exports"})
|
|
46
|
+
NEVER_DIRS: frozenset[str] = frozenset({"probes", "thumbs"})
|
|
47
|
+
|
|
48
|
+
MANIFEST_NAME = "BACKUP_MANIFEST.json"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class SkippedDir(BaseModel):
    """A requested subdirectory that was left out of an export archive."""

    # Logical subdirectory name as requested (e.g. "audit", "raw").
    name: str
    # Why it was skipped; export_project uses "missing" and "outside_project_root".
    reason: str
    # Resolved filesystem path as a string; None when resolving the path itself failed.
    resolved_path: str | None = None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class ExportResult(BaseModel):
    """Summary returned by :func:`export_project`."""

    # Final archive location (derived filename when a directory was passed).
    archive_path: Path
    # Size of the finished archive on disk, in bytes.
    bytes_written: int
    # "project.json" followed by the subdirectory names that were archived.
    included: list[str]
    # Subdirectories that were requested but not archived, with reasons.
    skipped: list[SkippedDir]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ImportResult(BaseModel):
    """Summary returned by :func:`import_project`."""

    # Directory the project was moved to (dest_root / archive top-level dir name).
    project_root: Path
    # ``name`` of the project loaded from the imported directory.
    project_name: str
    # Parsed BACKUP_MANIFEST.json, or None when absent or not valid JSON.
    manifest: dict[str, object] | None = Field(default=None)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class BackupError(Exception):
    """Raised when an export or import cannot proceed.

    Within this module that covers: no ``project.json`` in the project root,
    a missing archive, an archive without exactly one top-level directory or
    without a loadable ``project.json``, an existing destination when
    ``overwrite`` is off, and unsafe paths or links inside an archive.
    """
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _resolve_subdir(project: MatchProject, root: Path, name: str) -> Path:
|
|
75
|
+
"""Resolve ``name`` against ``root`` using the project's override fields."""
|
|
76
|
+
if name == "raw":
|
|
77
|
+
return project.raw_path(root)
|
|
78
|
+
if name == "audio":
|
|
79
|
+
return project.audio_path(root)
|
|
80
|
+
if name == "trimmed":
|
|
81
|
+
return project.trimmed_path(root)
|
|
82
|
+
if name == "exports":
|
|
83
|
+
return project.exports_path(root)
|
|
84
|
+
if name == "audit":
|
|
85
|
+
return project.audit_path(root)
|
|
86
|
+
# No override field for scoreboard; always project-local.
|
|
87
|
+
return root / name
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def export_project(
    project_root: Path,
    output: Path,
    *,
    include_trimmed: bool = False,
    include_exports: bool = False,
    include_raw: bool = False,
    include_audio: bool = False,
) -> ExportResult:
    """Archive ``project_root`` as a ``.tar.gz`` at ``output``.

    When ``output`` is an existing directory the archive is named
    ``<slug>-backup-<YYYYMMDD>.tar.gz`` inside it (slug from the project name,
    falling back to the project directory name); otherwise ``output`` is used
    as the file path. The project file and the ``DEFAULT_DIRS`` are always
    requested; each ``include_*`` flag adds its directory. A requested
    directory that is missing, or that resolves outside ``project_root``, is
    recorded as skipped rather than archived. A ``BACKUP_MANIFEST.json``
    describing the result is written into the archive last.

    Raises:
        BackupError: if ``project_root`` has no project file.
    """
    project_root = project_root.expanduser().resolve()
    project_file = project_root / PROJECT_FILE
    if not project_file.exists():
        raise BackupError(f"no {PROJECT_FILE} in {project_root}")
    project = MatchProject.load(project_root)

    output = output.expanduser()
    if output.is_dir():
        stamp = datetime.now(UTC).strftime("%Y%m%d")
        slug = _slug(project.name) or project_root.name
        output = output / f"{slug}-backup-{stamp}.tar.gz"
    output.parent.mkdir(parents=True, exist_ok=True)

    # Optional directories keep this fixed order after the defaults.
    optional_flags = (
        ("trimmed", include_trimmed),
        ("exports", include_exports),
        ("raw", include_raw),
        ("audio", include_audio),
    )
    wanted = [*DEFAULT_DIRS, *(opt for opt, enabled in optional_flags if enabled)]

    arc_root = project_root.name  # top-level dir inside the archive
    included: list[str] = []
    skipped: list[SkippedDir] = []

    with tarfile.open(output, "w:gz") as tf:
        tf.add(project_file, arcname=f"{arc_root}/{PROJECT_FILE}")

        for name in wanted:
            src = _resolve_subdir(project, project_root, name)
            try:
                src_resolved = src.resolve()
            except OSError:
                skipped.append(SkippedDir(name=name, reason="missing"))
                continue

            skip_reason: str | None = None
            if not src_resolved.exists():
                skip_reason = "missing"
            elif not _is_inside(src_resolved, project_root):
                skip_reason = "outside_project_root"

            if skip_reason is not None:
                skipped.append(
                    SkippedDir(
                        name=name,
                        reason=skip_reason,
                        resolved_path=str(src_resolved),
                    )
                )
                continue

            tf.add(src_resolved, arcname=f"{arc_root}/{name}")
            included.append(name)

        manifest = {
            "splitsmith_version": __version__,
            "created_at": datetime.now(UTC).isoformat(),
            "project_name": project.name,
            "project_root_name": project_root.name,
            "included": ["project.json", *included],
            "skipped": [entry.model_dump() for entry in skipped],
            "options": {
                "include_trimmed": include_trimmed,
                "include_exports": include_exports,
                "include_raw": include_raw,
                "include_audio": include_audio,
            },
        }
        manifest_bytes = json.dumps(manifest, indent=2).encode()
        _add_bytes(tf, f"{arc_root}/{MANIFEST_NAME}", manifest_bytes)

    # Measured only after the tar/gzip streams are closed.
    archive_size = output.stat().st_size
    return ExportResult(
        archive_path=output,
        bytes_written=archive_size,
        included=["project.json", *included],
        skipped=skipped,
    )
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def import_project(
    archive: Path,
    dest_root: Path,
    *,
    overwrite: bool = False,
) -> ImportResult:
    """Extract ``archive`` into ``dest_root``.

    The archive must contain exactly one top-level directory holding a
    ``project.json``. Returns an :class:`ImportResult` pointing at the new
    project root.

    The archive is unpacked into a temporary staging directory and validated
    there; only then is the project directory moved under ``dest_root``. The
    backup manifest is advisory: if it is absent, unreadable, not valid JSON,
    or not a JSON object, the import still succeeds with ``manifest=None``.

    Raises:
        BackupError: if the archive is missing, is not laid out as described
            above, contains unsafe members, holds a ``project.json`` that does
            not load, or the destination exists and ``overwrite`` is false.
    """
    archive = archive.expanduser().resolve()
    dest_root = dest_root.expanduser().resolve()
    if not archive.exists():
        raise BackupError(f"archive not found: {archive}")
    dest_root.mkdir(parents=True, exist_ok=True)

    with tempfile.TemporaryDirectory(prefix="splitsmith-import-") as staging_str:
        staging = Path(staging_str)
        with tarfile.open(archive, "r:*") as tf:
            _safe_extract(tf, staging)

        top = _single_top_dir(staging)
        if top is None:
            raise BackupError("archive must contain exactly one top-level directory")
        if not (top / PROJECT_FILE).exists():
            raise BackupError(f"archive is missing {PROJECT_FILE}")

        # Validate project.json parses before we move anything.
        try:
            MatchProject.load(top)
        except Exception as exc:  # noqa: BLE001 - surface the underlying failure
            raise BackupError(f"invalid project.json in archive: {exc}") from exc

        manifest: dict[str, object] | None = None
        manifest_path = top / MANIFEST_NAME
        if manifest_path.exists():
            try:
                # Export writes the manifest as UTF-8 bytes; read it the same way.
                loaded = json.loads(manifest_path.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                # ValueError covers both JSONDecodeError and UnicodeDecodeError.
                loaded = None
            # Anything but a JSON object would fail ImportResult validation
            # *after* the project had already been moved into place.
            manifest = loaded if isinstance(loaded, dict) else None

        target = dest_root / top.name
        # is_symlink() also catches a dangling link, which exists() reports as absent.
        if target.exists() or target.is_symlink():
            if not overwrite:
                raise BackupError(f"destination already exists: {target}")
            if target.is_dir() and not target.is_symlink():
                shutil.rmtree(target)
            else:
                # rmtree refuses plain files and symlinks; remove them directly.
                target.unlink()
        shutil.move(str(top), str(target))

        project = MatchProject.load(target)
        return ImportResult(project_root=target, project_name=project.name, manifest=manifest)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# ---------------------------------------------------------------------------
|
|
235
|
+
# Helpers
|
|
236
|
+
# ---------------------------------------------------------------------------
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _is_inside(child: Path, parent: Path) -> bool:
    """Return True when ``child`` equals ``parent`` or lies beneath it.

    The comparison is purely on path components; neither path is resolved
    here, so callers resolve beforehand when symlinks matter.
    """
    return child.is_relative_to(parent)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _add_bytes(tf: tarfile.TarFile, arcname: str, data: bytes) -> None:
|
|
248
|
+
info = tarfile.TarInfo(name=arcname)
|
|
249
|
+
info.size = len(data)
|
|
250
|
+
info.mtime = int(datetime.now(UTC).timestamp())
|
|
251
|
+
tf.addfile(info, io.BytesIO(data))
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _safe_extract(tf: tarfile.TarFile, dest: Path) -> None:
    """Extract ``tf`` into ``dest``, refusing members that could escape it.

    Every member is vetted before anything is written:

    * its own path must resolve inside ``dest``;
    * a symlink's target is resolved relative to the link's directory, a
      hard link's target relative to the archive root (which is how tarfile
      itself interprets ``linkname`` for each kind), and must stay inside
      ``dest``;
    * anything that is not a regular file, directory or link (devices,
      FIFOs) is rejected.

    Where the running Python provides tarfile extraction filters, the
    ``"data"`` filter is applied as well; it re-checks each member at
    extraction time, which also covers chains of links that only become
    dangerous once earlier members exist on disk.

    Raises:
        BackupError: if any member is unsafe or of an unsupported type.
    """
    dest_resolved = dest.resolve()
    members: list[tarfile.TarInfo] = []
    for m in tf.getmembers():
        target = (dest_resolved / m.name).resolve()
        if not _is_inside(target, dest_resolved):
            raise BackupError(f"archive contains unsafe path: {m.name!r}")
        # Block symlinks/hardlinks pointing outside the staging dir.
        if m.issym() or m.islnk():
            # Symlink targets are relative to the link's own directory, but
            # hard-link targets name another member from the archive root.
            # Checking a hard link against the member's directory would let
            # "a/b/link -> ../x" through even though it links outside dest.
            link_base = (dest_resolved / m.name).parent if m.issym() else dest_resolved
            try:
                link_resolved = (link_base / (m.linkname or "")).resolve()
            except OSError as exc:
                raise BackupError(f"unsafe link in archive: {m.name!r}") from exc
            if not _is_inside(link_resolved, dest_resolved):
                raise BackupError(f"unsafe link in archive: {m.name!r}")
        elif not (m.isfile() or m.isdir()):
            raise BackupError(f"archive contains unsupported member type: {m.name!r}")
        members.append(m)

    if hasattr(tarfile, "data_filter"):
        try:
            tf.extractall(dest_resolved, members=members, filter="data")
        except tarfile.FilterError as exc:
            # Keep the module's single error type for callers.
            raise BackupError(f"archive contains unsafe member: {exc}") from exc
    else:
        # Interpreter predates extraction filters; rely on the checks above.
        tf.extractall(dest_resolved, members=members)
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _single_top_dir(staging: Path) -> Path | None:
    """Return the lone visible directory directly under ``staging``.

    Entries whose name starts with ``.`` are ignored. ``None`` is returned
    unless exactly one entry remains and that entry is a directory.
    """
    visible = [entry for entry in staging.iterdir() if not entry.name.startswith(".")]
    if len(visible) == 1:
        (only,) = visible
        if only.is_dir():
            return only
    return None
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _slug(name: str) -> str:
    """Turn ``name`` into a lower-case, filename-friendly slug.

    After trimming surrounding whitespace, every character that is not
    alphanumeric, ``-`` or ``_`` becomes ``-``; leading and trailing ``-``
    are then stripped. The result may be empty.
    """

    def _normalise(ch: str) -> str:
        return ch if (ch.isalnum() or ch in "-_") else "-"

    dashed = "".join(map(_normalise, name.strip()))
    return dashed.strip("-").lower()
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
__all__: Iterable[str] = (
|
|
288
|
+
"BackupError",
|
|
289
|
+
"ExportResult",
|
|
290
|
+
"ImportResult",
|
|
291
|
+
"SkippedDir",
|
|
292
|
+
"DEFAULT_DIRS",
|
|
293
|
+
"OPTIONAL_DIRS",
|
|
294
|
+
"NEVER_DIRS",
|
|
295
|
+
"MANIFEST_NAME",
|
|
296
|
+
"export_project",
|
|
297
|
+
"import_project",
|
|
298
|
+
)
|