splitsmith 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. splitsmith/__init__.py +3 -0
  2. splitsmith/audit.py +87 -0
  3. splitsmith/automation.py +238 -0
  4. splitsmith/backup.py +298 -0
  5. splitsmith/beep_calibration.py +324 -0
  6. splitsmith/beep_detect.py +371 -0
  7. splitsmith/cleanup.py +327 -0
  8. splitsmith/cli.py +1281 -0
  9. splitsmith/coach.py +253 -0
  10. splitsmith/coach_distributions.py +348 -0
  11. splitsmith/compare/__init__.py +7 -0
  12. splitsmith/compare/cli.py +153 -0
  13. splitsmith/compare/emitter.py +456 -0
  14. splitsmith/compare/filler.py +98 -0
  15. splitsmith/compare/layout.py +164 -0
  16. splitsmith/compare/manifest.py +91 -0
  17. splitsmith/compare/project_loader.py +195 -0
  18. splitsmith/composition.py +606 -0
  19. splitsmith/config.py +442 -0
  20. splitsmith/cross_align.py +210 -0
  21. splitsmith/csv_gen.py +66 -0
  22. splitsmith/data/ensemble_calibration.json +248 -0
  23. splitsmith/data/fonts/Antonio-OFL.txt +93 -0
  24. splitsmith/data/fonts/Antonio-VariableFont.ttf +0 -0
  25. splitsmith/data/fonts/JetBrainsMono-Bold.ttf +0 -0
  26. splitsmith/data/fonts/JetBrainsMono-OFL.txt +93 -0
  27. splitsmith/data/overlay_theme.json +40 -0
  28. splitsmith/data/templates/action-cut.yaml +19 -0
  29. splitsmith/data/templates/match-recap.yaml +20 -0
  30. splitsmith/data/voter_c_gbdt.joblib +0 -0
  31. splitsmith/data/voter_e_visual_probe.joblib +0 -0
  32. splitsmith/ensemble/__init__.py +67 -0
  33. splitsmith/ensemble/agc_state.py +165 -0
  34. splitsmith/ensemble/api.py +419 -0
  35. splitsmith/ensemble/backend.py +89 -0
  36. splitsmith/ensemble/calibration.py +367 -0
  37. splitsmith/ensemble/clap_mel.py +138 -0
  38. splitsmith/ensemble/features.py +680 -0
  39. splitsmith/ensemble/fixtures.py +222 -0
  40. splitsmith/ensemble/tta.py +115 -0
  41. splitsmith/ensemble/visual.py +294 -0
  42. splitsmith/ensemble/voters.py +202 -0
  43. splitsmith/fcp7xml_render.py +558 -0
  44. splitsmith/fcpxml_gen.py +1721 -0
  45. splitsmith/fixture_schema.py +482 -0
  46. splitsmith/lab/__init__.py +79 -0
  47. splitsmith/lab/core.py +1118 -0
  48. splitsmith/lab/promote.py +555 -0
  49. splitsmith/lab/snap_window.py +331 -0
  50. splitsmith/lab/sweeps.py +231 -0
  51. splitsmith/lab_cli.py +750 -0
  52. splitsmith/match_cli.py +315 -0
  53. splitsmith/match_model.py +793 -0
  54. splitsmith/match_registry.py +131 -0
  55. splitsmith/mcp/__init__.py +23 -0
  56. splitsmith/mcp/__main__.py +20 -0
  57. splitsmith/mcp/detect_tools.py +476 -0
  58. splitsmith/mcp/export_tools.py +356 -0
  59. splitsmith/mcp/sandbox.py +77 -0
  60. splitsmith/mcp/server.py +393 -0
  61. splitsmith/mcp/tools.py +207 -0
  62. splitsmith/mcp/write_tools.py +268 -0
  63. splitsmith/model_cli.py +153 -0
  64. splitsmith/models/__init__.py +40 -0
  65. splitsmith/models/cache.py +139 -0
  66. splitsmith/models/download.py +95 -0
  67. splitsmith/models/errors.py +50 -0
  68. splitsmith/models/manifest.py +68 -0
  69. splitsmith/models/registry.py +256 -0
  70. splitsmith/mp4_render.py +513 -0
  71. splitsmith/overlay_render.py +817 -0
  72. splitsmith/overlay_theme.py +146 -0
  73. splitsmith/relink.py +245 -0
  74. splitsmith/report.py +258 -0
  75. splitsmith/runtime.py +268 -0
  76. splitsmith/shot_detect.py +506 -0
  77. splitsmith/shot_refine.py +252 -0
  78. splitsmith/system_check.py +162 -0
  79. splitsmith/templates.py +188 -0
  80. splitsmith/thumbnail.py +230 -0
  81. splitsmith/trim.py +211 -0
  82. splitsmith/ui/__init__.py +10 -0
  83. splitsmith/ui/audio.py +536 -0
  84. splitsmith/ui/embedded.py +312 -0
  85. splitsmith/ui/exports.py +533 -0
  86. splitsmith/ui/jobs.py +652 -0
  87. splitsmith/ui/logging_setup.py +108 -0
  88. splitsmith/ui/match_exports.py +500 -0
  89. splitsmith/ui/project.py +1734 -0
  90. splitsmith/ui/scoreboard/__init__.py +77 -0
  91. splitsmith/ui/scoreboard/cache.py +237 -0
  92. splitsmith/ui/scoreboard/http.py +206 -0
  93. splitsmith/ui/scoreboard/local.py +377 -0
  94. splitsmith/ui/scoreboard/models.py +301 -0
  95. splitsmith/ui/scoreboard/protocol.py +51 -0
  96. splitsmith/ui/server.py +9178 -0
  97. splitsmith/ui_static/package-lock.json +3062 -0
  98. splitsmith/ui_static/tsconfig.app.tsbuildinfo +1 -0
  99. splitsmith/ui_static/tsconfig.node.tsbuildinfo +1 -0
  100. splitsmith/user_config.py +380 -0
  101. splitsmith/video_match.py +159 -0
  102. splitsmith/video_probe.py +143 -0
  103. splitsmith/waveform.py +121 -0
  104. splitsmith/youtube_sidecar.py +293 -0
  105. splitsmith-0.2.0.dist-info/METADATA +301 -0
  106. splitsmith-0.2.0.dist-info/RECORD +109 -0
  107. splitsmith-0.2.0.dist-info/WHEEL +4 -0
  108. splitsmith-0.2.0.dist-info/entry_points.txt +3 -0
  109. splitsmith-0.2.0.dist-info/licenses/LICENSE +21 -0
splitsmith/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """splitsmith: extract IPSC shot splits from head-mounted camera footage."""
2
+
3
+ __version__ = "0.2.0"
splitsmith/audit.py ADDED
@@ -0,0 +1,87 @@
1
+ """Helpers for the hand-audit workflow.
2
+
3
+ Audit flow:
4
+ 1. ``audit-prep`` (or the ad-hoc fixture script) emits ``<name>.wav``,
5
+ ``<name>.json`` (empty ``shots[]`` ground truth), and
6
+ ``<name>-candidates.csv`` with an ``audit_keep`` column.
7
+ 2. The user opens the CSV, marks rows they want to keep, saves.
8
+ 3. ``audit-apply`` reads the marked CSV and rewrites the JSON's ``shots[]``.
9
+
10
+ This module exposes the read/merge primitives. The CLI wraps them.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import csv
16
+ import json
17
+ from pathlib import Path
18
+
19
+ # Anything in this set (case-insensitive, trimmed) marks a candidate as kept.
20
+ _KEEP_TRUTHY: frozenset[str] = frozenset({"y", "yes", "1", "x", "true", "keep", "t"})
21
+
22
+
23
+ def is_kept(value: str | None) -> bool:
24
+ if value is None:
25
+ return False
26
+ return value.strip().lower() in _KEEP_TRUTHY
27
+
28
+
29
def read_kept_candidate_numbers(candidates_csv: Path) -> list[int]:
    """Return the candidate_numbers from rows whose ``audit_keep`` cell is truthy.

    Order is preserved from the CSV, which is the natural shot order.

    Raises:
        ValueError: if the ``audit_keep`` or ``candidate_number`` column is
            missing, or if a row marked keep has a blank / non-integer
            ``candidate_number``. The message names the file and, for bad
            cells, the CSV line so a hand-edited file can be fixed quickly.
    """
    with candidates_csv.open("r", newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        if reader.fieldnames is None or "audit_keep" not in reader.fieldnames:
            raise ValueError(
                f"{candidates_csv}: missing 'audit_keep' column. Expected the audit-prep "
                f"format (audit_keep, candidate_number, ...). Got: {reader.fieldnames}"
            )
        if "candidate_number" not in reader.fieldnames:
            raise ValueError(f"{candidates_csv}: missing 'candidate_number' column")

        kept: list[int] = []
        for row in reader:
            if not is_kept(row.get("audit_keep")):
                continue
            # DictReader fills cells missing from a short row with None, and a
            # hand-edited CSV can easily contain a blank or mistyped number.
            raw_number = row.get("candidate_number")
            try:
                kept.append(int(raw_number))
            except (TypeError, ValueError) as exc:
                raise ValueError(
                    f"{candidates_csv}:{reader.line_num}: invalid candidate_number "
                    f"{raw_number!r} on a row marked keep"
                ) from exc
    return kept
48
+
49
+
50
def apply_audit_to_fixture(candidates_csv: Path, fixture_json: Path) -> int:
    """Merge marked-keep candidates into ``fixture_json``'s ``shots[]`` array.

    Returns the number of shots written. The JSON is rewritten in place; the
    ``_candidates_pending_audit`` block is preserved unchanged so the audit can
    be re-run from the same source if the user changes their mind.

    Both files are handled as UTF-8, matching how the candidates CSV is read,
    so the result does not depend on the platform's locale encoding.

    Raises:
        ValueError: if the fixture has no ``_candidates_pending_audit.candidates``
            block, or if the CSV marks a candidate_number the fixture does not
            contain.
    """
    kept_numbers = read_kept_candidate_numbers(candidates_csv)
    data = json.loads(fixture_json.read_text(encoding="utf-8"))

    # The block may be absent or explicitly null; both mean "no candidates".
    pending = data.get("_candidates_pending_audit")
    candidates = pending.get("candidates") if isinstance(pending, dict) else None
    if not candidates:
        raise ValueError(
            f"{fixture_json}: no candidates block found. Expected " f"'_candidates_pending_audit.candidates'."
        )
    by_num = {c["candidate_number"]: c for c in candidates}

    missing = sorted(n for n in kept_numbers if n not in by_num)
    if missing:
        raise ValueError(
            f"{candidates_csv}: candidate_number(s) marked keep but not present in "
            f"{fixture_json}: {missing}"
        )

    # shot_number is the 1-based position among the kept rows, in CSV order.
    shots: list[dict] = [
        {
            "shot_number": kept_idx,
            "candidate_number": cand_num,
            "time": by_num[cand_num]["time"],
            "ms_after_beep": by_num[cand_num]["ms_after_beep"],
        }
        for kept_idx, cand_num in enumerate(kept_numbers, start=1)
    ]
    data["shots"] = shots
    fixture_json.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")
    return len(shots)
splitsmith/automation.py ADDED
@@ -0,0 +1,238 @@
1
+ """Layered automation settings (issue #215).
2
+
3
+ Splitsmith chains certain follow-up jobs automatically -- the canonical
4
+ example is "user marks beep reviewed -> shot detection fires." Some
5
+ users want the rich auto-pipeline; others (CLI / agent flows) want
6
+ manual gates so they can decide each step. This module provides the
7
+ layered settings stack that lets a global default, a project-level
8
+ override, and a per-invocation CLI flag resolve cleanly into one
9
+ effective set of toggles -- with provenance so the UI can show *why*
10
+ a value is what it is.
11
+
12
+ Resolution order, top wins: **CLI > project > global > field default**.
13
+
14
+ The schema is one ``automation`` sub-object so siblings (for example
15
+ ``overlay_render_on_audit``, ``beep_detect_on_ingest``) can land later
16
+ without re-shaping the on-disk format.
17
+
18
+ Backed by ``pydantic-settings`` so the global layer composes cleanly
19
+ with the rest of the codebase: YAML loading, env-var overrides
20
+ (``SPLITSMITH_AUTOMATION_*``), and the standard pydantic validation
21
+ story all come for free. The override types and the resolver are
22
+ plain pydantic / dataclasses because they aren't loaded from any
23
+ single source -- they're computed merges.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import logging
29
+ from dataclasses import dataclass
30
+ from pathlib import Path
31
+ from typing import Any, Literal
32
+
33
+ import yaml
34
+ from pydantic import BaseModel
35
+ from pydantic_settings import BaseSettings, SettingsConfigDict
36
+
37
+ from . import user_config
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+
42
# Field name in the YAML and on the project model. Centralised so a
# rename touches one place. In this module it is the key that
# ``_yaml_block`` looks up in the parsed config file.
AUTOMATION_KEY = "automation"
45
+
46
+
47
class AutomationSettings(BaseSettings):
    """The effective set of automation toggles.

    Default values are the *global fallback* applied when nothing
    else weighs in. The same model is used as the resolved object
    after layering -- both shapes are identical (every field has a
    concrete value at the end), which keeps callers from juggling
    two near-duplicate types.

    Loaded from the user-config ``config.yaml`` (under the
    ``automation`` block) and from ``SPLITSMITH_AUTOMATION_*`` env
    vars; init args win over both. The YAML values are not read by
    this class itself: :func:`load_global` reads the block and passes
    it in as init kwargs.
    """

    # env_prefix: each field can be supplied as SPLITSMITH_AUTOMATION_<FIELD>.
    # extra="ignore": keys that are not fields of this model are dropped
    # rather than rejected.
    model_config = SettingsConfigDict(
        env_prefix="SPLITSMITH_AUTOMATION_",
        extra="ignore",
        # We do the YAML load explicitly via :func:`load_global` so the
        # settings_customise_sources hook stays simple and the test
        # suite can construct AutomationSettings without a disk read.
    )

    shot_detect_on_beep_verified: bool = True
    """When the user marks a beep reviewed, fire shot detection."""

    beep_low_confidence_threshold: float = 0.95
    """Minimum auto-detector confidence in [0, 1] required to auto-trust a beep.

    Joins issue #219: an auto-detected beep with confidence at or above
    this threshold pre-sets ``beep_reviewed=True`` (the user is offered
    a one-click confirm but the chain doesn't wait), so shot detection
    fires immediately. Below the threshold the beep lands in the HITL
    queue and the user (or an agent driving the workflow) is asked to
    pick the right candidate from the ranked list.

    Manual entries always clamp confidence to 1.0 and bypass this gate.

    History: the original default was 0.6, calibrated against the
    labelled fixture set (~95% top-1 precision in the ``>= 0.7`` band).
    Field use surfaced too many incorrectly-auto-trusted beeps on
    recursive-scan ingests, so the default was raised to 0.95 pending
    a re-calibration with more data.
    """
90
+
91
+
92
class AutomationOverride(BaseModel):
    """Layer override -- ``None`` on a field means "inherit from below".

    Used for the project-state and CLI-flag layers. Not loaded from
    disk by pydantic-settings: project overrides ride on the project
    JSON; CLI overrides come from typer.

    :func:`resolve_automation` reads these fields by name for every field
    of :class:`AutomationSettings`, so the two field sets must stay in step.
    """

    # Both fields default to None, i.e. "this layer has no opinion".
    shot_detect_on_beep_verified: bool | None = None
    beep_low_confidence_threshold: float | None = None
102
+
103
+
104
# Where each resolved field came from. The UI uses this for the
# provenance badge (#216).
# NOTE(review): ``resolve_automation`` in this module only ever emits
# "cli", "project" or "global"; "default" is declared but not produced here.
ProvenanceSource = Literal["cli", "project", "global", "default"]
107
+
108
+
109
@dataclass(frozen=True)
class FieldProvenance:
    """One resolved field's source + the layer values that fed in.

    ``source`` is the layer that supplied the effective value.
    ``cli_value`` / ``project_value`` are ``None`` when that layer
    stayed silent. ``global_value`` is always present because the
    global settings always have a default.

    Field types are union-typed (``bool | float``) because the
    automation block is now mixed (toggles + thresholds). The UI
    provenance widget renders the values via JSON.stringify so the
    union is invisible on the wire.

    Instances are immutable (``frozen=True``).
    """

    # Layer that won for this field.
    source: ProvenanceSource
    # Raw value from the CLI layer, or None if it did not set the field.
    cli_value: bool | float | None
    # Raw value from the project layer, or None if it did not set the field.
    project_value: bool | float | None
    # Value read from the global settings object; never None.
    global_value: bool | float
128
+
129
+
130
@dataclass(frozen=True)
class ResolvedAutomation:
    """Resolved settings + per-field provenance.

    Returned by :func:`resolve_automation`. The dataclass is frozen, so the
    two attributes cannot be rebound after construction.
    """

    # Effective value of every automation field after layering.
    settings: AutomationSettings
    # Maps each AutomationSettings field name to where its value came from.
    provenance: dict[str, FieldProvenance]
136
+
137
+
138
def _yaml_block(path: Path) -> dict[str, Any]:
    """Read the ``automation`` block from ``path``.

    Returns an empty dict when the file is missing, cannot be read, is not
    valid UTF-8, fails to parse as YAML, or has no mapping under
    ``AUTOMATION_KEY`` -- callers fall back to the field defaults. Read and
    parse failures are logged as warnings; a missing file is silent.
    """
    if not path.exists():
        return {}
    try:
        with path.open("r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
    # UnicodeDecodeError is a ValueError, not an OSError or YAMLError, so it
    # must be listed explicitly or a mis-encoded config would crash the caller.
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
        logger.warning("Ignoring unreadable automation block at %s: %s", path, exc)
        return {}
    # The document root and the block itself must both be mappings.
    block = data.get(AUTOMATION_KEY) if isinstance(data, dict) else None
    if not isinstance(block, dict):
        return {}
    return block
154
+
155
+
156
def load_global(config_path: Path | None = None) -> AutomationSettings:
    """Load the global automation settings.

    ``config_path`` defaults to the user-config ``config.yaml``; pass an
    explicit path in tests. When user config is disabled and no explicit
    path is given, no file is read and a plain ``AutomationSettings()`` is
    returned.

    The YAML block is handed to ``AutomationSettings`` as init kwargs, so
    ``SPLITSMITH_AUTOMATION_*`` env vars only supply fields the YAML block
    leaves out.
    NOTE(review): this relies on pydantic-settings ranking init kwargs above
    environment variables -- confirm if env vars are meant to beat the YAML.

    A block that fails validation is logged and discarded; the function then
    falls back to ``AutomationSettings()``.
    """
    user_config_off = user_config.is_disabled()
    if user_config_off and config_path is None:
        return AutomationSettings()

    if config_path:
        path = config_path
    else:
        path = user_config.user_config_dir() / user_config.CONFIG_FILENAME

    yaml_values = _yaml_block(path)
    try:
        settings = AutomationSettings(**yaml_values)
    except Exception as exc:  # noqa: BLE001 -- log then fall back so a malformed config doesn't break the app
        logger.warning("Discarding malformed automation block at %s: %s", path, exc)
        settings = AutomationSettings()
    return settings
175
+
176
+
177
def resolve_automation(
    *,
    global_settings: AutomationSettings | None = None,
    project_override: AutomationOverride | None = None,
    cli_override: AutomationOverride | None = None,
) -> ResolvedAutomation:
    """Layer the three sources into one resolved set of toggles.

    For every field of :class:`AutomationSettings` the first layer that
    holds a non-``None`` value wins, in this order:

    1. CLI override.
    2. Project override.
    3. Global settings (``global_settings``, or :func:`load_global` when
       that argument is ``None``).

    A missing override object is treated as "silent on every field". The
    global layer always supplies a value, so it is the final fallback and
    is reported with source ``"global"``.

    Returns a :class:`ResolvedAutomation` carrying the effective settings
    and per-field provenance for the UI.
    """
    base = load_global() if global_settings is None else global_settings

    def layer_value(layer: AutomationOverride | None, name: str) -> Any:
        # An absent layer contributes nothing for any field.
        return None if layer is None else getattr(layer, name)

    effective_values: dict[str, Any] = {}
    trail: dict[str, FieldProvenance] = {}
    for name in AutomationSettings.model_fields:
        from_cli = layer_value(cli_override, name)
        from_project = layer_value(project_override, name)
        from_global = getattr(base, name)

        winner: ProvenanceSource
        if from_cli is not None:
            winner, chosen = "cli", from_cli
        elif from_project is not None:
            winner, chosen = "project", from_project
        else:
            winner, chosen = "global", from_global

        effective_values[name] = chosen
        trail[name] = FieldProvenance(
            source=winner,
            cli_value=from_cli,
            project_value=from_project,
            global_value=from_global,
        )

    resolved = AutomationSettings(**effective_values)
    return ResolvedAutomation(settings=resolved, provenance=trail)
227
+
228
+
229
# Names exported by ``from splitsmith.automation import *``. The private
# helper ``_yaml_block`` is intentionally left out.
__all__ = [
    "AUTOMATION_KEY",
    "AutomationOverride",
    "AutomationSettings",
    "FieldProvenance",
    "ProvenanceSource",
    "ResolvedAutomation",
    "load_global",
    "resolve_automation",
]
splitsmith/backup.py ADDED
@@ -0,0 +1,298 @@
1
+ """Project export/import.
2
+
3
+ Tars the non-regeneratable parts of a :class:`MatchProject` into a single
4
+ ``.tar.gz`` so a project can be moved between machines or stashed as a
5
+ disaster-recovery copy.
6
+
7
+ Inclusion policy:
8
+
9
+ * Always: ``project.json``.
10
+ * Default: ``audit/`` (hand-labeled shot corrections) and ``scoreboard/``
11
+ (cached SSI data). Together these are the only artefacts that are
12
+ *truly* irreplaceable.
13
+ * Optional via flags: ``trimmed/``, ``exports/``, ``raw/``, ``audio/`` -- all
14
+ regeneratable from the raw footage, so the user opts in when they
15
+ actually need them in the archive.
16
+ * Never: ``probes/`` and ``thumbs/`` -- pure caches, trivially regenerated.
17
+
18
+ Subdirectories whose path resolves outside the project root (via the absolute
19
+ override fields on :class:`MatchProject`) are skipped: the archive intentionally
20
+ only captures project-local data, so an import on a different machine has a
21
+ chance of working.
22
+
23
+ The archive contains a single top-level directory whose name matches the
24
+ project directory's name, plus a ``BACKUP_MANIFEST.json`` describing what was
25
+ included and which splitsmith version produced the archive.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import io
31
+ import json
32
+ import shutil
33
+ import tarfile
34
+ import tempfile
35
+ from collections.abc import Iterable
36
+ from datetime import UTC, datetime
37
+ from pathlib import Path
38
+
39
+ from pydantic import BaseModel, Field
40
+
41
+ from . import __version__
42
+ from .ui.project import PROJECT_FILE, MatchProject
43
+
44
# Subdirectories that export_project always tries to archive alongside
# project.json.
DEFAULT_DIRS: tuple[str, ...] = ("audit", "scoreboard")
# Subdirectories that are archived only when the matching include_* flag is
# set. NOTE(review): export_project spells these names out itself rather than
# reading this set.
OPTIONAL_DIRS: frozenset[str] = frozenset({"raw", "audio", "trimmed", "exports"})
# Cache directories that are never archived (see the module docstring).
NEVER_DIRS: frozenset[str] = frozenset({"probes", "thumbs"})

# File written inside the archive's top-level directory describing the export.
MANIFEST_NAME = "BACKUP_MANIFEST.json"
49
+
50
+
51
class SkippedDir(BaseModel):
    """A requested subdirectory that export_project left out of the archive."""

    # Logical subdirectory name, e.g. "audit" or "raw".
    name: str
    # Why it was skipped; export_project uses "missing" and "outside_project_root".
    reason: str
    # Resolved filesystem path when it could be computed, otherwise None.
    resolved_path: str | None = None
55
+
56
+
57
class ExportResult(BaseModel):
    """Summary returned by export_project."""

    # Final path of the written archive.
    archive_path: Path
    # Size of the archive file on disk, in bytes.
    bytes_written: int
    # "project.json" followed by the names of the subdirectories that were added.
    included: list[str]
    # Requested subdirectories that were not added, with the reason for each.
    skipped: list[SkippedDir]
62
+
63
+
64
class ImportResult(BaseModel):
    """Summary returned by import_project."""

    # Directory the project was moved to under the destination root.
    project_root: Path
    # Name read from the imported project's project file.
    project_name: str
    # Parsed backup manifest from the archive, or None when it is absent or unreadable.
    manifest: dict[str, object] | None = Field(default=None)
68
+
69
+
70
class BackupError(Exception):
    """Raised when an export or import cannot proceed.

    Used for a project root without a project file, a missing archive, an
    archive with an unexpected layout or an invalid ``project.json``, an
    import destination that already exists, and archive entries that would
    escape the staging directory.
    """
72
+
73
+
74
+ def _resolve_subdir(project: MatchProject, root: Path, name: str) -> Path:
75
+ """Resolve ``name`` against ``root`` using the project's override fields."""
76
+ if name == "raw":
77
+ return project.raw_path(root)
78
+ if name == "audio":
79
+ return project.audio_path(root)
80
+ if name == "trimmed":
81
+ return project.trimmed_path(root)
82
+ if name == "exports":
83
+ return project.exports_path(root)
84
+ if name == "audit":
85
+ return project.audit_path(root)
86
+ # No override field for scoreboard; always project-local.
87
+ return root / name
88
+
89
+
90
def export_project(
    project_root: Path,
    output: Path,
    *,
    include_trimmed: bool = False,
    include_exports: bool = False,
    include_raw: bool = False,
    include_audio: bool = False,
) -> ExportResult:
    """Write a ``.tar.gz`` archive of ``project_root`` to ``output``.

    ``output`` may be an existing directory (the archive filename is then
    derived from the project name + the current UTC date) or a file path.
    The project file and ``DEFAULT_DIRS`` are always requested; the
    ``include_*`` flags add ``trimmed``, ``exports``, ``raw`` and ``audio``.
    A requested subdirectory that does not exist, or that resolves outside
    ``project_root``, is recorded as skipped instead of being archived.

    If writing fails part-way, the incomplete archive is removed so a
    truncated file cannot be mistaken for a usable backup; the original
    exception is re-raised.

    Returns an :class:`ExportResult` describing what was included and which
    subdirectories were skipped.

    Raises:
        BackupError: if ``project_root`` has no project file.
    """
    project_root = project_root.expanduser().resolve()
    if not (project_root / PROJECT_FILE).exists():
        raise BackupError(f"no {PROJECT_FILE} in {project_root}")
    project = MatchProject.load(project_root)

    output = output.expanduser()
    if output.is_dir():
        stamp = datetime.now(UTC).strftime("%Y%m%d")
        # Fall back to the directory name when the project name slugs to "".
        slug = _slug(project.name) or project_root.name
        output = output / f"{slug}-backup-{stamp}.tar.gz"
    output.parent.mkdir(parents=True, exist_ok=True)

    # Order matters: it is the order recorded in the manifest and the result.
    opt_ins = (
        ("trimmed", include_trimmed),
        ("exports", include_exports),
        ("raw", include_raw),
        ("audio", include_audio),
    )
    wanted = [*DEFAULT_DIRS, *(name for name, enabled in opt_ins if enabled)]

    arc_root = project_root.name  # top-level dir inside the archive
    included: list[str] = []
    skipped: list[SkippedDir] = []

    try:
        with tarfile.open(output, "w:gz") as tf:
            tf.add(project_root / PROJECT_FILE, arcname=f"{arc_root}/{PROJECT_FILE}")
            for name in wanted:
                src = _resolve_subdir(project, project_root, name)
                try:
                    src_resolved = src.resolve()
                except OSError:
                    skipped.append(SkippedDir(name=name, reason="missing"))
                    continue
                if not src_resolved.exists():
                    skipped.append(SkippedDir(name=name, reason="missing", resolved_path=str(src_resolved)))
                    continue
                # Only project-local data is archived, so the backup stays portable.
                if not _is_inside(src_resolved, project_root):
                    skipped.append(
                        SkippedDir(
                            name=name,
                            reason="outside_project_root",
                            resolved_path=str(src_resolved),
                        )
                    )
                    continue
                tf.add(src_resolved, arcname=f"{arc_root}/{name}")
                included.append(name)

            manifest = {
                "splitsmith_version": __version__,
                "created_at": datetime.now(UTC).isoformat(),
                "project_name": project.name,
                "project_root_name": project_root.name,
                "included": ["project.json", *included],
                "skipped": [s.model_dump() for s in skipped],
                "options": {
                    "include_trimmed": include_trimmed,
                    "include_exports": include_exports,
                    "include_raw": include_raw,
                    "include_audio": include_audio,
                },
            }
            _add_bytes(tf, f"{arc_root}/{MANIFEST_NAME}", json.dumps(manifest, indent=2).encode())
    except BaseException:
        # Opening the archive for writing already truncated anything at
        # ``output``; do not leave a half-written backup behind.
        try:
            output.unlink(missing_ok=True)
        except OSError:
            # Best-effort cleanup must not mask the original failure.
            pass
        raise

    size = output.stat().st_size
    return ExportResult(
        archive_path=output,
        bytes_written=size,
        included=["project.json", *included],
        skipped=skipped,
    )
178
+
179
+
180
def import_project(
    archive: Path,
    dest_root: Path,
    *,
    overwrite: bool = False,
) -> ImportResult:
    """Extract ``archive`` into ``dest_root``.

    The archive is unpacked into a temporary staging directory, checked, and
    only then moved to ``dest_root / <top-level dir name>``. It must contain
    exactly one top-level directory holding a project file that loads.

    The backup manifest is optional and best-effort: if it is missing,
    unreadable, not valid JSON, or not a JSON object, the import still
    succeeds and ``ImportResult.manifest`` is ``None``.

    With ``overwrite=True`` an existing destination (directory, file or
    symlink) is removed before the move.

    Returns an :class:`ImportResult` pointing at the new project root.

    Raises:
        BackupError: if the archive is missing, has the wrong layout, holds an
            invalid project file, contains unsafe entries, or the destination
            exists and ``overwrite`` is false.
    """
    archive = archive.expanduser().resolve()
    dest_root = dest_root.expanduser().resolve()
    if not archive.exists():
        raise BackupError(f"archive not found: {archive}")
    dest_root.mkdir(parents=True, exist_ok=True)

    with tempfile.TemporaryDirectory(prefix="splitsmith-import-") as staging_str:
        staging = Path(staging_str)
        with tarfile.open(archive, "r:*") as tf:
            _safe_extract(tf, staging)

        top = _single_top_dir(staging)
        if top is None:
            raise BackupError("archive must contain exactly one top-level directory")
        if not (top / PROJECT_FILE).exists():
            raise BackupError(f"archive is missing {PROJECT_FILE}")

        # Validate project.json parses before we move anything.
        try:
            MatchProject.load(top)
        except Exception as exc:  # noqa: BLE001 - surface the underlying failure
            raise BackupError(f"invalid project.json in archive: {exc}") from exc

        manifest: dict[str, object] | None = None
        manifest_path = top / MANIFEST_NAME
        if manifest_path.exists():
            try:
                loaded = json.loads(manifest_path.read_text(encoding="utf-8"))
            except ValueError:
                # Covers both invalid JSON and bytes that are not valid UTF-8.
                loaded = None
            # Anything but a JSON object would fail ImportResult validation
            # *after* the project has been moved into place, so drop it here.
            manifest = loaded if isinstance(loaded, dict) else None

        target = dest_root / top.name
        # is_symlink() also catches a dangling link, which exists() reports as absent.
        if target.exists() or target.is_symlink():
            if not overwrite:
                raise BackupError(f"destination already exists: {target}")
            if target.is_dir() and not target.is_symlink():
                shutil.rmtree(target)
            else:
                # rmtree refuses plain files and symlinks.
                target.unlink()
        shutil.move(str(top), str(target))

    project = MatchProject.load(target)
    return ImportResult(project_root=target, project_name=project.name, manifest=manifest)
232
+
233
+
234
+ # ---------------------------------------------------------------------------
235
+ # Helpers
236
+ # ---------------------------------------------------------------------------
237
+
238
+
239
+ def _is_inside(child: Path, parent: Path) -> bool:
240
+ try:
241
+ child.relative_to(parent)
242
+ return True
243
+ except ValueError:
244
+ return False
245
+
246
+
247
+ def _add_bytes(tf: tarfile.TarFile, arcname: str, data: bytes) -> None:
248
+ info = tarfile.TarInfo(name=arcname)
249
+ info.size = len(data)
250
+ info.mtime = int(datetime.now(UTC).timestamp())
251
+ tf.addfile(info, io.BytesIO(data))
252
+
253
+
254
def _safe_extract(tf: tarfile.TarFile, dest: Path) -> None:
    """Extract ``tf`` into ``dest``, refusing entries that could escape it.

    Every member is checked before anything is written:

    * its own path must resolve inside ``dest``;
    * only regular files, directories, symlinks and hard links are accepted
      (device nodes, FIFOs and the like are rejected);
    * a link's target must resolve inside ``dest``. Symlink targets are
      interpreted relative to the link's own directory, hard-link targets
      relative to the archive root, which is how tar stores them.

    When the running Python provides tarfile extraction filters, the ``data``
    filter is applied as a second line of defence.

    Raises:
        BackupError: if any member fails one of the checks above.
    """
    dest_resolved = dest.resolve()
    members: list[tarfile.TarInfo] = []
    for m in tf.getmembers():
        target = (dest_resolved / m.name).resolve()
        if not _is_inside(target, dest_resolved):
            raise BackupError(f"archive contains unsafe path: {m.name!r}")
        if not (m.isfile() or m.isdir() or m.issym() or m.islnk()):
            raise BackupError(f"archive contains unsupported entry type: {m.name!r}")
        # Block symlinks/hardlinks pointing outside the staging dir.
        if m.issym() or m.islnk():
            # Resolving a hard link's target against the member's directory
            # would let e.g. name="a/b/c", linkname="../x" pass even though
            # extraction links to <dest>/../x.
            base = (dest_resolved / m.name).parent if m.issym() else dest_resolved
            link_target = base / (m.linkname or "")
            try:
                link_resolved = link_target.resolve()
            except OSError as exc:
                raise BackupError(f"unsafe link in archive: {m.name!r}") from exc
            if not _is_inside(link_resolved, dest_resolved):
                raise BackupError(f"unsafe link in archive: {m.name!r}")
        members.append(m)

    if hasattr(tarfile, "data_filter"):
        # Extraction filters are not present on every supported interpreter,
        # hence the feature check rather than a version check.
        try:
            tf.extractall(dest_resolved, members=members, filter="data")
        except tarfile.FilterError as exc:
            raise BackupError(f"archive contains unsafe entry: {exc}") from exc
    else:
        tf.extractall(dest_resolved, members=members)
273
+
274
+
275
+ def _single_top_dir(staging: Path) -> Path | None:
276
+ entries = [p for p in staging.iterdir() if not p.name.startswith(".")]
277
+ if len(entries) != 1 or not entries[0].is_dir():
278
+ return None
279
+ return entries[0]
280
+
281
+
282
+ def _slug(name: str) -> str:
283
+ out = "".join(c if c.isalnum() or c in "-_" else "-" for c in name.strip())
284
+ return out.strip("-").lower()
285
+
286
+
287
# Names exported by ``from splitsmith.backup import *``: the result models,
# the error type, the inclusion-policy constants and the two entry points.
# The underscore-prefixed helpers are intentionally left out.
__all__: Iterable[str] = (
    "BackupError",
    "ExportResult",
    "ImportResult",
    "SkippedDir",
    "DEFAULT_DIRS",
    "OPTIONAL_DIRS",
    "NEVER_DIRS",
    "MANIFEST_NAME",
    "export_project",
    "import_project",
)