splitsmith 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. splitsmith/__init__.py +3 -0
  2. splitsmith/audit.py +87 -0
  3. splitsmith/automation.py +238 -0
  4. splitsmith/backup.py +298 -0
  5. splitsmith/beep_calibration.py +324 -0
  6. splitsmith/beep_detect.py +371 -0
  7. splitsmith/cleanup.py +327 -0
  8. splitsmith/cli.py +1281 -0
  9. splitsmith/coach.py +253 -0
  10. splitsmith/coach_distributions.py +348 -0
  11. splitsmith/compare/__init__.py +7 -0
  12. splitsmith/compare/cli.py +153 -0
  13. splitsmith/compare/emitter.py +456 -0
  14. splitsmith/compare/filler.py +98 -0
  15. splitsmith/compare/layout.py +164 -0
  16. splitsmith/compare/manifest.py +91 -0
  17. splitsmith/compare/project_loader.py +195 -0
  18. splitsmith/composition.py +606 -0
  19. splitsmith/config.py +442 -0
  20. splitsmith/cross_align.py +210 -0
  21. splitsmith/csv_gen.py +66 -0
  22. splitsmith/data/ensemble_calibration.json +248 -0
  23. splitsmith/data/fonts/Antonio-OFL.txt +93 -0
  24. splitsmith/data/fonts/Antonio-VariableFont.ttf +0 -0
  25. splitsmith/data/fonts/JetBrainsMono-Bold.ttf +0 -0
  26. splitsmith/data/fonts/JetBrainsMono-OFL.txt +93 -0
  27. splitsmith/data/overlay_theme.json +40 -0
  28. splitsmith/data/templates/action-cut.yaml +19 -0
  29. splitsmith/data/templates/match-recap.yaml +20 -0
  30. splitsmith/data/voter_c_gbdt.joblib +0 -0
  31. splitsmith/data/voter_e_visual_probe.joblib +0 -0
  32. splitsmith/ensemble/__init__.py +67 -0
  33. splitsmith/ensemble/agc_state.py +165 -0
  34. splitsmith/ensemble/api.py +419 -0
  35. splitsmith/ensemble/backend.py +89 -0
  36. splitsmith/ensemble/calibration.py +367 -0
  37. splitsmith/ensemble/clap_mel.py +138 -0
  38. splitsmith/ensemble/features.py +680 -0
  39. splitsmith/ensemble/fixtures.py +222 -0
  40. splitsmith/ensemble/tta.py +115 -0
  41. splitsmith/ensemble/visual.py +294 -0
  42. splitsmith/ensemble/voters.py +202 -0
  43. splitsmith/fcp7xml_render.py +558 -0
  44. splitsmith/fcpxml_gen.py +1721 -0
  45. splitsmith/fixture_schema.py +482 -0
  46. splitsmith/lab/__init__.py +79 -0
  47. splitsmith/lab/core.py +1118 -0
  48. splitsmith/lab/promote.py +555 -0
  49. splitsmith/lab/snap_window.py +331 -0
  50. splitsmith/lab/sweeps.py +231 -0
  51. splitsmith/lab_cli.py +750 -0
  52. splitsmith/match_cli.py +315 -0
  53. splitsmith/match_model.py +793 -0
  54. splitsmith/match_registry.py +131 -0
  55. splitsmith/mcp/__init__.py +23 -0
  56. splitsmith/mcp/__main__.py +20 -0
  57. splitsmith/mcp/detect_tools.py +476 -0
  58. splitsmith/mcp/export_tools.py +356 -0
  59. splitsmith/mcp/sandbox.py +77 -0
  60. splitsmith/mcp/server.py +393 -0
  61. splitsmith/mcp/tools.py +207 -0
  62. splitsmith/mcp/write_tools.py +268 -0
  63. splitsmith/model_cli.py +153 -0
  64. splitsmith/models/__init__.py +40 -0
  65. splitsmith/models/cache.py +139 -0
  66. splitsmith/models/download.py +95 -0
  67. splitsmith/models/errors.py +50 -0
  68. splitsmith/models/manifest.py +68 -0
  69. splitsmith/models/registry.py +256 -0
  70. splitsmith/mp4_render.py +513 -0
  71. splitsmith/overlay_render.py +817 -0
  72. splitsmith/overlay_theme.py +146 -0
  73. splitsmith/relink.py +245 -0
  74. splitsmith/report.py +258 -0
  75. splitsmith/runtime.py +268 -0
  76. splitsmith/shot_detect.py +506 -0
  77. splitsmith/shot_refine.py +252 -0
  78. splitsmith/system_check.py +162 -0
  79. splitsmith/templates.py +188 -0
  80. splitsmith/thumbnail.py +230 -0
  81. splitsmith/trim.py +211 -0
  82. splitsmith/ui/__init__.py +10 -0
  83. splitsmith/ui/audio.py +536 -0
  84. splitsmith/ui/embedded.py +312 -0
  85. splitsmith/ui/exports.py +533 -0
  86. splitsmith/ui/jobs.py +652 -0
  87. splitsmith/ui/logging_setup.py +108 -0
  88. splitsmith/ui/match_exports.py +500 -0
  89. splitsmith/ui/project.py +1734 -0
  90. splitsmith/ui/scoreboard/__init__.py +77 -0
  91. splitsmith/ui/scoreboard/cache.py +237 -0
  92. splitsmith/ui/scoreboard/http.py +206 -0
  93. splitsmith/ui/scoreboard/local.py +377 -0
  94. splitsmith/ui/scoreboard/models.py +301 -0
  95. splitsmith/ui/scoreboard/protocol.py +51 -0
  96. splitsmith/ui/server.py +9178 -0
  97. splitsmith/ui_static/package-lock.json +3062 -0
  98. splitsmith/ui_static/tsconfig.app.tsbuildinfo +1 -0
  99. splitsmith/ui_static/tsconfig.node.tsbuildinfo +1 -0
  100. splitsmith/user_config.py +380 -0
  101. splitsmith/video_match.py +159 -0
  102. splitsmith/video_probe.py +143 -0
  103. splitsmith/waveform.py +121 -0
  104. splitsmith/youtube_sidecar.py +293 -0
  105. splitsmith-0.2.0.dist-info/METADATA +301 -0
  106. splitsmith-0.2.0.dist-info/RECORD +109 -0
  107. splitsmith-0.2.0.dist-info/WHEEL +4 -0
  108. splitsmith-0.2.0.dist-info/entry_points.txt +3 -0
  109. splitsmith-0.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,324 @@
1
+ """Beep-detector calibration suite -- manifest, ground truth, eval aggregation.
2
+
3
+ This module is the pure-data backbone of the layer-1 work for issue #220
4
+ (``beep: improve detection accuracy``). The detector itself lives in
5
+ ``splitsmith.beep_detect``; this module only describes WHAT to detect and
6
+ HOW to score the result.
7
+
8
+ Two tracks share the suite:
9
+
10
+ * **Clip track** -- the ~10-50 s WAV files already checked into
11
+ ``tests/fixtures/`` (post-trim, with 0.5 s or 5 s pre-beep padding).
12
+ Always available; covers the trivial-baseline case + handheld iPhone
13
+ clips with 5 s of pre-beep noise.
14
+ * **Full track** -- the wide-window WAVs produced by
15
+ ``scripts/extract_full_fixture_audio.py`` under ``tests/fixtures/full/``.
16
+ Covers late-beep / cross-bay scenarios that don't appear in the trimmed
17
+ clips. Optional: only present when the source MP4s have been extracted
18
+ on this machine.
19
+
20
+ The ``ground_truth_in_clip`` and ``ground_truth_in_full`` fields express
21
+ the beep position in seconds within each respective WAV's coordinate
22
+ frame. ``detect_beep`` returns clip-relative or full-relative depending
23
+ on which audio buffer it's fed -- pick the matching ground truth.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import json
29
+ from collections.abc import Iterable
30
+ from dataclasses import dataclass, field
31
+ from pathlib import Path
32
+
33
+ from pydantic import BaseModel, Field
34
+
35
# Tolerance the audit JSONs themselves use (``tolerance_ms``). A detected
# beep is "correct" if it lands within this many ms of ground truth.
# Used as the default for ``BeepFixtureEntry.tolerance_ms``.
DEFAULT_TOLERANCE_MS = 100.0

# Heuristic thresholds applied during manifest build to seed failure-mode
# tags. These are SUGGESTIONS the user can override by hand-editing the
# ``tags`` list in ``manifest.yaml``. ``auto_tags`` compares with ``>=``,
# so a value exactly at a threshold receives the tag.
LATE_BEEP_THRESHOLD_S = 10.0
VERY_LATE_BEEP_THRESHOLD_S = 30.0
STEEL_PRONE_PLATES_THRESHOLD = 1  # any popper / plate count
45
+
46
+
47
class BeepFixtureEntry(BaseModel):
    """Calibration metadata for a single fixture (one row of the manifest).

    Fields:

    * ``stem`` -- basename shared by the audit JSON and the WAV (no
      extension).
    * ``camera_kind`` -- ``head`` for body-mounted cameras (Insta360 GO),
      ``hand`` for handheld phones. The detector should cope with both,
      but they fail in different ways.
    * ``camera_id`` -- the audit JSON's camera.id, retained so rows can be
      filtered by camera.
    * ``clip_wav`` -- post-trim WAV path, relative to ``tests/fixtures/``.
    * ``ground_truth_in_clip`` -- beep time in seconds measured from the
      start of ``clip_wav``; copied from the audit JSON's ``beep_time``.
    * ``tolerance_ms`` -- how far (in ms) a detection may land from ground
      truth and still count as correct; defaults to
      ``DEFAULT_TOLERANCE_MS``.
    * ``full_wav`` / ``ground_truth_in_full`` / ``full_duration_s`` --
      filled in once ``scripts/extract_full_fixture_audio.py`` has been
      run; ``full_wav`` is relative to ``tests/fixtures/``.
    * ``tags`` -- failure-mode buckets. The module-level constants drive
      the heuristic auto-tags; finer tags (``cross-bay``, ``steel-fp``,
      ``ro-chatter``, ...) are added by hand in manifest.yaml.
    """

    # Identity.
    stem: str
    camera_kind: str
    camera_id: str | None = None

    # Clip track (always present).
    clip_wav: str
    ground_truth_in_clip: float
    tolerance_ms: float = DEFAULT_TOLERANCE_MS

    # Full track (optional).
    full_wav: str | None = None
    ground_truth_in_full: float | None = None
    full_duration_s: float | None = None

    # Failure-mode buckets.
    tags: list[str] = Field(default_factory=list)
79
+
80
+
81
class BeepCalibrationManifest(BaseModel):
    """Root document stored in ``manifest.yaml``: the list of fixtures."""

    # One entry per fixture; an empty list is the default.
    fixtures: list[BeepFixtureEntry] = Field(default_factory=list)
85
+
86
+
87
@dataclass(frozen=True)
class FixtureEvalResult:
    """Immutable record of one detector run scored against one audio buffer.

    A fixture with both WAVs yields two rows sharing the same ``stem``;
    ``track`` tells them apart.
    """

    # Which fixture / which buffer.
    stem: str
    track: str  # "clip" or "full"
    tags: tuple[str, ...]

    # What was expected.
    ground_truth_s: float
    tolerance_s: float

    # What the detector reported (``None`` when nothing was detected).
    detected_time_s: float | None
    detected_score: float | None
    error_s: float | None  # detected - ground_truth, None if missed

    # Verdicts.
    correct_top1: bool
    correct_in_topn: bool  # any candidate within tolerance
    candidate_count: int

    # Optional extras.
    detected_confidence: float | None = None
    error_kind: str | None = None  # "not_found", "exception", or None
108
+
109
+
110
@dataclass
class EvalSummary:
    """Running totals for an eval run; feeds the printed report and the CI gate.

    ``by_tag`` holds one nested ``EvalSummary`` per failure-mode tag.
    """

    total: int = 0
    top1_hits: int = 0
    topn_hits: int = 0
    not_found: int = 0
    exceptions: int = 0
    by_tag: dict[str, EvalSummary] = field(default_factory=dict)

    @property
    def recall_top1(self) -> float:
        """Fraction of rows whose top-1 detection was correct (0.0 if empty)."""
        if not self.total:
            return 0.0
        return self.top1_hits / self.total

    @property
    def recall_topn(self) -> float:
        """Fraction of rows with a correct candidate in the top-N (0.0 if empty)."""
        if not self.total:
            return 0.0
        return self.topn_hits / self.total
128
+
129
+
130
def derive_camera_kind(camera_block: dict | None) -> str:
    """Classify an audit-JSON ``camera`` block as ``head``, ``hand`` or ``unknown``.

    The audit schema stores ``mount`` as ``head`` or ``hand``. Anything
    else -- including a missing or non-dict camera block, as seen on a few
    legacy fixtures -- is reported as ``unknown`` rather than guessed.
    """
    mount = camera_block.get("mount") if isinstance(camera_block, dict) else None
    return mount if mount in ("head", "hand") else "unknown"
143
+
144
+
145
def auto_tags(
    *,
    camera_kind: str,
    ground_truth_in_full: float | None,
    stage_rounds: dict | None,
) -> list[str]:
    """Derive the initial failure-mode tags for a fixture from audit-JSON facts.

    From the camera mount:
      * ``headcam`` for ``head``, ``handheld`` for ``hand``; no tag otherwise.

    Only when ``ground_truth_in_full`` is known (at most one of the two):
      * ``very-late-beep`` -- beep at or beyond ``VERY_LATE_BEEP_THRESHOLD_S``
        seconds into the source.
      * ``late-beep`` -- beep at or beyond ``LATE_BEEP_THRESHOLD_S`` seconds.

    Only when ``stage_rounds`` is a dict:
      * ``steel-prone`` -- ``plates`` + ``poppers`` reaches
        ``STEEL_PRONE_PLATES_THRESHOLD``; missing or falsy counts are 0.

    Tags are returned in the order listed above.
    """
    seeded: list[str] = []

    mount_tag = {"head": "headcam", "hand": "handheld"}.get(camera_kind)
    if mount_tag is not None:
        seeded.append(mount_tag)

    beep_at = ground_truth_in_full
    if beep_at is not None:
        # Checked strictest-first so only the first matching label is applied.
        lateness_ladder = (
            (VERY_LATE_BEEP_THRESHOLD_S, "very-late-beep"),
            (LATE_BEEP_THRESHOLD_S, "late-beep"),
        )
        for floor_s, label in lateness_ladder:
            if beep_at >= floor_s:
                seeded.append(label)
                break

    if isinstance(stage_rounds, dict):
        steel_targets = sum(int(stage_rounds.get(key) or 0) for key in ("plates", "poppers"))
        if steel_targets >= STEEL_PRONE_PLATES_THRESHOLD:
            seeded.append("steel-prone")

    return seeded
180
+
181
+
182
def compute_full_beep_time(
    *,
    fixture_window_in_source: tuple[float, float],
    full_window_in_source: tuple[float, float],
    clip_beep_time: float,
) -> float:
    """Re-express a clip-relative beep time in the full WAV's coordinate frame.

    Both windows are given in seconds-into-source. The trimmed clip begins
    at ``fixture_window_in_source[0]`` and the full WAV begins at
    ``full_window_in_source[0]``, so the clip starts
    ``fixture_window_in_source[0] - full_window_in_source[0]`` seconds into
    the full WAV; adding ``clip_beep_time`` gives the beep position there.
    Only the start of each window is used.
    """
    clip_offset_in_full = fixture_window_in_source[0] - full_window_in_source[0]
    return clip_offset_in_full + clip_beep_time
200
+
201
+
202
def evaluate_detection(
    *,
    stem: str,
    track: str,
    tags: Iterable[str],
    ground_truth_s: float,
    tolerance_ms: float,
    detected_time_s: float | None,
    detected_score: float | None,
    detected_confidence: float | None = None,
    candidate_times_s: Iterable[float] = (),
    error_kind: str | None = None,
) -> FixtureEvalResult:
    """Compare a single detector result with ground truth and build its row.

    ``candidate_times_s`` carries the timestamps of the runner-up
    candidates (``BeepDetection.candidates[1:].time``). A runner-up within
    tolerance makes ``correct_in_topn`` true even if the top-1 pick missed
    -- the signal the HITL flow (#219) relies on: when the real beep is
    somewhere in the top-N list, a human can select it instead of typing a
    timestamp.

    When ``detected_time_s`` is ``None`` the row is a miss: both verdicts
    are false, ``candidate_count`` is 0, ``candidate_times_s`` is ignored,
    and ``error_kind`` falls back to ``"not_found"`` if none was supplied.
    Otherwise ``candidate_count`` is the number of runner-ups plus the
    winner.
    """
    tol_s = tolerance_ms / 1000.0
    # Fields that are filled identically for hits and misses.
    shared = {
        "stem": stem,
        "track": track,
        "tags": tuple(tags),
        "ground_truth_s": ground_truth_s,
        "tolerance_s": tol_s,
        "detected_time_s": detected_time_s,
        "detected_score": detected_score,
        "detected_confidence": detected_confidence,
    }

    if detected_time_s is None:
        return FixtureEvalResult(
            **shared,
            error_s=None,
            correct_top1=False,
            correct_in_topn=False,
            candidate_count=0,
            error_kind=error_kind or "not_found",
        )

    offset_s = detected_time_s - ground_truth_s
    winner_ok = abs(offset_s) <= tol_s
    runner_ups = list(candidate_times_s)
    if winner_ok:
        any_ok = True
    else:
        any_ok = any(abs(t - ground_truth_s) <= tol_s for t in runner_ups)

    return FixtureEvalResult(
        **shared,
        error_s=offset_s,
        correct_top1=winner_ok,
        correct_in_topn=any_ok,
        candidate_count=len(runner_ups) + 1,
        error_kind=error_kind,
    )
260
+
261
+
262
def summarize(results: Iterable[FixtureEvalResult]) -> EvalSummary:
    """Fold per-fixture rows into overall totals plus one bucket per tag.

    Every row is counted once in the overall summary and once in the
    ``by_tag`` bucket of each tag it carries; buckets are created on first
    use.
    """

    def _tally(bucket: EvalSummary, row: FixtureEvalResult) -> None:
        # Apply one row's outcome to a single summary bucket.
        bucket.total += 1
        if row.correct_top1:
            bucket.top1_hits += 1
        if row.correct_in_topn:
            bucket.topn_hits += 1
        if row.error_kind == "not_found":
            bucket.not_found += 1
        elif row.error_kind == "exception":
            bucket.exceptions += 1

    overall = EvalSummary()
    for row in results:
        _tally(overall, row)
        for tag in row.tags:
            _tally(overall.by_tag.setdefault(tag, EvalSummary()), row)
    return overall
287
+
288
+
289
def load_manifest(path: Path) -> BeepCalibrationManifest:
    """Read a manifest YAML; a missing file yields an empty manifest.

    The file is decoded as UTF-8 regardless of the platform locale, so a
    hand-edited manifest loads the same way on every machine. An empty
    YAML document is also treated as an empty manifest.

    Args:
        path: Location of ``manifest.yaml``.

    Returns:
        The validated manifest.
    """
    import yaml

    # Read first and handle absence afterwards: avoids the window between
    # an ``exists()`` check and the read.
    try:
        text = path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return BeepCalibrationManifest()
    raw = yaml.safe_load(text) or {}
    return BeepCalibrationManifest.model_validate(raw)
297
+
298
+
299
def save_manifest(manifest: BeepCalibrationManifest, path: Path) -> None:
    """Serialise ``manifest`` to YAML at ``path`` in a hand-editable layout.

    Parent directories are created as needed. ``None`` fields are omitted
    and keys keep their declaration order (no sorting).
    """
    import yaml

    path.parent.mkdir(parents=True, exist_ok=True)
    document = yaml.safe_dump(
        manifest.model_dump(exclude_none=True),
        sort_keys=False,
        indent=2,
    )
    path.write_text(document)
306
+
307
+
308
def fixtures_with_full_audio(
    manifest: BeepCalibrationManifest,
    fixtures_dir: Path,
) -> list[BeepFixtureEntry]:
    """Return the manifest entries whose full-track WAV is present on disk.

    Entries without a ``full_wav`` are skipped; the rest are kept only if
    ``fixtures_dir / full_wav`` exists. Manifest order is preserved.
    """
    return [
        entry
        for entry in manifest.fixtures
        if entry.full_wav and (fixtures_dir / entry.full_wav).exists()
    ]
320
+
321
+
322
def read_audit_json(path: Path) -> dict:
    """Load an audit JSON file; kept as a thin wrapper so tests can stub the read.

    The file is read as bytes and handed to ``json.loads``, which detects
    the encoding itself. That keeps the result independent of the platform
    locale and accepts files saved with a UTF-8 byte-order mark.

    Args:
        path: Audit JSON file to read.

    Returns:
        The decoded JSON document.
    """
    return json.loads(path.read_bytes())
@@ -0,0 +1,371 @@
1
+ """Detect the start beep timestamp via bandpass + envelope peak detection.
2
+
3
+ Strategy:
4
+
5
+ 1. Bandpass to ``[freq_min_hz, freq_max_hz]`` (typical shot-timer beep
6
+ 2-5 kHz). Hilbert envelope, smoothed at ``envelope_smoothing_ms``
7
+ (40 ms by default -- wide enough to bridge the natural intra-beep
8
+ wobble, narrow enough to keep the 300-500 ms beep distinct from
9
+ sustained ambient noise).
10
+
11
+ 2. **Adaptive cutoff**: a candidate run must clear ``max(min_amplitude *
12
+ global_peak, noise_floor * noise_factor, min_abs_peak)``. The noise-
13
+ floor leg is what recovers handheld / phone clips where the beep is
14
+ faint in absolute terms but still 10x+ above the median noise floor.
15
+ ``global_peak`` is held in reserve for cases where a gunshot dominates
16
+ the band; ``min_abs_peak`` is a sub-noise sanity floor.
17
+
18
+ 3. **Composite scoring**: each candidate is ranked by
19
+ ``silence_score * tonal_score`` (times a duration-match factor) where:
20
+
21
+ * ``silence_score = run_peak / (max envelope in pre-silence window)``.
22
+ IPSC beeps are preceded by ~3 s of "Are you ready / Stand by" + a
23
+ pause; mid-stage transients are not. Higher = quieter pre-roll.
24
+ * ``tonal_score = energy_in_3_kHz_band / energy_in_full_band``,
25
+ in [0, 1]. The IPSC timer emits a near-pure ~3.0-3.3 kHz tone;
26
+ gunshots, steel rings, and RO chatter spread energy across the
27
+ full 2-5 kHz band. ``tonal_weight`` controls how strongly this
28
+ component tilts the ranking.
29
+
30
+ 4. **Adaptive rise-foot leading edge**: walk backward from the run's peak
31
+ while the envelope stays above ``max(peak * RISE_FOOT_FRAC, noise_floor
32
+ * RISE_FOOT_NOISE_FACTOR)``. The noise-floor lower bound stops the
33
+ walk from sliding into pre-beep noise on faint beeps where 5 % of the
34
+ peak falls below the noise floor.
35
+
36
+ This shares the "leading edge" definition with shot_detect: peak-relative
37
+ when the beep is loud, noise-floor-relative when it isn't, insensitive
38
+ to gain / distance / ambient noise, and lands at the visibly audible
39
+ start of the rise.
40
+
41
+ Pure function: takes audio + sample rate + config, returns a BeepDetection. No
42
+ file I/O. ``load_audio`` is provided as a thin convenience for callers.
43
+ """
44
+
45
+ from __future__ import annotations
46
+
47
+ import math
48
+ from pathlib import Path
49
+
50
+ import numpy as np
51
+ import soundfile as sf
52
+ from scipy.signal import butter, hilbert, sosfiltfilt
53
+
54
+ from .config import BeepCandidate, BeepDetectConfig, BeepDetection
55
+
56
# Rise-foot leading-edge parameters. Same definition as shot_detect (the
# burst's own peak is the reference, so detection is insensitive to gain /
# distance / ambient noise). Tied to the smoothed bandpass envelope -- the
# tone's amplitude profile, not the raw oscillation. The noise-floor
# multiplier kicks in when the burst is only marginally above the floor:
# walking back to 5 % of a faint peak otherwise crosses into pre-beep noise.
_RISE_FOOT_FRAC = 0.05
_RISE_FOOT_NOISE_FACTOR = 1.5
# Fine smoothing window applied to the rise-foot envelope only. Just enough
# to suppress single-sample wobble; not so wide that it shifts the onset.
_LEADING_EDGE_SMOOTHING_MS = 10.0

# Confidence-formula weights. Empirically tuned against the labelled
# calibration suite (issue #220 layer 3): the resulting distribution has
# >=0.7 right ~95 % of the time and 0.5-0.7 sitting around chance, which
# is the gap the HITL queue (#219) needs. Bump these only with paired
# eval-set numbers in the commit; the threshold settings downstream
# rely on the calibration holding.
# The three weights sum to 1.0, so the blended quality stays in [0, 1].
_CONFIDENCE_TONAL_WEIGHT = 0.45
_CONFIDENCE_DURATION_WEIGHT = 0.30
_CONFIDENCE_SILENCE_WEIGHT = 0.25
# Silence-score saturation point: tanh(silence / SILENCE_SCALE) maps the
# raw ratio to [0, 1]. 5x is "comfortably above pre-roll noise" -- below
# it we're in steel-ring / mag-swap-quiet territory; above it the metric
# is saturated.
_CONFIDENCE_SILENCE_SCALE = 5.0
# Duration normalisation: ramp from 0 at MIN_MS to 1 at FULL_MS. Slightly
# wider than the ranking-side prior so a 250 ms beep still gets ~0.25 on
# the duration term (it would land in HITL, which is the right call).
_CONFIDENCE_DUR_MIN_MS = 200.0
_CONFIDENCE_DUR_FULL_MS = 400.0
# Margin tilt: the runner-up's score is folded in via
# ``mix * (margin_floor + (1 - margin_floor) * margin)``. ``margin = 0``
# (ties with runner-up) leaves only ``margin_floor`` of the quality
# score; ``margin = 1`` (runner-up scores 0) preserves the full quality.
_CONFIDENCE_MARGIN_FLOOR = 0.6
92
+
93
+
94
def candidate_confidence(
    *,
    silence_score: float,
    tonal_score: float,
    duration_ms: float,
    score: float,
    runner_up_score: float,
) -> float:
    """Turn a candidate's diagnostic features into a confidence in [0, 1].

    Three quality terms are blended with fixed weights -- tonal purity
    (clamped to [0, 1]), duration plausibility (linear ramp between the
    min and full durations) and silence preference (saturated with
    ``tanh``). The blend is then scaled by a margin factor that shrinks
    towards ``_CONFIDENCE_MARGIN_FLOOR`` as ``runner_up_score`` approaches
    ``score``; a non-positive ``score`` gets the floor itself.

    Calibration evidence lives in
    ``tests/fixtures/beep_calibration/baseline.json``; do not change the
    constants without re-checking the eval-set bins.

    Pure function: no audio, no I/O.
    """

    def _unit(value: float) -> float:
        # Clamp into the closed unit interval.
        return max(0.0, min(1.0, value))

    ramp_ms = max(1.0, _CONFIDENCE_DUR_FULL_MS - _CONFIDENCE_DUR_MIN_MS)
    tonal_term = _CONFIDENCE_TONAL_WEIGHT * _unit(tonal_score)
    duration_term = _CONFIDENCE_DURATION_WEIGHT * _unit(
        (duration_ms - _CONFIDENCE_DUR_MIN_MS) / ramp_ms
    )
    silence_term = _CONFIDENCE_SILENCE_WEIGHT * math.tanh(
        max(0.0, silence_score) / _CONFIDENCE_SILENCE_SCALE
    )
    quality = tonal_term + duration_term + silence_term

    # Relative lead over the runner-up; undefined (treated as 0) when the
    # candidate's own score is not positive.
    lead = _unit(1.0 - runner_up_score / score) if score > 0.0 else 0.0
    tilt = _CONFIDENCE_MARGIN_FLOOR + (1.0 - _CONFIDENCE_MARGIN_FLOOR) * lead
    return _unit(quality * tilt)
130
+
131
+
132
class BeepNotFoundError(RuntimeError):
    """Raised when no candidate satisfies the duration and amplitude criteria."""
134
+
135
+
136
def load_audio(path: Path) -> tuple[np.ndarray, int]:
    """Read an audio file and return ``(mono float32 samples, sample rate)``.

    Multi-channel input is mixed down by averaging across channels.
    """
    samples, rate = sf.read(path, always_2d=False)
    mono = samples.mean(axis=1) if samples.ndim > 1 else samples
    return mono.astype(np.float32, copy=False), int(rate)
142
+
143
+
144
+ def _bandpass_envelope(
145
+ audio: np.ndarray, sample_rate: int, lo: float, hi: float, smoothing_ms: float
146
+ ) -> np.ndarray:
147
+ """4th-order Butterworth bandpass + Hilbert envelope + moving-average smooth."""
148
+ sos = butter(4, [lo, hi], btype="band", fs=sample_rate, output="sos")
149
+ band = sosfiltfilt(sos, audio)
150
+ env = np.abs(hilbert(band)).astype(np.float32)
151
+ smooth_win = max(1, int(round(sample_rate * smoothing_ms / 1000.0)))
152
+ if smooth_win > 1:
153
+ kernel = np.ones(smooth_win, dtype=np.float32) / smooth_win
154
+ env = np.convolve(env, kernel, mode="same")
155
+ return env
156
+
157
+
158
def detect_beep(
    audio: np.ndarray,
    sample_rate: int,
    config: BeepDetectConfig,
) -> BeepDetection:
    """Locate the start beep in ``audio`` and return its leading-edge timestamp.

    Args:
        audio: 1-D (mono) sample buffer.
        sample_rate: Sampling rate of ``audio`` in Hz.
        config: Detection parameters (band edges, cutoff legs, silence /
            tonal / duration scoring knobs, search window, top-N size).

    Returns:
        A ``BeepDetection`` describing the best-scoring candidate, with the
        top ``config.top_n_candidates`` ranked candidates attached (at
        least one). Times are in seconds relative to the start of ``audio``.

    Raises:
        ValueError: If ``audio`` is not 1-D or is empty.
        BeepNotFoundError: If the search window is empty, the band has no
            energy, or no candidate satisfies the duration/amplitude
            thresholds.
    """
    if audio.ndim != 1:
        raise ValueError("audio must be 1-D (mono); mix down before calling detect_beep")
    if audio.size == 0:
        raise ValueError("audio is empty")

    # Limit the search to the configured leading window. This prevents mid-
    # stage low-activity moments from out-scoring the real beep on silence
    # preference alone (e.g. a steel ring after a long reload, late in the
    # stage, can have lower pre-window energy than the beep itself).
    if config.search_window_s and config.search_window_s > 0:
        search_hi = min(audio.size, int(round(config.search_window_s * sample_rate)))
        audio = audio[:search_hi]
        if audio.size == 0:
            raise BeepNotFoundError("search window is empty")

    # Two envelopes: coarse (40 ms) for run detection / scoring, fine (10
    # ms) for rise-foot leading-edge timing. The coarse envelope bridges
    # intra-beep dips; the fine envelope keeps the leading-edge sample
    # accurate -- a wide moving-average smear shifts the apparent onset
    # earlier by ~half the smoothing window, which would otherwise blow
    # the ~15 ms tolerance the audit JSONs use.
    env = _bandpass_envelope(
        audio,
        sample_rate,
        config.freq_min_hz,
        config.freq_max_hz,
        config.envelope_smoothing_ms,
    )
    env_fine = _bandpass_envelope(
        audio,
        sample_rate,
        config.freq_min_hz,
        config.freq_max_hz,
        _LEADING_EDGE_SMOOTHING_MS,
    )

    peak_value = float(env.max())
    if peak_value <= 0.0:
        raise BeepNotFoundError("flat audio: no energy in beep band")

    # Noise floor = median of the smoothed envelope. Robust to gunshots /
    # steel rings (a few high samples don't move the median) and to long
    # quiet leads (most samples are near-silent so median stays small).
    noise_floor = float(np.median(env))

    # Effective cutoff: see ``BeepDetectConfig`` -- three legs, take the max.
    cutoff = max(
        config.min_amplitude * peak_value,
        config.noise_floor_factor * noise_floor,
        config.min_abs_peak,
    )
    # Rising / falling edges of the above-cutoff mask delimit the runs;
    # padding with 0 on both sides closes runs that touch either end.
    above = env >= cutoff
    edges = np.diff(above.astype(np.int8), prepend=0, append=0)
    starts = np.flatnonzero(edges == 1)
    ends = np.flatnonzero(edges == -1)  # exclusive

    min_run_samples = int(round(sample_rate * config.min_duration_ms / 1000.0))
    pre_window_samples = int(round(sample_rate * config.silence_window_s))
    pre_skip_samples = int(round(sample_rate * config.silence_pre_skip_s))

    # Tonal-quality envelope: same audio, narrower bandpass around the
    # IPSC timer fundamental. We compare run-window energy in this band
    # against the wider band envelope above.
    tonal_env = _bandpass_envelope(
        audio,
        sample_rate,
        config.tonal_band_lo_hz,
        config.tonal_band_hi_hz,
        config.envelope_smoothing_ms,
    )

    # Each entry: (start, end, run_peak, score, silence_score, tonal_ratio).
    candidates: list[tuple[int, int, float, float, float, float]] = []
    for s, e in zip(starts, ends, strict=True):
        if (e - s) < min_run_samples:
            continue
        run_peak = float(env[s:e].max())
        # Silence-preference uses the MAX of the pre-window envelope, not
        # its mean. Mean-based scoring let mid-stage candidates beat the
        # real beep when the pre-window happened to span a magazine swap
        # or a brief lull between shots: the lull dragged the mean down
        # even when the window also contained one or two loud transients.
        # Max-based scoring asks "is there anything else loud in recent
        # past?" -- a real beep has a clean pre-roll, so the answer is no.
        #
        # Candidates near t=0 don't have a full pre-window. The metric is
        # undefined for them, so we substitute a neutral 1.0: tonal +
        # duration must do the discrimination instead. Otherwise a
        # truncated-pre-window candidate gets ``peak / noise_floor``,
        # which beats real beeps whose pre-window contains RO chatter.
        pre_hi = max(0, s - pre_skip_samples)
        pre_lo = max(0, pre_hi - pre_window_samples)
        available_pre_s = (pre_hi - pre_lo) / sample_rate
        # An empty pre-window is always undefined, whatever
        # ``min_pre_window_s`` is set to: ``max()`` of an empty slice
        # would raise instead of scoring the candidate.
        if pre_hi <= pre_lo or available_pre_s < config.min_pre_window_s:
            silence_score = 1.0
        else:
            pre_max = float(env[pre_lo:pre_hi].max())
            pre_max = max(pre_max, noise_floor)
            silence_score = run_peak / (pre_max + 1e-6)

        # Tonal concentration: energy in the IPSC fundamental band over
        # energy in the full search band, computed on the smoothed envelope.
        # Sums over a few hundred ms of run samples are stable; a single-
        # sample peak ratio would be noisy.
        wide_energy = float(np.sum(env[s:e]))
        narrow_energy = float(np.sum(tonal_env[s:e]))
        tonal_ratio = narrow_energy / (wide_energy + 1e-6)
        tonal_ratio = max(0.0, min(1.0, tonal_ratio))

        # Composite score: silence-preference, modulated by tonal quality
        # AND duration-match. tonal_weight=0 + dur_match_weight=0 falls
        # back to legacy silence-only behaviour.
        weight = max(0.0, min(1.0, config.tonal_weight))
        tonal_factor = (1.0 - weight) + weight * tonal_ratio
        # Duration-match factor: ramp from 0 at min_ms to 1 at full_ms,
        # squared to make the penalty bite harder on short transients.
        # A 168 ms shot (typical post-smoothing length) lands at
        # ((168-150)/150)^2 = 0.014; a 340 ms beep at 1.0. The squaring
        # is what actually demotes shots whose pre-window happens to be
        # quiet (magazine-swap lulls etc.) -- silence-preference alone
        # can't tell those from the real beep.
        dur_ms = (e - s) * 1000.0 / sample_rate
        span_ms = max(1.0, config.dur_match_full_ms - config.dur_match_min_ms)
        dur_ratio = max(0.0, min(1.0, (dur_ms - config.dur_match_min_ms) / span_ms))
        dur_weight = max(0.0, min(1.0, config.dur_match_weight))
        dur_factor = (1.0 - dur_weight) + dur_weight * dur_ratio * dur_ratio
        score = silence_score * tonal_factor * dur_factor

        candidates.append((s, e, run_peak, score, silence_score, tonal_ratio))

    if not candidates:
        raise BeepNotFoundError(
            f"no beep candidate of >={config.min_duration_ms} ms above "
            f"cutoff {cutoff:.4f} (peak={peak_value:.4f}, "
            f"noise_floor={noise_floor:.4f}) in [{config.freq_min_hz}, "
            f"{config.freq_max_hz}] Hz"
        )

    # Rank by composite score (highest first). Compute the rise-foot
    # leading edge for every candidate so the UI can show alternatives
    # without a second pass.
    ranked = sorted(candidates, key=lambda c: c[3], reverse=True)
    runner_up_score = ranked[1][3] if len(ranked) > 1 else 0.0
    ranked_models: list[BeepCandidate] = []
    for run_start, run_end, run_peak, score, silence_score, tonal_ratio in ranked:
        leading_idx = _rise_foot_leading_edge(env_fine, run_start, run_end, noise_floor)
        duration_ms = (run_end - run_start) * 1000.0 / sample_rate
        # Confidence uses the GLOBAL runner-up's score for every
        # candidate, not the next-lower in the sorted list. The HITL
        # protocol cares about "is the winner clearly better than the
        # next-best alternative?"; a runner-up's own confidence is
        # mostly informational so the UI can colour the chip.
        confidence = candidate_confidence(
            silence_score=silence_score,
            tonal_score=tonal_ratio,
            duration_ms=duration_ms,
            score=score,
            runner_up_score=runner_up_score,
        )
        ranked_models.append(
            BeepCandidate(
                time=leading_idx / sample_rate,
                score=score,
                peak_amplitude=run_peak,
                duration_ms=duration_ms,
                silence_score=silence_score,
                tonal_score=tonal_ratio,
                confidence=confidence,
            )
        )

    # Always surface at least the winner, even if top_n is misconfigured.
    top_n = config.top_n_candidates if config.top_n_candidates > 0 else 1
    surfaced = ranked_models[:top_n]
    winner = ranked_models[0]
    return BeepDetection(
        time=winner.time,
        peak_amplitude=winner.peak_amplitude,
        duration_ms=winner.duration_ms,
        confidence=winner.confidence,
        candidates=surfaced,
    )
348
+
349
+
350
+ def _rise_foot_leading_edge(env: np.ndarray, run_start: int, run_end: int, noise_floor: float) -> int:
351
+ """Rise-foot of the tone: walk backward from the envelope peak (within the
352
+ strong run) while the envelope stays at or above ``max(peak *
353
+ RISE_FOOT_FRAC, noise_floor * RISE_FOOT_NOISE_FACTOR)``. The earliest
354
+ such sample is the foot of the rise.
355
+
356
+ The noise-floor lower bound prevents the walk from continuing into
357
+ pre-beep silence on faint beeps where 5 % of the peak falls below the
358
+ median noise floor (e.g. iPhone handheld clips with ~10x SNR).
359
+ """
360
+ if run_end <= run_start:
361
+ return run_start
362
+ peak_offset = int(np.argmax(env[run_start:run_end]))
363
+ peak_idx = run_start + peak_offset
364
+ peak = float(env[peak_idx])
365
+ if peak <= 0.0:
366
+ return run_start
367
+ foot = max(peak * _RISE_FOOT_FRAC, noise_floor * _RISE_FOOT_NOISE_FACTOR)
368
+ i = peak_idx
369
+ while i > 0 and env[i - 1] >= foot:
370
+ i -= 1
371
+ return i