w2t-bkin 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
w2t_bkin/sync/ttl.py ADDED
@@ -0,0 +1,254 @@
1
+ """TTL synchronization utilities.
2
+
3
+ Provides functions for aligning Bpod trials to TTL sync signals.
4
+
5
+ Note: TTL loading functions (load_ttl_file, get_ttl_pulses) have been moved
6
+ to w2t_bkin.ttl.
7
+
8
+ Example:
9
+ >>> from w2t_bkin.sync.ttl import align_bpod_trials_to_ttl
10
+ >>> from w2t_bkin.ttl import get_ttl_pulses
11
+ """
12
+
13
+ import logging
14
+ from typing import Dict, List, Optional, Protocol, Tuple
15
+
16
+ import numpy as np
17
+
18
+ from ..exceptions import SyncError
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
def get_sync_time_from_bpod_trial(trial_data: Dict, sync_signal: str) -> Optional[float]:
    """Return the onset time of a sync state inside a single Bpod trial.

    Both the trial and its ``States`` entry are passed through
    ``convert_matlab_struct`` before being queried, so MATLAB-struct inputs
    are handled the same way as plain dicts.

    Args:
        trial_data: Trial record expected to carry a ``States`` mapping of
            state name -> time sequence.
        sync_signal: Name of the state to look up (e.g. "W2L_Audio").

    Returns:
        The first element of the state's time sequence as a float, relative
        to trial start. None is returned when ``States`` is missing or empty,
        when the state is absent, when its value is not a list/tuple/ndarray
        holding at least two entries, or when ``is_nan_or_none`` flags the
        first entry.

    Example:
        >>> sync_time = get_sync_time_from_bpod_trial(trial, "W2L_Audio")
    """
    from ..utils import convert_matlab_struct, is_nan_or_none

    trial = convert_matlab_struct(trial_data)

    raw_states = trial.get("States", {})
    if not raw_states:
        return None

    # States itself may still be a MATLAB struct at this point.
    state_times = convert_matlab_struct(raw_states).get(sync_signal)
    if state_times is None:
        return None

    # A visited state is stored as a sequence with at least [start, end].
    is_sequence = isinstance(state_times, (list, tuple, np.ndarray))
    if not is_sequence or len(state_times) < 2:
        return None

    onset = state_times[0]
    return None if is_nan_or_none(onset) else float(onset)
60
+
61
+
62
class BpodTrialTypeProtocol(Protocol):
    """Structural type for one trial-type sync configuration entry.

    Any object exposing the four attributes below satisfies this protocol,
    which lets the sync.ttl module accept trial-type configs without
    importing domain.session.BpodTrialType.

    Attributes:
        trial_type: Trial type identifier
        sync_signal: Name of the Bpod state/event used for alignment
        sync_ttl: ID of the TTL channel carrying the sync pulses
        description: Human-readable description
    """

    trial_type: int
    sync_signal: str
    sync_ttl: str
    description: str
79
+
80
+
81
def align_bpod_trials_to_ttl(
    trial_type_configs: List[BpodTrialTypeProtocol],
    bpod_data: Dict,
    ttl_pulses: Dict[str, List[float]],
) -> Tuple[Dict[int, float], List[str]]:
    """Compute per-trial offsets mapping Bpod time onto the TTL (absolute) timeline.

    Low-level, Session-free entry point. Trials are visited in order; for each
    one the configured sync state is located in the trial's States and paired
    with the next not-yet-consumed pulse on the configured TTL channel.

    Algorithm:
    ----------
    1. Read the trial's type from the Bpod TrialTypes array (all trials are
       treated as type 1 when TrialTypes is absent)
    2. Look up (sync_signal, sync_ttl) for that type in trial_type_configs
    3. Extract the sync_signal start time (relative to trial start) from States
    4. Take the next unused pulse from the matching TTL channel
    5. offset = ttl_pulse_time - (TrialStartTimestamp + sync_time_rel)

    Skipped trials:
    ---------------
    A trial gets no offset, and a message is appended to the returned
    warnings, when:
    - its trial type has no entry in trial_type_configs
    - the sync state is missing / not visited in that trial
    - the configured TTL channel is not a key of ttl_pulses
    - the TTL channel has no pulses left

    A pulse is consumed only when a trial reaches the pairing step, so trials
    skipped for the first three reasons do not advance the channel. Channels
    that end with unused pulses are reported in the warnings as well.

    Args:
        trial_type_configs: Trial type sync configurations
            (from session.bpod.trial_types)
        bpod_data: Parsed Bpod data containing a "SessionData" structure
        ttl_pulses: Dict mapping TTL channel ID to sorted list of absolute
            timestamps (typically from w2t_bkin.ttl.get_ttl_pulses)

    Returns:
        Tuple of:
        - trial_offsets: Dict mapping 1-based trial number → offset
        - warnings: List of warning messages (skipped trials, unused pulses)

    Raises:
        SyncError: If bpod_data has no "SessionData" key, or if there are
            trials to align but trial_type_configs yields no entries

    Example:
        >>> from w2t_bkin.ttl import get_ttl_pulses
        >>> from w2t_bkin.sync.ttl import align_bpod_trials_to_ttl
        >>> from w2t_bkin.bpod.code import parse_bpod
        >>> from pathlib import Path
        >>>
        >>> session_dir = Path("data/Session-001")
        >>> bpod_data = parse_bpod(session_dir, "Bpod/*.mat", "name_asc")
        >>> ttl_patterns = {"ttl_bpod": "TTLs/bod*.txt"}
        >>> ttl_pulses = get_ttl_pulses(session_dir, ttl_patterns)
        >>>
        >>> from w2t_bkin.domain.session import BpodTrialType
        >>> trial_configs = [
        ...     BpodTrialType(trial_type=1, sync_signal="W2L_Audio",
        ...                   sync_ttl="ttl_bpod", description="W2L")
        ... ]
        >>>
        >>> trial_offsets, warnings = align_bpod_trials_to_ttl(
        ...     trial_configs, bpod_data, ttl_pulses
        ... )
    """
    from ..utils import convert_matlab_struct, to_scalar

    if "SessionData" not in bpod_data:
        raise SyncError("Invalid Bpod structure: missing SessionData")

    session_data = convert_matlab_struct(bpod_data["SessionData"])
    n_trials = int(session_data["nTrials"])
    if n_trials == 0:
        logger.info("No trials to align")
        return {}, []

    # trial_type -> (sync_signal, sync_ttl); a later duplicate overrides an earlier one.
    sync_lookup = {cfg.trial_type: (cfg.sync_signal, cfg.sync_ttl) for cfg in trial_type_configs}
    if not sync_lookup:
        raise SyncError("No trial_type sync configuration provided in trial_type_configs")

    # Index of the next unconsumed pulse, tracked separately per TTL channel.
    next_pulse_index = dict.fromkeys(ttl_pulses, 0)

    trials = convert_matlab_struct(session_data["RawEvents"])["Trial"]

    trial_types = session_data.get("TrialTypes")
    if trial_types is None:
        trial_types = [1] * n_trials
        logger.warning("TrialTypes not found in Bpod data, defaulting all trials to type 1")

    trial_offsets = {}
    warnings_list = []

    def note(message, emit=logger.warning):
        # Collect the message for the caller and mirror it to the log.
        warnings_list.append(message)
        emit(message)

    for index in range(n_trials):
        trial_num = index + 1  # trial numbers are 1-based
        trial_data = convert_matlab_struct(trials[index])
        trial_type = int(to_scalar(trial_types, index))

        if trial_type not in sync_lookup:
            note(f"Trial {trial_num}: trial_type {trial_type} not in session config, skipping")
            continue

        sync_signal, sync_ttl_id = sync_lookup[trial_type]

        sync_time_rel = get_sync_time_from_bpod_trial(trial_data, sync_signal)
        if sync_time_rel is None:
            note(f"Trial {trial_num}: sync_signal '{sync_signal}' not found or not visited, skipping")
            continue

        if sync_ttl_id not in ttl_pulses:
            note(
                f"Trial {trial_num}: TTL channel '{sync_ttl_id}' not found in ttl_pulses, skipping",
                emit=logger.error,
            )
            continue

        channel_pulses = ttl_pulses[sync_ttl_id]
        cursor = next_pulse_index[sync_ttl_id]
        if cursor >= len(channel_pulses):
            note(f"Trial {trial_num}: No more TTL pulses available for '{sync_ttl_id}', skipping")
            continue

        ttl_pulse_time = channel_pulses[cursor]
        next_pulse_index[sync_ttl_id] = cursor + 1

        # TrialStartTimestamp may be non-zero (e.g. after merging sessions).
        trial_start_timestamp = float(to_scalar(session_data["TrialStartTimestamp"], index))

        # In Bpod time the sync state starts at trial_start_timestamp + sync_time_rel;
        # the offset is whatever shifts that instant onto the TTL pulse.
        offset_abs = ttl_pulse_time - (trial_start_timestamp + sync_time_rel)
        trial_offsets[trial_num] = offset_abs

        logger.debug(
            f"Trial {trial_num}: type={trial_type}, sync_signal={sync_signal}, "
            f"trial_start={trial_start_timestamp:.4f}s, sync_rel={sync_time_rel:.4f}s, "
            f"ttl_abs={ttl_pulse_time:.4f}s, offset={offset_abs:.4f}s"
        )  # fmt: skip

    # Report pulses that were never paired with a trial.
    for ttl_id, consumed in next_pulse_index.items():
        unused = len(ttl_pulses[ttl_id]) - consumed
        if unused > 0:
            note(f"TTL channel '{ttl_id}' has {unused} unused pulses")

    logger.info(f"Computed offsets for {len(trial_offsets)} out of {n_trials} trials using TTL sync")
    return trial_offsets, warnings_list
@@ -0,0 +1,38 @@
1
+ """Video transcoding module with idempotence and content addressing (Phase 3 - Optional).
2
+
3
+ Transcodes raw video recordings to a mezzanine format using FFmpeg with content-based
4
+ output paths for idempotent processing.
5
+
6
+ Public API:
7
+ -----------
8
+ All public functions and models are re-exported at the package level:
9
+
10
+ from w2t_bkin.transcode import (
11
+ TranscodeOptions,
12
+ TranscodedVideo,
13
+ create_transcode_options,
14
+ transcode_video,
15
+ is_already_transcoded,
16
+ )
17
+
18
+ See core and models modules for detailed documentation.
19
+ """
20
+
21
+ # Re-export core functions
22
+ from .core import TranscodeError, create_transcode_options, is_already_transcoded, transcode_video, update_manifest_with_transcode
23
+
24
+ # Re-export models
25
+ from .models import TranscodedVideo, TranscodeOptions
26
+
27
# Public names of the transcode package (order: models, exception, core functions).
__all__ = [
    "TranscodeOptions",  # model
    "TranscodedVideo",  # model
    "TranscodeError",  # exception
    "create_transcode_options",  # core function
    "is_already_transcoded",  # core function
    "transcode_video",  # core function
    "update_manifest_with_transcode",  # core function
]
@@ -0,0 +1,303 @@
1
+ """Video transcoding module with idempotence and content addressing (Phase 3 - Optional).
2
+
3
+ Transcodes raw video recordings to a mezzanine format (e.g., H.264 in MP4 container)
4
+ using FFmpeg, with content-based output paths for idempotent processing. Ensures
5
+ reproducibility by hashing source content and transcode parameters.
6
+
7
+ The module handles format conversion, codec selection, resolution scaling, and
8
+ frame rate adjustment while preserving original video metadata for provenance tracking.
9
+
10
+ Key Features:
11
+ -------------
12
+ - **FFmpeg Integration**: Subprocess-based transcoding with configurable codecs
13
+ - **Content Addressing**: SHA-256 hashes for idempotent output paths
14
+ - **Parameter Hashing**: Transcode options included in output path
15
+ - **Metadata Preservation**: Original format, resolution, frame rate tracked
16
+ - **Idempotence**: Re-running with same inputs produces same output path
17
+ - **Validation**: Verify transcoded video properties match expectations
18
+
19
+ Main Functions:
20
+ ---------------
21
+ - transcode_video: Main transcoding function with FFmpeg
22
+ - compute_transcode_hash: Generate content-based output path
23
+ - validate_transcoded_video: Verify output properties
24
+ - get_video_metadata: Extract source video metadata
25
+ - build_ffmpeg_command: Construct FFmpeg command line
26
+
27
+ Requirements:
28
+ -------------
29
+ - FR-4: Transcode videos to mezzanine format
30
+ - FR-TRANS-1: Use FFmpeg for transcoding
31
+ - FR-TRANS-2: Content-based output paths
32
+ - NFR-2: Idempotent processing
33
+
34
+ Acceptance Criteria:
35
+ -------------------
36
+ - A-TRANS-1: Transcode video using FFmpeg
37
+ - A-TRANS-2: Generate content-based output path
38
+ - A-TRANS-3: Validate transcoded video properties
39
+ - A-TRANS-4: Preserve original metadata
40
+
41
+ Data Flow:
42
+ ----------
43
+ 1. get_video_metadata → Extract source properties
44
+ 2. compute_transcode_hash → Generate output path
45
+ 3. build_ffmpeg_command → Construct transcode command
46
+ 4. transcode_video → Execute FFmpeg subprocess
47
+ 5. validate_transcoded_video → Verify output
48
+
49
+ Example:
50
+ --------
51
+ >>> from w2t_bkin.transcode import transcode_video, TranscodeOptions
52
+ >>> from pathlib import Path
53
+ >>>
54
+ >>> # Define transcode options
55
+ >>> options = TranscodeOptions(
56
+ ... codec="libx264",
57
+ ... preset="medium",
58
+ ... crf=23,
59
+ ... keyint=15
60
+ ... )
61
+ >>>
62
+ >>> # Transcode video
63
+ >>> result = transcode_video(
64
+ ... video_path=Path("raw_video.avi"),
65
+ ... output_dir=Path("data/processed/videos"),
66
+ ... options=options
67
+ ... )
68
+ >>>
69
+ >>> print(f"Transcoded: {result.output_path}")
70
+ >>> print(f"Codec: {result.codec}")
71
+ >>> print(f"Source checksum: {result.checksum}")
72
+ >>> print(f"Frame count: {result.frame_count}")
73
+ """
74
+
75
+ import hashlib
76
+ import json
77
+ import logging
78
+ from pathlib import Path
79
+ import subprocess
80
+ from typing import Dict, Optional
81
+
82
+ import cv2
83
+
84
+ from w2t_bkin.utils import compute_file_checksum, ensure_directory
85
+
86
+ from .models import TranscodedVideo, TranscodeOptions
87
+
88
+ logger = logging.getLogger(__name__)
89
+
90
+
91
class TranscodeError(Exception):
    """Base exception for transcode-related errors.

    Carries no extra state; it exists so callers can catch transcode
    failures separately from other exceptions.
    """
95
+
96
+
97
def create_transcode_options(codec: str = "libx264", crf: int = 18, preset: str = "medium", keyint: int = 15) -> TranscodeOptions:
    """Build a TranscodeOptions instance after range-checking the CRF.

    Args:
        codec: Video codec (default: libx264)
        crf: Constant Rate Factor, 0-51 inclusive (default: 18, lower=better quality)
        preset: Encoding preset (default: medium)
        keyint: Keyframe interval (default: 15)

    Returns:
        TranscodeOptions populated with the given values

    Raises:
        ValueError: If crf is outside [0, 51]
    """
    crf_in_range = 0 <= crf <= 51
    if crf_in_range:
        return TranscodeOptions(codec=codec, crf=crf, preset=preset, keyint=keyint)

    raise ValueError(f"CRF must be in range [0, 51], got {crf}")
116
+
117
+
118
def is_already_transcoded(video_path: Path, options: TranscodeOptions, transcoded_path: Path) -> bool:
    """Report whether a transcoded output is already present on disk.

    The check is purely an existence test on ``transcoded_path``. The
    ``video_path`` and ``options`` arguments are accepted but not consulted,
    so an existing output is treated as up to date regardless of which
    options produced it.

    Args:
        video_path: Original video path (currently unused)
        options: Transcode options (currently unused)
        transcoded_path: Path to transcoded output

    Returns:
        True if ``transcoded_path`` exists, False otherwise
    """
    # Existence is the only criterion for now; metadata comparison could be added here.
    return transcoded_path.exists()
136
+
137
+
138
def transcode_video(video_path: Path, options: TranscodeOptions, output_dir: Path) -> TranscodedVideo:
    """Transcode a video to an MP4 mezzanine file using FFmpeg.

    The output is written to
    ``<output_dir>/<camera_id>_transcoded_<first 12 chars of source SHA-256>.mp4``.
    ``camera_id`` is the part of the file stem before the first underscore
    when that part starts with "cam"; otherwise it defaults to "cam0".

    NOTE: the output name depends only on the source content and camera id,
    not on ``options``. FFmpeg is invoked with ``-y``, so an existing output
    at that path is overwritten.

    Args:
        video_path: Path to input video
        options: Transcoding options (codec, crf, preset, keyint)
        output_dir: Output directory (created through ``ensure_directory``)

    Returns:
        TranscodedVideo metadata; ``checksum`` is the SHA-256 of the source
        file and ``frame_count`` is read from the source via OpenCV.

    Raises:
        TranscodeError: If the input does not exist, FFmpeg exits with an
            error, the output file is missing afterwards, or any other
            step fails. The underlying exception is chained as ``__cause__``.
    """
    if not video_path.exists():
        raise TranscodeError(f"Video file not found: {video_path}")

    try:
        # Content addressing: a short prefix of the source hash names the output.
        checksum = compute_file_checksum(video_path, algorithm="sha256")
        checksum_prefix = checksum[:12]

        # Camera ID comes from a "camN_..." style filename; default otherwise.
        camera_id = "cam0"
        if "_" in video_path.stem:
            stem_prefix = video_path.stem.split("_")[0]
            if stem_prefix.startswith("cam"):
                camera_id = stem_prefix

        ensure_directory(output_dir)
        output_path = output_dir / f"{camera_id}_transcoded_{checksum_prefix}.mp4"

        # Frame count of the original, recorded for later verification.
        cap = cv2.VideoCapture(str(video_path))
        try:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            # Always free the capture handle, even if the query raises.
            cap.release()

        ffmpeg_cmd = [
            "ffmpeg",
            "-y",  # Overwrite output
            "-i",
            str(video_path),
            "-c:v",
            options.codec,
            "-crf",
            str(options.crf),
            "-preset",
            options.preset,
            "-g",
            str(options.keyint),
            "-pix_fmt",
            "yuv420p",
            str(output_path),
        ]

        logger.info(f"Transcoding {video_path.name} to {output_path.name}")
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(ffmpeg_cmd, capture_output=True, text=True, check=True)

        if not output_path.exists():
            raise TranscodeError("Transcode completed but output file not found")

        return TranscodedVideo(
            camera_id=camera_id,
            original_path=video_path,
            output_path=output_path,
            codec=options.codec,
            checksum=checksum,
            frame_count=frame_count,
        )

    except TranscodeError:
        # Already the right type; do not wrap it a second time.
        raise
    except subprocess.CalledProcessError as e:
        raise TranscodeError(f"FFmpeg failed: {e.stderr}") from e
    except Exception as e:
        raise TranscodeError(f"Transcode failed: {e}") from e
213
+
214
+
215
def update_manifest_with_transcode(manifest: Dict, transcoded: "TranscodedVideo") -> Dict:
    """Return a copy of the manifest annotated with a transcoded video's output.

    The first entry in ``manifest["videos"]`` whose ``camera_id`` equals
    ``transcoded.camera_id`` gains two keys: ``transcoded_path`` (the output
    path as a string) and ``transcoded_checksum``. If no entry matches, or
    the manifest has no ``videos`` key, the copy is returned unchanged.

    The input manifest is never modified: a deep copy is taken because the
    per-video dicts being updated are nested inside the manifest, and a
    shallow copy would share them with the caller's object.

    Args:
        manifest: Session manifest dict
        transcoded: TranscodedVideo metadata

    Returns:
        Updated deep copy of the manifest
    """
    from copy import deepcopy

    updated = deepcopy(manifest)

    for video in updated.get("videos", []):
        if video.get("camera_id") == transcoded.camera_id:
            video["transcoded_path"] = str(transcoded.output_path)
            video["transcoded_checksum"] = transcoded.checksum
            break  # only the first matching camera entry is updated

    return updated
236
+
237
+
238
if __name__ == "__main__":
    # Usage examples for the transcode module. This only prints a
    # walkthrough; no video file is read and FFmpeg is not invoked.
    print("=" * 70)
    print("W2T-BKIN Transcode Module - Usage Examples")
    print("=" * 70)
    print()

    print("Example 1: Transcode Configuration")
    print("-" * 50)

    # Example transcode parameters
    config = {
        "codec": "h264",
        "crf": 18,
        "preset": "medium",
        "pixel_format": "yuv420p",
        "max_resolution": (1920, 1080),
    }

    print(f"Codec: {config['codec']}")
    print(f"CRF (quality): {config['crf']} (lower = better)")
    print(f"Preset: {config['preset']}")
    print(f"Pixel format: {config['pixel_format']}")
    print(f"Max resolution: {config['max_resolution'][0]}x{config['max_resolution'][1]}")
    print()

    print("Example 2: Video Checksum")
    print("-" * 50)

    # Note: This would require an actual video file
    print("To compute video checksum:")
    print(" from w2t_bkin.utils import compute_file_checksum")
    print(" checksum = compute_file_checksum(video_path, algorithm='sha256')")
    print(" # Returns SHA256 hash of video file")
    print()

    print("Example 3: Transcoding Pipeline")
    print("-" * 50)

    print("Full transcode workflow:")
    print(" 1. Check if video needs transcoding (codec, resolution)")
    print(" 2. Run FFmpeg with specified parameters")
    print(" 3. Verify output integrity with checksum")
    print(" 4. Update manifest with transcoded path")
    print()
    # The snippet below mirrors the real signature:
    # transcode_video(video_path, options, output_dir).
    print("Production usage:")
    print(" from pathlib import Path")
    print(" from w2t_bkin.transcode import create_transcode_options, transcode_video")
    print(" options = create_transcode_options(codec='libx264', crf=18)")
    print(" result = transcode_video(")
    print("     video_path=Path('raw_video.avi'),")
    print("     options=options,")
    print("     output_dir=Path('data/processed/videos'),")
    print(" )")
    print()

    print("Benefits of transcoding:")
    print(" ✓ Standardized codec for compatibility")
    print(" ✓ Reduced file size with quality preservation")
    print(" ✓ Deterministic output (same input → same output)")
    print(" ✓ Idempotent (can re-run safely)")
    print()

    print("=" * 70)
    print("Examples completed. See module docstring for API details.")
    print("=" * 70)
@@ -0,0 +1,96 @@
1
+ """Transcode module-local models for video transcoding metadata.
2
+
3
+ This module defines models owned by the transcode module for representing
4
+ video transcoding configuration and output metadata with content-addressed
5
+ checksums for idempotent operations.
6
+
7
+ Model ownership follows the target architecture where each module owns
8
+ its own models rather than sharing through a central domain package.
9
+ """
10
+
11
+ from pathlib import Path
12
+ from typing import Literal
13
+
14
+ from pydantic import BaseModel, Field
15
+
16
+ __all__ = ["TranscodeOptions", "TranscodedVideo"]
17
+
18
+
19
class TranscodeOptions(BaseModel):
    """ffmpeg encoding parameters for transcoding a video to mezzanine format.

    ``model_config`` marks the model as frozen and forbids extra fields.
    All four fields are required.

    Attributes:
        codec: ffmpeg video codec, one of "libx264", "libx265",
            "libvpx-vp9", "libaom-av1"
        crf: Constant Rate Factor for quality (0-51 inclusive, lower=better)
        preset: ffmpeg preset, from "ultrafast" through "veryslow"
        keyint: Keyframe interval in frames (must be > 0)

    Requirements:
        - FR-4: Configurable transcoding

    Example:
        >>> from w2t_bkin.transcode.models import TranscodeOptions
        >>> options = TranscodeOptions(
        ...     codec="libx264",
        ...     crf=23,
        ...     preset="medium",
        ...     keyint=30
        ... )
    """

    model_config = {"frozen": True, "extra": "forbid"}

    codec: Literal["libx264", "libx265", "libvpx-vp9", "libaom-av1"] = Field(
        ...,
        description="ffmpeg video codec: 'libx264' | 'libx265' | 'libvpx-vp9' | 'libaom-av1'",
    )
    crf: int = Field(
        ...,
        ge=0,
        le=51,
        description="Constant Rate Factor (0-51, lower=better quality)",
    )
    preset: Literal["ultrafast", "superfast", "veryfast", "faster", "fast", "medium", "slow", "slower", "veryslow"] = Field(
        ...,
        description="ffmpeg encoding preset (speed vs. compression trade-off)",
    )
    keyint: int = Field(
        ...,
        gt=0,
        description="Keyframe interval in frames",
    )
51
+
52
+
53
class TranscodedVideo(BaseModel):
    """Record describing the result of transcoding one video file.

    ``model_config`` marks the model as frozen and forbids extra fields.
    All fields are required.

    Attributes:
        camera_id: Camera identifier
        original_path: Path to original raw video
        output_path: Path to transcoded mezzanine video
        codec: Video codec used for transcoding
        checksum: Content-addressed hash (e.g., SHA256)
        frame_count: Total frame count (for verification), must be >= 0

    Requirements:
        - FR-4: Transcode to mezzanine format
        - NFR-2: Idempotent operations via checksums

    Design Notes:
        - output_path should be content-addressed using checksum
        - Re-running transcoding with same inputs produces same checksum
        - Allows skipping transcoding if output exists with matching checksum

    Example:
        >>> from pathlib import Path
        >>> from w2t_bkin.transcode.models import TranscodedVideo
        >>> video = TranscodedVideo(
        ...     camera_id="cam0",
        ...     original_path=Path("raw/session_cam0.avi"),
        ...     output_path=Path("intermediate/session_cam0_abc123.mp4"),
        ...     codec="libx264",
        ...     checksum="abc123def456...",
        ...     frame_count=8580
        ... )
    """

    model_config = {"frozen": True, "extra": "forbid"}

    camera_id: str = Field(
        ...,
        description="Camera identifier",
    )
    original_path: Path = Field(
        ...,
        description="Path to original raw video file",
    )
    output_path: Path = Field(
        ...,
        description="Path to transcoded mezzanine video (content-addressed)",
    )
    codec: Literal["libx264", "libx265", "libvpx-vp9", "libaom-av1"] = Field(
        ...,
        description="Video codec used for transcoding",
    )
    checksum: str = Field(
        ...,
        description="Content-addressed hash (e.g., SHA256) for idempotent operations",
    )
    frame_count: int = Field(
        ...,
        ge=0,
        description="Total frame count for verification",
    )