w2t-bkin 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
w2t_bkin/sync/stats.py ADDED
@@ -0,0 +1,176 @@
1
+ """Create and persist alignment statistics.
2
+
3
+ This module handles the representation, creation, and persistence of
4
+ synchronization quality metrics.
5
+
6
+ Example:
7
+ >>> stats = create_alignment_stats(
8
+ ... timebase_source="ttl",
9
+ ... mapping="nearest",
10
+ ... offset_s=0.0,
11
+ ... max_jitter_s=0.008,
12
+ ... p95_jitter_s=0.005,
13
+ ... aligned_samples=1000
14
+ ... )
15
+ >>> write_alignment_stats(stats, Path("alignment.json"))
16
+ """
17
+
18
+ from datetime import datetime
19
+ import json
20
+ import logging
21
+ from pathlib import Path
22
+ from typing import Literal, Union
23
+
24
+ from pydantic import BaseModel, Field
25
+
26
+ from ..exceptions import SyncError
27
+ from ..utils import write_json
28
+
29
+ __all__ = [
30
+ "AlignmentStats",
31
+ "create_alignment_stats",
32
+ "write_alignment_stats",
33
+ "load_alignment_manifest",
34
+ "compute_alignment",
35
+ ]
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+
40
class AlignmentStats(BaseModel):
    """Immutable record of synchronization quality metrics.

    The model is configured as frozen and forbids extra fields.

    Attributes:
        timebase_source: Reference timebase; one of "nominal_rate", "ttl",
            or "neuropixels".
        mapping: Alignment strategy; either "nearest" or "linear".
        offset_s: Offset applied to the timebase, in seconds.
        max_jitter_s: Largest observed jitter in seconds (must be >= 0).
        p95_jitter_s: 95th percentile jitter in seconds (must be >= 0).
        aligned_samples: Count of aligned samples (must be >= 0).
    """

    model_config = dict(frozen=True, extra="forbid")

    timebase_source: Literal["nominal_rate", "ttl", "neuropixels"] = Field(
        ...,
        description="Source of reference timebase: 'nominal_rate' | 'ttl' | 'neuropixels'",
    )
    mapping: Literal["nearest", "linear"] = Field(
        ...,
        description="Alignment mapping strategy: 'nearest' | 'linear'",
    )
    offset_s: float = Field(
        ...,
        description="Time offset applied to timebase in seconds",
    )
    max_jitter_s: float = Field(
        ...,
        ge=0,
        description="Maximum jitter observed in seconds",
    )
    p95_jitter_s: float = Field(
        ...,
        ge=0,
        description="95th percentile jitter in seconds",
    )
    aligned_samples: int = Field(
        ...,
        ge=0,
        description="Number of samples successfully aligned",
    )
60
+
61
+
62
def create_alignment_stats(
    timebase_source: str,
    mapping: str,
    offset_s: float,
    max_jitter_s: float,
    p95_jitter_s: float,
    aligned_samples: int,
) -> AlignmentStats:
    """Build an AlignmentStats record from individual metric values.

    All arguments are forwarded unchanged to the AlignmentStats constructor,
    which performs the field validation.

    Args:
        timebase_source: "nominal_rate", "ttl", or "neuropixels"
        mapping: "nearest" or "linear"
        offset_s: Time offset in seconds
        max_jitter_s: Maximum jitter in seconds
        p95_jitter_s: 95th percentile jitter in seconds
        aligned_samples: Number of aligned samples

    Returns:
        The newly constructed AlignmentStats instance
    """
    field_values = {
        "timebase_source": timebase_source,
        "mapping": mapping,
        "offset_s": offset_s,
        "max_jitter_s": max_jitter_s,
        "p95_jitter_s": p95_jitter_s,
        "aligned_samples": aligned_samples,
    }
    return AlignmentStats(**field_values)
91
+
92
+
93
def write_alignment_stats(stats: AlignmentStats, output_path: Path) -> None:
    """Write alignment stats to a JSON file.

    The stats are dumped to a dict, a "generated_at" entry holding the
    current UTC time (ISO 8601, without a UTC offset suffix) is added, and
    the result is handed to write_json().

    Args:
        stats: AlignmentStats instance
        output_path: Output JSON path
    """
    # Function-scope import so the block does not depend on a change to the
    # module-level import list.
    from datetime import timezone

    data = stats.model_dump()
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and strip tzinfo so the persisted string keeps its previous
    # format (no "+00:00" suffix).
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    data["generated_at"] = now_utc.isoformat()
    write_json(data, output_path)
    logger.info(f"Wrote alignment stats to {output_path}")
104
+
105
+
106
def load_alignment_manifest(alignment_path: Union[str, Path]) -> dict:
    """Load alignment manifest from JSON (stub).

    Args:
        alignment_path: Path to alignment.json

    Returns:
        The parsed JSON content of the manifest. When the file does not
        exist, a mock manifest with a single "cam0" entry (100 timestamps
        at 30 fps) is returned instead.

    Raises:
        SyncError: The file exists but cannot be read or is not valid JSON

    Note:
        Returns mock data if file doesn't exist.
    """
    # Path() accepts str and any os.PathLike, and is a no-op for Path input.
    alignment_path = Path(alignment_path)

    if not alignment_path.exists():
        # For Phase 3 integration tests, return mock data if file doesn't exist
        logger.warning(f"Alignment manifest not found: {alignment_path}, returning mock data")
        return {
            "cam0": {
                "timestamps": [i / 30.0 for i in range(100)],  # 100 frames at 30fps
                "source": "nominal_rate",
                "mapping": "nearest",
            }
        }

    try:
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(alignment_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise SyncError(f"Failed to load alignment manifest from {alignment_path}: {e}") from e
140
+
141
+
142
def compute_alignment(manifest: dict, config: dict) -> dict:
    """Produce per-camera timebase alignment data (stub).

    Every entry under the manifest's "cameras" key yields one alignment
    record keyed by its "camera_id" (default "cam0"), containing synthetic
    30 fps timestamps for "frame_count" frames (default 1000).

    Args:
        manifest: Manifest from ingest
        config: Timebase configuration (not read by this stub)

    Returns:
        Dict mapping camera id to its alignment record

    Raises:
        SyncError: Alignment failed (documented contract; this stub itself
            never raises it)

    Note:
        Currently returns mock data.
    """

    def _mock_record(n_frames):
        # Synthetic timestamps at a fixed 30 fps.
        return {
            "timestamps": [frame / 30.0 for frame in range(n_frames)],
            "source": "nominal_rate",
            "mapping": "nearest",
            "frame_count": n_frames,
        }

    alignment = {
        cam.get("camera_id", "cam0"): _mock_record(cam.get("frame_count", 1000))
        for cam in manifest.get("cameras", [])
    }

    logger.info(f"Computed alignment for {len(alignment)} cameras (stub)")
    return alignment
@@ -0,0 +1,311 @@
1
+ """Timebase providers for temporal synchronization.
2
+
3
+ Provides nominal rate, TTL, and Neuropixels timebase sources.
4
+
5
+ Example:
6
+ >>> from w2t_bkin.sync import create_timebase_provider
7
+ >>> provider = create_timebase_provider(source="nominal_rate", rate=30.0)
8
+ >>> timestamps = provider.get_timestamps(n_samples=100)
9
+ """
10
+
11
+ from abc import ABC, abstractmethod
12
+ from pathlib import Path
13
+ from typing import Any, List, Optional
14
+
15
+ from ..exceptions import SyncError
16
+
17
+ __all__ = [
18
+ "TimebaseProvider",
19
+ "NominalRateProvider",
20
+ "TTLProvider",
21
+ "NeuropixelsProvider",
22
+ "create_timebase_provider",
23
+ "create_timebase_provider_from_config",
24
+ ]
25
+
26
+
27
+ # =============================================================================
28
+ # Timebase Provider Abstraction
29
+ # =============================================================================
30
+
31
+
32
class TimebaseProvider(ABC):
    """Abstract source of timestamps for synchronization.

    Concrete providers must override get_timestamps().
    """

    def __init__(self, source: str, offset_s: float = 0.0):
        """Store the source identifier and the time offset.

        Args:
            source: Identifier for timebase source (e.g., "nominal_rate", "ttl")
            offset_s: Time offset to apply to all timestamps
        """
        self.source, self.offset_s = source, offset_s

    @abstractmethod
    def get_timestamps(self, n_samples: Optional[int] = None) -> List[float]:
        """Return the timestamps provided by this timebase.

        Args:
            n_samples: Number of samples (required for synthetic timebases)

        Returns:
            List of timestamps in seconds
        """
59
+
60
+
61
class NominalRateProvider(TimebaseProvider):
    """Synthesize evenly spaced timestamps from a constant sample rate.

    Example:
        >>> provider = NominalRateProvider(rate=30.0)
        >>> timestamps = provider.get_timestamps(n_samples=100)
    """

    def __init__(self, rate: float, offset_s: float = 0.0):
        """Set up the provider with its rate and offset.

        Args:
            rate: Sample rate in Hz (e.g., 30.0 for 30 fps video)
            offset_s: Time offset to apply to all timestamps
        """
        super().__init__(source="nominal_rate", offset_s=offset_s)
        self.rate = rate

    def get_timestamps(self, n_samples: Optional[int] = None) -> List[float]:
        """Generate n_samples timestamps spaced 1/rate seconds apart.

        Args:
            n_samples: Number of samples to generate (required)

        Returns:
            List of timestamps starting at offset_s

        Raises:
            ValueError: If n_samples is None
        """
        if n_samples is not None:
            return [self.offset_s + idx / self.rate for idx in range(n_samples)]

        raise ValueError("n_samples required for NominalRateProvider")
96
+
97
+
98
class TTLProvider(TimebaseProvider):
    """Load timestamps from TTL hardware sync files.

    Each TTL file is read as text with one float value per line; blank lines
    are skipped. Values from all files are merged, sorted, and shifted by
    offset_s once, at construction time.

    Example:
        >>> provider = TTLProvider(ttl_id="camera_sync", ttl_files=["TTLs/cam0.txt"])
        >>> timestamps = provider.get_timestamps()
    """

    def __init__(self, ttl_id: str, ttl_files: List[str], offset_s: float = 0.0):
        """Initialize TTL provider and eagerly load its files.

        Args:
            ttl_id: Identifier for this TTL channel
            ttl_files: List of TTL file paths to load
            offset_s: Time offset to apply to all timestamps

        Raises:
            SyncError: If TTL files cannot be loaded or parsed
        """
        super().__init__(source="ttl", offset_s=offset_s)
        self.ttl_id = ttl_id
        self.ttl_files = ttl_files
        self._timestamps: List[float] = []
        self._load_timestamps()

    def _load_timestamps(self) -> None:
        """Load timestamps from TTL files into the internal cache.

        Raises:
            SyncError: If TTL file not found or invalid format
        """
        timestamps: List[float] = []

        for ttl_file in self.ttl_files:
            path = Path(ttl_file)
            if not path.exists():
                raise SyncError(f"TTL file not found: {ttl_file}")

            try:
                with open(path, "r") as f:
                    for line in f:
                        line = line.strip()
                        if line:
                            timestamps.append(float(line))
            except Exception as e:
                # Chain the cause so the offending I/O or parse error is
                # preserved in the traceback.
                raise SyncError(f"Failed to parse TTL file {ttl_file}: {e}") from e

        # Apply offset and sort
        self._timestamps = [t + self.offset_s for t in sorted(timestamps)]

    def get_timestamps(self, n_samples: Optional[int] = None) -> List[float]:
        """Get timestamps from TTL files.

        Args:
            n_samples: Ignored for TTL provider (returns all loaded timestamps)

        Returns:
            A new list holding the timestamps from the TTL files (sorted,
            offset applied)
        """
        # Return a copy so callers mutating the result cannot corrupt the
        # provider's cached timestamps.
        return list(self._timestamps)
158
+
159
+
160
class NeuropixelsProvider(TimebaseProvider):
    """Timebase provider for Neuropixels recordings (stub).

    No recording is read yet; timestamps are synthesized at 30 kHz.
    """

    def __init__(self, stream: str, offset_s: float = 0.0):
        """Remember the stream identifier and offset.

        Args:
            stream: Neuropixels stream identifier
            offset_s: Time offset to apply
        """
        super().__init__(source="neuropixels", offset_s=offset_s)
        self.stream = stream

    def get_timestamps(self, n_samples: Optional[int] = None) -> List[float]:
        """Return synthetic timestamps for the Neuropixels stream (stub).

        Args:
            n_samples: Number of samples (default: 1000)

        Returns:
            Stub timestamps at 30 kHz sampling rate
        """
        count = 1000 if n_samples is None else n_samples

        # Stub: fixed 30 kHz sampling
        sample_rate_hz = 30000.0
        return [self.offset_s + k / sample_rate_hz for k in range(count)]
192
+
193
+
194
+ # =============================================================================
195
+ # Factory Function
196
+ # =============================================================================
197
+
198
+
199
def create_timebase_provider(
    source: str,
    offset_s: float = 0.0,
    rate: Optional[float] = None,
    ttl_id: Optional[str] = None,
    ttl_files: Optional[List[str]] = None,
    neuropixels_stream: Optional[str] = None,
) -> TimebaseProvider:
    """Build the timebase provider matching the requested source.

    Args:
        source: "nominal_rate", "ttl", or "neuropixels"
        offset_s: Time offset in seconds
        rate: Sample rate (required for nominal_rate)
        ttl_id: TTL channel ID (required for ttl)
        ttl_files: TTL file paths (required for ttl)
        neuropixels_stream: Stream ID (required for neuropixels)

    Returns:
        TimebaseProvider instance

    Raises:
        SyncError: Invalid source or missing parameters

    Example:
        >>> provider = create_timebase_provider(source="nominal_rate", rate=30.0)
        >>> timestamps = provider.get_timestamps(n_samples=100)
    """
    if source == "nominal_rate":
        if rate is not None:
            return NominalRateProvider(rate=rate, offset_s=offset_s)
        raise SyncError("rate required when source='nominal_rate'")

    if source == "ttl":
        # ttl_id is checked before ttl_files so the first missing one is reported.
        if ttl_id is None:
            raise SyncError("ttl_id required when source='ttl'")
        if ttl_files is None:
            raise SyncError("ttl_files required when source='ttl'")
        return TTLProvider(ttl_id=ttl_id, ttl_files=ttl_files, offset_s=offset_s)

    if source == "neuropixels":
        if neuropixels_stream is not None:
            return NeuropixelsProvider(stream=neuropixels_stream, offset_s=offset_s)
        raise SyncError("neuropixels_stream required when source='neuropixels'")

    raise SyncError(f"Invalid timebase source: {source}")
246
+
247
+
248
def create_timebase_provider_from_config(config, manifest: Optional[Any] = None) -> TimebaseProvider:
    """Create timebase provider from Config and Manifest (high-level wrapper).

    Convenience wrapper that extracts primitive arguments from Config/Manifest
    and delegates to the low-level create_timebase_provider() function.

    Reads config.timebase.source and config.timebase.offset_s, plus
    config.timebase.ttl_id (TTL source) or config.timebase.neuropixels_stream
    (Neuropixels source). For the TTL source, the file list is taken from the
    first entry of manifest.ttls whose ttl_id matches.

    Args:
        config: Pipeline configuration with timebase settings
        manifest: Session manifest (required for TTL provider)

    Returns:
        TimebaseProvider instance

    Raises:
        SyncError: If invalid source or missing required data, including a
            TTL that is absent from the manifest or listed without files

    Note:
        For source="nominal_rate" the rate is fixed at 30 Hz; the
        configuration is not consulted for a rate.

    Example:
        >>> from w2t_bkin.config import load_config
        >>> from w2t_bkin.ingest import build_and_count_manifest
        >>>
        >>> config = load_config("config.toml")
        >>> session = load_session("session.toml")
        >>> manifest = build_and_count_manifest(config, session)
        >>>
        >>> provider = create_timebase_provider_from_config(config, manifest)
        >>> timestamps = provider.get_timestamps(n_samples=1000)
    """
    source = config.timebase.source
    offset_s = config.timebase.offset_s

    if source == "nominal_rate":
        # Default to 30 Hz for cameras
        rate = 30.0
        return create_timebase_provider(source="nominal_rate", rate=rate, offset_s=offset_s)

    elif source == "ttl":
        if manifest is None:
            raise SyncError("Manifest required for TTL timebase provider")

        ttl_id = config.timebase.ttl_id
        if not ttl_id:
            raise SyncError("timebase.ttl_id required when source='ttl'")

        # Find the first manifest entry for this TTL channel.
        matching_ttl = next((ttl for ttl in manifest.ttls if ttl.ttl_id == ttl_id), None)
        if matching_ttl is None:
            raise SyncError(f"TTL {ttl_id} not found in manifest")

        # A matching entry with no files is a different failure than a
        # missing entry; report it as such instead of "not found".
        ttl_files = matching_ttl.files
        if not ttl_files:
            raise SyncError(f"TTL {ttl_id} has no files in manifest")

        return create_timebase_provider(source="ttl", ttl_id=ttl_id, ttl_files=ttl_files, offset_s=offset_s)

    elif source == "neuropixels":
        stream = config.timebase.neuropixels_stream
        if not stream:
            raise SyncError("timebase.neuropixels_stream required when source='neuropixels'")

        return create_timebase_provider(source="neuropixels", neuropixels_stream=stream, offset_s=offset_s)

    else:
        raise SyncError(f"Invalid timebase source: {source}")