w2t-bkin 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
w2t_bkin/pipeline.py ADDED
@@ -0,0 +1,665 @@
1
+ """Pipeline orchestration module for W2T-BKIN (Phase 2 - Orchestration Layer).
2
+
3
+ This module provides the high-level orchestration API that owns Config and Session
4
+ and coordinates all pipeline stages. It translates Session/Config into primitive
5
+ arguments for low-level tools, orchestrates execution order, and collects results
6
+ into structured outputs.
7
+
8
+ Architecture:
9
+ -------------
10
+ Phase 2 establishes a clear layering:
11
+ - **Orchestration layer** (this module): Owns Config/Session, coordinates stages
12
+ - **Mid-level helpers**: Optional wrappers with _from_session/_from_config suffixes
13
+ - **Low-level tools**: Accept primitives only (paths, dicts, lists, scalars)
14
+
15
+ This module is the ONLY place where Config/Session flow into the pipeline. All
16
+ downstream modules receive primitives derived from Session/Manifest at this layer.
17
+
18
+ Key Functions:
19
+ --------------
20
+ - run_session: Complete session processing workflow
21
+ - run_validation: NWB validation using nwbinspector
22
+
23
+ Result Structure:
24
+ -----------------
25
+ RunResult contains:
26
+ - manifest: File discovery and counts
27
+ - alignment_stats: Timebase alignment quality metrics (if computed)
28
+ - task_recording: Behavioral task recording with ndx-structured-behavior (if Bpod files present)
29
+ - trials_table: Trials table with behavior data (if Bpod files present)
30
+ - facemap_bundle: Facial motion signals (if computed)
31
+ - transcoded_videos: Mezzanine format videos (if transcoding enabled)
32
+ - nwb_path: Path to assembled NWB file (if assembly completes)
33
+
34
+ Requirements:
35
+ -------------
36
+ - FR-1..17: Coordinate all pipeline stages
37
+ - NFR-1: Deterministic processing with provenance
38
+ - NFR-3: Clear error messages and logging
39
+ - NFR-11: Configuration-driven execution
40
+
41
+ Example:
42
+ --------
43
+ >>> from pathlib import Path
44
+ >>> from w2t_bkin.pipeline import run_session
45
+ >>>
46
+ >>> # Run complete session processing
47
+ >>> result = run_session(
48
+ ... config_path="config.toml",
49
+ ... session_id="Session-000001",
50
+ ... options={"skip_nwb": False, "skip_validation": False}
51
+ ... )
52
+ >>>
53
+ >>> print(f"Manifest: {len(result['manifest'].cameras)} cameras")
54
+ >>> print(f"Alignment: {result['alignment_stats'].max_jitter_s:.6f}s max jitter")
55
+ >>> print(f"NWB: {result['nwb_path']}")
56
+ >>>
57
+ >>> # Validate NWB output
58
+ >>> validation = run_validation(result['nwb_path'])
59
+ >>> print(f"Validation: {validation['status']}")
60
+ """
61
+
62
+ from datetime import datetime
63
+ import json
64
+ import logging
65
+ from pathlib import Path
66
+ from typing import Any, Dict, List, Optional, TypedDict, Union
67
+
68
+ from pynwb import NWBFile
69
+
70
+ from w2t_bkin.behavior import (
71
+ TaskRecording,
72
+ build_task_recording,
73
+ build_trials_table,
74
+ extract_action_types,
75
+ extract_actions,
76
+ extract_event_types,
77
+ extract_events,
78
+ extract_state_types,
79
+ extract_states,
80
+ )
81
+ from w2t_bkin.bpod import parse_bpod
82
+ from w2t_bkin.config import Config, load_config
83
+ from w2t_bkin.dlc import DLCInferenceOptions, DLCInferenceResult, run_dlc_inference_batch
84
+
85
+ # from w2t_bkin.facemap import FacemapBundle # Temporarily disabled
86
+ from w2t_bkin.session import add_video_acquisition, create_nwb_file, write_nwb_file
87
+ from w2t_bkin.sync import AlignmentStats, create_timebase_provider_from_config
88
+ from w2t_bkin.transcode import TranscodedVideo
89
+ from w2t_bkin.ttl import add_ttl_table_to_nwb, get_ttl_pulses
90
+ from w2t_bkin.utils import compute_hash, count_ttl_pulses, count_video_frames, discover_files, ensure_directory
91
+
92
+ logger = logging.getLogger(__name__)
93
+
94
+
95
+ # =============================================================================
96
+ # Result Models
97
+ # =============================================================================
98
+
99
+
100
class RunResult(TypedDict, total=False):
    """Result of run_session execution.

    Contains all outputs from pipeline stages. Fields are optional
    (``total=False``) to support partial execution (e.g., skip NWB assembly,
    optional pose/facemap).

    Attributes:
        nwbfile: In-memory NWBFile object with all data
        nwb_path: Path to written NWB file (if written to disk)
        alignment_stats: Timebase alignment quality metrics (optional)
        task_recording: Behavioral task recording with ndx-structured-behavior (optional)
        trials_table: Trials table with behavior data (optional)
        dlc_inference_results: DLC inference results for each camera (optional)
        facemap_bundle: Facial motion signals (optional)
        transcoded_videos: List of transcoded videos (optional)
        provenance: Pipeline execution metadata
    """

    # In-memory NWBFile populated by run_session
    nwbfile: NWBFile
    # Destination of the written file; None when the caller passed skip_nwb
    nwb_path: Optional[Path]
    # Quality metrics produced in the synchronization phase
    alignment_stats: Optional[AlignmentStats]
    # Only populated when Bpod files were discovered and parsed successfully
    task_recording: Optional[TaskRecording]
    trials_table: Optional[Any]  # TrialsTable from ndx-structured-behavior
    # One result per video passed to DLC batch inference (when enabled)
    dlc_inference_results: Optional[List[DLCInferenceResult]]
    facemap_bundle: Optional[Any]  # FacemapBundle - temporarily disabled
    transcoded_videos: Optional[List[TranscodedVideo]]
    # Execution metadata: version, hashes, timestamps (also embedded in NWB notes)
    provenance: Dict[str, Any]
127
+
128
+
129
class ValidationResult(TypedDict):
    """Result of run_validation execution.

    Attributes:
        status: Validation status ("pass" | "warn" | "fail")
        errors: List of validation errors
        warnings: List of validation warnings
        nwb_path: Path to validated NWB file
    """

    # One of "pass" | "warn" | "fail"
    status: str
    # Critical validation messages
    errors: List[str]
    # Best-practice / non-critical validation messages
    warnings: List[str]
    # Path of the file that was validated
    nwb_path: Path
143
+
144
+
145
+ # =============================================================================
146
+ # Core Orchestration
147
+ # =============================================================================
148
+
149
+
150
def run_session(
    config_path: Union[str, Path],
    session_id: str,
    options: Optional[Dict[str, Any]] = None,
) -> RunResult:
    """Run complete session processing workflow.

    Orchestrates all pipeline stages:
    1. Load Config and Session
    2. Build and verify Manifest
    3. Parse events (if Bpod files present)
    4. Run DLC inference (if enabled in config)
    5. Import pose/facemap (if available)
    6. Transcode videos (if enabled)
    7. Create timebase and compute alignment
    8. Assemble NWB file (if not skipped)

    This function owns Config/Session and translates them into primitive
    arguments for all low-level tools.

    Args:
        config_path: Path to config.toml
        session_id: Session identifier (must match session.toml session.id)
        options: Optional execution options:
            - skip_nwb: Skip NWB assembly (default: False)
            - skip_validation: Skip verification stage (default: False)
            - transcode_videos: Enable video transcoding (default: False)

    Returns:
        RunResult with all pipeline outputs and provenance

    Raises:
        ValueError: Session ID mismatch, or frame/TTL verification failure
        FileNotFoundError: DLC model config missing while inference is enabled

    Example:
        >>> result = run_session(
        ...     config_path="config.toml",
        ...     session_id="Session-000001"
        ... )
        >>> print(f"Max jitter: {result['alignment_stats'].max_jitter_s:.6f}s")
    """
    # Parse execution options (all disabled by default)
    options = options or {}
    skip_nwb = options.get("skip_nwb", False)
    skip_validation = options.get("skip_validation", False)
    transcode_videos = options.get("transcode_videos", False)

    logger.info("=" * 70)
    logger.info("W2T-BKIN Pipeline - Session Processing (NWB-First)")
    logger.info("=" * 70)
    logger.info(f"Config: {config_path}")
    logger.info(f"Session: {session_id}")
    logger.info("=" * 70)

    # -------------------------------------------------------------------------
    # Phase 0: Load Configuration and Create NWBFile
    # -------------------------------------------------------------------------
    logger.info("\n[Phase 0] Loading configuration and creating NWBFile...")
    config_path = Path(config_path)
    config = load_config(config_path)
    logger.info(f" ✓ Config loaded: {config.project.name}")

    # Find session.toml and create NWBFile (NWB-first architecture)
    session_dir = Path(config.paths.raw_root) / session_id
    session_path = session_dir / config.paths.metadata_file

    # BUG FIX: the original body referenced `session` (session.cameras,
    # session.TTLs, session.bpod, session.model_dump()) without ever loading
    # it, which raised NameError on first use. Load the session metadata
    # explicitly here.
    # TODO(review): confirm the loader name exported by w2t_bkin.session.
    from w2t_bkin.session import load_session

    session = load_session(session_path)

    nwbfile = create_nwb_file(session_path)
    logger.info(f" ✓ NWBFile created: {nwbfile.identifier}")

    # Verify the requested session id matches what session.toml declares
    if nwbfile.identifier != session_id:
        raise ValueError(f"Session ID mismatch: requested '{session_id}', " f"found '{nwbfile.identifier}' in {session_path}")

    # -------------------------------------------------------------------------
    # Phase 1: Discover Files and Add Acquisition Data
    # -------------------------------------------------------------------------
    logger.info("\n[Phase 1] Discovering files and populating NWBFile acquisition...")

    # Track discovered files for later processing
    discovered_cameras = []
    bpod_files = []

    # Discover and verify camera files
    for camera_config in session.cameras:
        logger.info(f" Processing camera: {camera_config.id}")

        # Discover video files matching the camera's configured glob pattern
        video_files = discover_files(session_dir, camera_config.paths, sort=True)
        if not video_files:
            logger.warning(f" ⚠ No videos found for pattern: {camera_config.paths}")
            continue

        logger.info(f" ✓ Found {len(video_files)} video file(s)")

        # Count frames across all files for this camera
        frame_count = 0
        for video_path in video_files:
            try:
                frames = count_video_frames(video_path)
                frame_count += frames
                logger.debug(f" {video_path.name}: {frames} frames")
            except Exception as e:
                logger.error(f" ✗ Failed to count frames in {video_path.name}: {e}")
                raise

        logger.info(f" ✓ Total frames: {frame_count}")

        # Count TTL pulses on the channel paired with this camera (for verification)
        ttl_pulse_count = 0
        ttl_config = None
        for ttl in session.TTLs:
            if ttl.id == camera_config.ttl_id:
                ttl_config = ttl
                ttl_files = discover_files(session_dir, ttl.paths, sort=True)
                for ttl_path in ttl_files:
                    ttl_pulse_count += count_ttl_pulses(ttl_path)
                break

        if ttl_config:
            logger.info(f" ✓ TTL pulses: {ttl_pulse_count}")

        # Verify frame/TTL alignment within the configured tolerance
        if not skip_validation and ttl_config:
            mismatch = abs(frame_count - ttl_pulse_count)
            tolerance = config.verification.mismatch_tolerance_frames

            if mismatch > tolerance:
                logger.error(
                    f" ✗ Verification failed:\n" f" Frames: {frame_count}\n" f" TTL pulses: {ttl_pulse_count}\n" f" Mismatch: {mismatch} (tolerance: {tolerance})"
                )
                raise ValueError(f"Frame/TTL verification failed for camera {camera_config.id}: " f"mismatch {mismatch} exceeds tolerance {tolerance}")

            logger.info(f" ✓ Verification passed (mismatch: {mismatch})")

        # Add ImageSeries to NWBFile
        add_video_acquisition(
            nwbfile,
            camera_id=camera_config.id,
            video_files=[str(f) for f in video_files],
            frame_rate=getattr(camera_config, "frame_rate", 30.0),
        )
        logger.info(" ✓ Added to NWBFile acquisition")

        # Track for later use (DLC inference consumes video_files below)
        discovered_cameras.append(
            {
                "camera_id": camera_config.id,
                "video_files": video_files,
                "frame_count": frame_count,
                "ttl_pulse_count": ttl_pulse_count,
            }
        )

    logger.info(f" ✓ Processed {len(discovered_cameras)} camera(s)")

    # Discover Bpod files (optional modality)
    if hasattr(session, "bpod") and session.bpod:
        bpod_files = discover_files(session_dir, session.bpod.path, sort=True)
        logger.info(f" ✓ Discovered {len(bpod_files)} Bpod file(s)")

    # -------------------------------------------------------------------------
    # Phase 2: Parse Behavior (Optional - ndx-structured-behavior)
    # -------------------------------------------------------------------------
    task_recording: Optional[TaskRecording] = None
    trials_table: Optional[Any] = None

    if bpod_files:
        logger.info("\n[Phase 2] Building behavioral data (ndx-structured-behavior)...")

        try:
            # Extract primitives from Session
            bpod_pattern = session.bpod.path
            bpod_order = session.bpod.order

            # Parse Bpod files with low-level API
            bpod_data = parse_bpod(
                session_dir=session_dir,
                pattern=bpod_pattern,
                order=bpod_order,
                continuous_time=True,
            )
            n_trials = bpod_data["SessionData"]["nTrials"]
            logger.info(f" ✓ Parsed {n_trials} trials from Bpod files")

            # Extract type tables (metadata)
            state_types = extract_state_types(bpod_data)
            event_types = extract_event_types(bpod_data)
            action_types = extract_action_types(bpod_data)
            logger.info(f" ✓ Extracted types: {len(state_types)} states, {len(event_types)} events, {len(action_types)} actions")

            # Extract data tables (no alignment yet - trial_offsets=None)
            states, state_indices = extract_states(bpod_data, state_types, trial_offsets=None)
            events, event_indices = extract_events(bpod_data, event_types, trial_offsets=None)
            actions, action_indices = extract_actions(bpod_data, action_types, trial_offsets=None)
            logger.info(f" ✓ Extracted data: {len(states)} state occurrences, {len(events)} events, {len(actions)} actions")

            # Build trials table and task recording
            trials_table = build_trials_table(bpod_data, states, events, actions, state_indices, event_indices, action_indices, trial_offsets=None)
            task_recording = build_task_recording(states, events, actions)
            logger.info(f" ✓ Built TrialsTable ({n_trials} trials) and TaskRecording")

        except Exception as e:
            # Behavior is optional: log and continue without it
            logger.warning(f" ⚠ Behavior data extraction failed: {e}")
            task_recording = None
            trials_table = None

    # -------------------------------------------------------------------------
    # Phase 3: Synchronization (Placeholder)
    # -------------------------------------------------------------------------
    logger.info("\n[Phase 3] Creating timebase and alignment...")

    # Compute alignment stats (placeholder - would normally align all modalities)
    alignment_stats: Optional[AlignmentStats] = None

    if config.timebase.source == "ttl":
        # Extract TTL pulses for alignment.
        # NOTE(review): this uses ttl.path while Phase 1 used ttl.paths —
        # confirm which attribute the Session model actually exposes.
        ttl_patterns = {ttl.id: ttl.path for ttl in session.TTLs}
        ttl_pulses = get_ttl_pulses(ttl_patterns, session_dir)

        # Count total pulses
        total_pulses = sum(len(pulses) for pulses in ttl_pulses.values())

        alignment_stats = AlignmentStats(
            timebase_source=config.timebase.source,
            mapping=config.timebase.mapping,
            offset_s=config.timebase.offset_s,
            max_jitter_s=0.0,  # Placeholder
            p95_jitter_s=0.0,  # Placeholder
            aligned_samples=total_pulses,
        )
        logger.info(f" ✓ Alignment stats created: {total_pulses} samples aligned")
    else:
        # Nominal rate - create minimal stats
        alignment_stats = AlignmentStats(
            timebase_source=config.timebase.source,
            mapping=config.timebase.mapping,
            offset_s=config.timebase.offset_s,
            max_jitter_s=0.0,
            p95_jitter_s=0.0,
            aligned_samples=0,
        )
        logger.info(" ✓ Alignment stats created (nominal rate)")

    # Add TTL events to NWBFile (Phase 3.5 - after TTL loading)
    if config.timebase.source == "ttl":
        logger.info("\n[Phase 3.5] Adding TTL events to NWBFile...")

        try:
            # Extract TTL descriptions and sources from session config
            ttl_descriptions = {}
            ttl_sources = {}

            for ttl in session.TTLs:
                if hasattr(ttl, "description") and ttl.description:
                    ttl_descriptions[ttl.id] = ttl.description
                # Source defaults to "unknown" if not in config
                if hasattr(ttl, "source"):
                    ttl_sources[ttl.id] = ttl.source

            # Add TTL events using ndx-events EventsTable
            nwbfile = add_ttl_table_to_nwb(
                nwbfile,
                ttl_pulses,
                descriptions=ttl_descriptions,
                sources=ttl_sources,
                container_name="TTLEvents",
            )

            total_events = sum(len(pulses) for pulses in ttl_pulses.values())
            logger.info(f" ✓ Added {total_events} TTL events from {len(ttl_pulses)} channels to NWBFile")

        except Exception as e:
            # TTL table is best-effort; the pipeline continues without it
            logger.warning(f" ⚠ Failed to add TTL events to NWBFile: {e}")

    # -------------------------------------------------------------------------
    # Phase 4: Optional Modalities (DLC Inference, Facemap, Transcode)
    # -------------------------------------------------------------------------
    dlc_inference_results: Optional[List[DLCInferenceResult]] = None
    facemap_bundle: Optional[Any] = None  # FacemapBundle - temporarily disabled
    transcoded_videos: Optional[List[TranscodedVideo]] = None

    logger.info("\n[Phase 4] Checking for optional modalities...")

    # DLC Inference (if enabled)
    if config.labels.dlc.run_inference:
        logger.info("\n[Phase 4.1] Running DLC inference...")

        try:
            # Extract primitives from Config/Session
            model_dir_name = config.labels.dlc.model
            model_config_path = Path(config.paths.models_root) / model_dir_name / "config.yaml"

            # Collect all camera video paths from discovered cameras
            video_paths = []
            for camera in discovered_cameras:
                # Each camera may have multiple video files (concatenated later)
                if camera["video_files"]:
                    video_paths.extend(camera["video_files"])

            if not video_paths:
                logger.warning(" ⚠ No video files found - skipping DLC inference")
            elif not model_config_path.exists():
                logger.error(f" ✗ DLC model config not found: {model_config_path}")
                raise FileNotFoundError(f"DLC model config.yaml not found: {model_config_path}")
            else:
                # Create output directory
                output_dir = Path(config.paths.intermediate_root) / session_id / "dlc"
                output_dir.mkdir(parents=True, exist_ok=True)

                # BUG FIX: the original reassigned the function parameter
                # `options` here; use a dedicated name instead.
                dlc_options = DLCInferenceOptions(
                    gputouse=config.labels.dlc.gputouse,
                    save_as_csv=False,
                    allow_growth=True,
                    allow_fallback=True,
                )

                logger.info(f" → Model: {model_dir_name}")
                logger.info(f" → Videos: {len(video_paths)} files")
                logger.info(f" → GPU: {dlc_options.gputouse if dlc_options.gputouse is not None else 'auto-detect'}")
                logger.info(f" → Output: {output_dir}")

                # Run batch inference (low-level API with primitives only)
                dlc_inference_results = run_dlc_inference_batch(
                    video_paths=video_paths,
                    model_config_path=model_config_path,
                    output_dir=output_dir,
                    options=dlc_options,
                )

                # Report results
                success_count = sum(1 for r in dlc_inference_results if r.success)
                total_time = sum(r.inference_time_s for r in dlc_inference_results)
                total_frames = sum(r.frame_count for r in dlc_inference_results if r.success)

                logger.info(f" ✓ DLC inference complete: {success_count}/{len(dlc_inference_results)} videos succeeded")
                logger.info(f" ✓ Total time: {total_time:.1f}s ({total_frames} frames)")

                # Log any failures
                for result in dlc_inference_results:
                    if not result.success:
                        logger.warning(f" ⚠ Failed: {result.video_path.name} - {result.error_message}")

                # TODO: Update manifest with H5 paths for downstream pose import
                # This would map each result.h5_output_path back to the corresponding camera

        except Exception as e:
            logger.error(f" ✗ DLC inference failed: {e}")
            raise

    else:
        logger.info(" ⊘ DLC inference: Disabled in config")

    # Pose import (placeholder)
    logger.info(" ⊘ Pose import: Not implemented")
    logger.info(" ⊘ Facemap computation: Not implemented")

    if transcode_videos:
        logger.info(" ⊘ Video transcoding: Not implemented")

    # -------------------------------------------------------------------------
    # Phase 5: NWB Assembly and Writing
    # -------------------------------------------------------------------------
    nwb_path: Optional[Path] = None

    # Base provenance shared by both branches (previously duplicated verbatim)
    base_provenance: Dict[str, Any] = {
        "pipeline_version": "0.1.0",
        "config_path": str(config_path),
        "session_id": session_id,
        "execution_time": datetime.now().isoformat(),
        "config_hash": compute_hash(str(config.model_dump())),
        "session_hash": compute_hash(str(session.model_dump())),
    }

    if not skip_nwb:
        logger.info("\n[Phase 5] Writing NWB file...")

        # Extend provenance with timebase and DLC execution details
        provenance = {
            **base_provenance,
            "timebase": {
                "source": config.timebase.source,
                "mapping": config.timebase.mapping,
                "offset_s": config.timebase.offset_s,
            },
            "dlc_inference": {
                "enabled": config.labels.dlc.run_inference,
                "model": config.labels.dlc.model if config.labels.dlc.run_inference else None,
                "gputouse": config.labels.dlc.gputouse if config.labels.dlc.run_inference else None,
                "results": len(dlc_inference_results) if dlc_inference_results else 0,
                "success_count": sum(1 for r in dlc_inference_results if r.success) if dlc_inference_results else 0,
            },
        }

        # Embed provenance in NWB file notes
        nwbfile.notes = json.dumps(provenance, indent=2)

        # Add task recording and trials if available
        if task_recording:
            nwbfile.add_acquisition(task_recording)
            logger.info(" ✓ Added TaskRecording to NWBFile")

        if trials_table:
            nwbfile.trials = trials_table
            logger.info(" ✓ Added TrialsTable to NWBFile")

        # Write to disk
        output_dir = Path(config.paths.output_root) / session_id
        ensure_directory(output_dir)
        nwb_path = output_dir / f"{session_id}.nwb"

        write_nwb_file(nwbfile, nwb_path)
        logger.info(f" ✓ NWB file written: {nwb_path}")
        logger.info(f" ✓ Size: {nwb_path.stat().st_size / (1024 * 1024):.2f} MB")
    else:
        logger.info("\n[Phase 5] NWB writing skipped")
        # Still report provenance in the result (without timebase/DLC details)
        provenance = base_provenance

    # -------------------------------------------------------------------------
    # Build Result
    # -------------------------------------------------------------------------
    logger.info("\n[Complete] Pipeline execution finished")
    logger.info("=" * 70)

    result: RunResult = {
        "nwbfile": nwbfile,
        "nwb_path": nwb_path,
        "alignment_stats": alignment_stats,
        "task_recording": task_recording,
        "trials_table": trials_table,
        "dlc_inference_results": dlc_inference_results,
        "facemap_bundle": facemap_bundle,
        "transcoded_videos": transcoded_videos,
        "provenance": provenance,
    }

    return result
600
+
601
+
602
def run_validation(nwb_path: Union[str, Path]) -> ValidationResult:
    """Validate NWB file using nwbinspector.

    Simple wrapper around nwbinspector for NWB file validation.
    Returns structured validation report.

    Args:
        nwb_path: Path to NWB file to validate

    Returns:
        ValidationResult with status and messages

    Raises:
        FileNotFoundError: NWB file not found
        ValidationError: Validation execution failed

    Example:
        >>> result = run_validation("output/Session-000001/session.nwb")
        >>> if result['status'] == 'fail':
        ...     print(f"Errors: {result['errors']}")
    """
    separator = "=" * 70
    logger.info("\n" + separator)
    logger.info("W2T-BKIN Pipeline - NWB Validation")
    logger.info(separator)

    target = Path(nwb_path)
    if not target.exists():
        raise FileNotFoundError(f"NWB file not found: {target}")

    logger.info(f"Validating: {target}")

    # Placeholder - would normally call nwbinspector, e.g.:
    #   from nwbinspector import inspect_nwb
    #   messages = list(inspect_nwb(target))
    #   errors = [m for m in messages if m.severity == "CRITICAL"]
    #   warnings = [m for m in messages if m.severity == "BEST_PRACTICE_VIOLATION"]
    # wrapping failures in ValidationError.
    logger.info(" ⊘ Validation: Placeholder for future implementation")
    logger.info(separator)

    return ValidationResult(
        status="pass",
        errors=[],
        warnings=[],
        nwb_path=target,
    )
654
+
655
+
656
+ # =============================================================================
657
+ # Public API
658
+ # =============================================================================
659
+
660
# Names exported by `from w2t_bkin.pipeline import *`: the two orchestration
# entry points and their result TypedDicts.
__all__ = [
    "run_session",
    "run_validation",
    "RunResult",
    "ValidationResult",
]