w2t-bkin 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
w2t_bkin/dlc/core.py ADDED
@@ -0,0 +1,448 @@
1
+ """DLC inference core functions.
2
+
3
+ Low-level inference functions that accept primitives only. These functions
4
+ never import config, Session, or Manifest and operate on raw file paths and
5
+ simple arguments.
6
+
7
+ Requirements:
8
+ - REQ-DLC-1: Accept primitive arguments only
9
+ - REQ-DLC-2: Never import config/Session/Manifest
10
+ - REQ-DLC-3: Support batch inference
11
+ - REQ-DLC-4: Return deterministic H5 output paths
12
+ - REQ-DLC-5: Validate model before inference
13
+
14
+ Architecture:
15
+ - Low-level: Accepts Path, int, bool, str, List primitives
16
+ - No high-level dependencies
17
+ - Module-local models only
18
+ """
19
+
20
+ import logging
21
+ from pathlib import Path
22
+ from typing import Dict, List, Optional
23
+
24
+ import yaml
25
+
26
+ from w2t_bkin.dlc.models import DLCInferenceOptions, DLCInferenceResult, DLCModelInfo
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class DLCInferenceError(Exception):
    """Signal a problem that prevents DLC inference from running at all.

    Raised when:
        - The model is invalid (config.yaml missing, structure corrupt)
        - A critical failure occurs (GPU not found, disk full)
        - Pre-flight validation fails

    Not raised when:
        - A single video inside a batch fails (that case is handled
          gracefully by the batch runner)
    """
44
+
45
+
46
def _skeleton_node_index(node, bodyparts):
    """Resolve one skeleton node to an integer bodypart index.

    Integer-like values are converted with ``int()`` first so that configs
    using numeric indices behave exactly as before. If that fails and the
    node is a string naming one of ``bodyparts``, its position in the list
    is returned instead.

    Raises:
        ValueError, TypeError: If the node is neither integer-like nor a
            known bodypart name.
    """
    try:
        return int(node)
    except (ValueError, TypeError):
        if isinstance(node, str) and node in bodyparts:
            return bodyparts.index(node)
        raise


def validate_dlc_model(config_path: Path) -> DLCModelInfo:
    """Validate a DLC project config.yaml and extract model metadata.

    Pre-flight check run before inference. The file must exist, parse as a
    YAML mapping, and define ``Task`` plus a non-empty ``bodyparts`` list.
    Skeleton edges are optional: each edge must be a pair of bodypart
    indices or bodypart names; malformed or out-of-range edges are logged
    as warnings and dropped rather than treated as errors.

    Args:
        config_path: Path to DLC project config.yaml

    Returns:
        DLCModelInfo with validated metadata. ``scorer`` is assembled by
        joining, with "_": ``net_type`` (if set), ``Task``, ``date`` with
        dashes removed (if present), ``shuffle<N>`` and ``snapshotindex``
        (if present).

    Raises:
        DLCInferenceError: If config.yaml is missing, is not a file, cannot
            be read or parsed, is not a YAML mapping, lacks a required
            field, has an empty ``Task``, or has an invalid ``bodyparts``.

    Requirements:
        - REQ-DLC-5: Validate model before inference
        - REQ-DLC-9: Raise error if config.yaml missing

    Example:
        >>> model_info = validate_dlc_model(Path("models/dlc/config.yaml"))
        >>> print(f"Scorer: {model_info.scorer}")
        >>> print(f"Bodyparts: {model_info.bodyparts}")
    """
    # The path must point at an existing regular file.
    if not config_path.exists():
        raise DLCInferenceError(f"DLC config.yaml not found: {config_path}")
    if not config_path.is_file():
        raise DLCInferenceError(f"DLC config path must be a file: {config_path}")

    # Parse YAML. The second handler stays broad on purpose: besides I/O and
    # decoding errors, YAML value construction can raise plain ValueError,
    # and callers only expect DLCInferenceError from this function.
    try:
        with open(config_path, "r") as f:
            config = yaml.safe_load(f)
    except yaml.YAMLError as e:
        raise DLCInferenceError(f"Failed to parse DLC config.yaml: {e}") from e
    except Exception as e:
        raise DLCInferenceError(f"Failed to read DLC config.yaml: {e}") from e

    if not isinstance(config, dict):
        raise DLCInferenceError(f"DLC config.yaml must contain a YAML dictionary, got {type(config).__name__}")

    # Required top-level fields
    required_fields = ["Task", "bodyparts"]
    missing = [f for f in required_fields if f not in config]
    if missing:
        raise DLCInferenceError(f"DLC config.yaml missing required fields: {missing}")

    bodyparts = config["bodyparts"]
    if not isinstance(bodyparts, list):
        raise DLCInferenceError(f"DLC config 'bodyparts' must be a list, got {type(bodyparts).__name__}")
    if not bodyparts:
        raise DLCInferenceError("DLC config 'bodyparts' list is empty")

    # An empty "Task:" entry parses as None; reject it here instead of
    # failing later with a TypeError while joining the scorer parts.
    if config["Task"] is None:
        raise DLCInferenceError("DLC config 'Task' is empty")
    # YAML may parse a purely numeric task as int/float; the scorer needs text.
    task = str(config["Task"])
    date = config.get("date", "unknown")

    # Project root is the directory holding config.yaml.
    project_path = config_path.parent

    # Assemble the scorer. The "DLC_" prefix is not part of the scorer; it is
    # added when the output filename is built.
    scorer_parts = []

    net_type = config.get("net_type")
    if net_type is not None:
        scorer_parts.append(str(net_type))

    scorer_parts.append(task)

    if date != "unknown":
        # str() because YAML may have parsed the value as a date object.
        scorer_parts.append(str(date).replace("-", ""))

    # NOTE(review): the shuffle component is taken from the first
    # TrainingFraction entry, which looks like it conflates the training
    # fraction with DLC's shuffle index -- confirm against the filenames DLC
    # actually writes. Behaviour is kept for valid configs; an empty or
    # non-list value now falls back to 1 instead of raising.
    training_fraction = config.get("TrainingFraction")
    if isinstance(training_fraction, (list, tuple)) and training_fraction:
        shuffle = training_fraction[0]
    else:
        shuffle = 1
    scorer_parts.append(f"shuffle{shuffle}")

    if "snapshotindex" in config:
        scorer_parts.append(str(config["snapshotindex"]))

    scorer = "_".join(scorer_parts)

    # Skeleton is optional; anything that is not a list is ignored.
    skeleton = config.get("skeleton") or []
    if not isinstance(skeleton, list):
        logger.warning(f"DLC config 'skeleton' is not a list, ignoring: {type(skeleton).__name__}")
        skeleton = []

    # Keep only well-formed edges whose two nodes resolve to valid indices.
    n_bodyparts = len(bodyparts)
    skeleton_edges = []
    for edge in skeleton:
        if not (isinstance(edge, (list, tuple)) and len(edge) == 2):
            logger.warning(f"Skeleton edge must be a pair of indices: {edge}")
            continue
        try:
            idx0 = _skeleton_node_index(edge[0], bodyparts)
            idx1 = _skeleton_node_index(edge[1], bodyparts)
        except (ValueError, TypeError):
            logger.warning(f"Invalid skeleton edge format: {edge}")
            continue
        if 0 <= idx0 < n_bodyparts and 0 <= idx1 < n_bodyparts:
            skeleton_edges.append([idx0, idx1])
        else:
            logger.warning(f"Skeleton edge indices out of range: {edge}")

    logger.debug(f"Validated DLC model: task='{task}', bodyparts={len(bodyparts)}, scorer='{scorer}', skeleton_edges={len(skeleton_edges)}")

    return DLCModelInfo(
        config_path=config_path,
        project_path=project_path,
        scorer=scorer,
        bodyparts=bodyparts,
        num_outputs=len(bodyparts) * 3,  # x, y, likelihood per bodypart
        skeleton=skeleton_edges,
        task=task,
        date=str(date) if date != "unknown" else date,
    )
173
+
174
+
175
def predict_output_paths(
    video_path: Path,
    model_info: DLCModelInfo,
    output_dir: Path,
    save_csv: bool = False,
) -> Dict[str, Path]:
    """Compute where DLC output files are expected to land, without running it.

    File names follow the pattern ``{video_stem}DLC_{scorer}.{ext}``, where
    the stem is the video file name without its extension and the scorer
    comes from ``model_info``.

    Args:
        video_path: Input video file path
        model_info: Validated model metadata (only ``scorer`` is read)
        output_dir: Directory the H5/CSV files are placed in
        save_csv: Whether a CSV path should be included as well

    Returns:
        Dict that always has an 'h5' entry and has a 'csv' entry only when
        ``save_csv`` is true

    Requirements:
        - REQ-DLC-4: Return deterministic H5 output paths

    Example:
        >>> paths = predict_output_paths(
        ...     Path("video.mp4"),
        ...     model_info,
        ...     Path("output"),
        ...     save_csv=True
        ... )
        >>> paths['h5']
        PosixPath('output/videoDLC_scorer.h5')
    """
    # Shared file-name prefix for every output of this video/model pair
    name_prefix = f"{video_path.stem}DLC_{model_info.scorer}"

    # H5 is always produced; CSV only on request
    extensions = ("h5", "csv") if save_csv else ("h5",)
    paths = {ext: output_dir / f"{name_prefix}.{ext}" for ext in extensions}

    logger.debug(f"Predicted output paths for '{video_path.name}': h5={paths['h5'].name}")

    return paths
226
+
227
+
228
def auto_detect_gpu() -> Optional[int]:
    """Pick a GPU index automatically, or report that only the CPU is usable.

    TensorFlow is imported lazily and asked for its physical GPU devices.
    Whenever at least one is listed the function selects index 0. A missing
    TensorFlow install, an empty device list, or any error raised during
    detection all result in the CPU being selected.

    Returns:
        0 if GPU available, None for CPU

    Requirements:
        - REQ-DLC-7: Auto-detect GPU when not specified

    Example:
        >>> gpu_index = auto_detect_gpu()
        >>> if gpu_index is not None:
        ...     print(f"Using GPU {gpu_index}")
        ... else:
        ...     print("Using CPU")
    """
    try:
        # Imported here so that the module itself does not require TensorFlow
        import tensorflow as tf

        gpu_devices = tf.config.list_physical_devices("GPU")

        if not gpu_devices:
            logger.debug("No GPUs detected, will use CPU")
            return None

        logger.debug(f"Auto-detected {len(gpu_devices)} GPU(s), using GPU 0")
        return 0

    except ImportError:
        # No TensorFlow installed: nothing to probe with
        logger.debug("TensorFlow not available for GPU detection, will use CPU")
        return None
    except Exception as e:
        # Detection itself blew up: degrade to CPU rather than propagate
        logger.warning(f"GPU detection failed: {e}, will use CPU")
        return None
269
+
270
+
271
def _read_frame_count(h5_path: Path) -> int:
    """Return the number of rows in a DLC H5 output, or 0 if it cannot be read.

    Best-effort only: any failure (pandas missing, unreadable file) is logged
    as a warning and reported as 0 so that it never fails the video.
    """
    try:
        import pandas as pd

        return len(pd.read_hdf(h5_path))
    except Exception as e:
        logger.warning(f"Could not read frame count from H5: {e}")
        return 0


def run_dlc_inference_batch(
    video_paths: List[Path],
    model_config_path: Path,
    output_dir: Path,
    options: Optional[DLCInferenceOptions] = None,
) -> List[DLCInferenceResult]:
    """Run DLC inference on multiple videos and report one result per video.

    Low-level function accepting primitives only. The model is validated
    once, the GPU is resolved once, and ``deeplabcut.analyze_videos`` is then
    called once per video so that a failure in one video does not prevent
    the remaining videos from being processed.

    Of ``options``, only ``gputouse``, ``save_as_csv`` and ``allow_growth``
    are consulted here; ``batch_size`` and ``allow_fallback`` are not read
    by this function.

    Args:
        video_paths: List of video file paths
        model_config_path: Path to DLC project config.yaml
        output_dir: Directory for H5/CSV outputs (created if missing)
        options: Inference options (None = defaults)

    Returns:
        List of results (one per video, in input order). Failed videos are
        included with ``success=False`` and an ``error_message``.

    Raises:
        DLCInferenceError: If the model is invalid, or if DeepLabCut cannot
            be imported while there are videos to process. Both are critical
            failures that affect every video, so they are raised instead of
            being reported per video.

    Requirements:
        - REQ-DLC-1: Accept primitives only
        - REQ-DLC-3: Support batch inference
        - REQ-DLC-6: Execute when config.labels.dlc.run_inference is true
        - REQ-DLC-10: Continue processing on individual video failure
        - REQ-DLC-13: Graceful partial failure handling

    Implementation Flow:
        1. Validate model with validate_dlc_model()
        2. Auto-detect GPU if options.gputouse is None
        3. Import DeepLabCut once (critical failure if unavailable)
        4. Call deeplabcut.analyze_videos for each video in turn
        5. Return results for all videos (success + failures)

    Example:
        >>> results = run_dlc_inference_batch(
        ...     video_paths=[Path("cam0.mp4"), Path("cam1.mp4")],
        ...     model_config_path=Path("models/dlc/config.yaml"),
        ...     output_dir=Path("output/dlc"),
        ...     options=DLCInferenceOptions(gputouse=0)
        ... )
        >>> success_count = sum(1 for r in results if r.success)
        >>> print(f"{success_count}/{len(results)} videos succeeded")
    """
    import time

    # Initialize options with defaults if not provided
    if options is None:
        options = DLCInferenceOptions()

    # Ensure output directory exists
    output_dir.mkdir(parents=True, exist_ok=True)

    # Step 1: Validate model (critical - nothing can run without it)
    logger.info(f"Validating DLC model: {model_config_path}")
    try:
        model_info = validate_dlc_model(model_config_path)
    except DLCInferenceError as e:
        logger.error(f"Model validation failed: {e}")
        raise

    # Step 2: Resolve GPU selection
    gpu_to_use = options.gputouse
    if gpu_to_use is None:
        gpu_to_use = auto_detect_gpu()
        logger.info(f"GPU auto-detection: {gpu_to_use if gpu_to_use is not None else 'CPU'}")
    else:
        logger.info(f"Using specified GPU: {gpu_to_use}")

    # Step 3: Import DeepLabCut once, up front. A missing install affects
    # every video, so it is a critical failure rather than N identical
    # per-video failures. Skipped for an empty batch, which needs no DLC.
    deeplabcut = None
    if video_paths:
        try:
            import deeplabcut
        except ImportError as e:
            logger.error(f"DeepLabCut not available: {e}")
            raise DLCInferenceError(f"DeepLabCut not available: {e}") from e

    logger.info(f"Starting DLC inference for {len(video_paths)} video(s)")
    logger.debug(f"Output directory: {output_dir}")
    logger.debug(f"Model scorer: {model_info.scorer}")
    logger.debug(f"Save CSV: {options.save_as_csv}")

    results: List[DLCInferenceResult] = []

    # Step 4: One analyze_videos call per video so failures stay isolated
    for video_path in video_paths:
        # perf_counter is monotonic, so elapsed time is immune to clock changes
        start_time = time.perf_counter()

        try:
            predicted_paths = predict_output_paths(
                video_path=video_path,
                model_info=model_info,
                output_dir=output_dir,
                save_csv=options.save_as_csv,
            )

            logger.info(f"Processing video: {video_path.name}")

            deeplabcut.analyze_videos(
                config=str(model_config_path),
                videos=[str(video_path)],
                destfolder=str(output_dir),
                gputouse=gpu_to_use,
                save_as_csv=options.save_as_csv,
                allow_growth=options.allow_growth,
            )

            elapsed_time = time.perf_counter() - start_time

            # The run only counts as successful if the predicted H5 exists
            h5_output = predicted_paths["h5"]
            if not h5_output.exists():
                raise DLCInferenceError(f"Expected output H5 file not found: {h5_output}")

            frame_count = _read_frame_count(h5_output)

            result = DLCInferenceResult(
                video_path=video_path,
                h5_output_path=h5_output,
                csv_output_path=predicted_paths.get("csv"),
                model_config_path=model_config_path,
                frame_count=frame_count,
                inference_time_s=elapsed_time,
                gpu_used=gpu_to_use,
                success=True,
                error_message=None,
            )

            logger.info(f"✓ Completed {video_path.name} in {elapsed_time:.1f}s ({frame_count} frames)")
            results.append(result)

        except Exception as e:
            # Individual video failure - record it and move on to the next
            elapsed_time = time.perf_counter() - start_time
            error_msg = str(e)

            logger.error(f"✗ Failed {video_path.name}: {error_msg}")

            results.append(
                DLCInferenceResult(
                    video_path=video_path,
                    h5_output_path=None,
                    csv_output_path=None,
                    model_config_path=model_config_path,
                    frame_count=0,
                    inference_time_s=elapsed_time,
                    gpu_used=gpu_to_use,
                    success=False,
                    error_message=error_msg,
                )
            )

    # Step 5: Summary
    success_count = sum(1 for r in results if r.success)
    logger.info(f"Batch inference complete: {success_count}/{len(results)} videos succeeded")

    return results
w2t_bkin/dlc/models.py ADDED
@@ -0,0 +1,124 @@
1
+ """DLC inference data models.
2
+
3
+ Module-local models for DLC inference operations. These models are owned by
4
+ the dlc module and follow the established pattern:
5
+
6
+ - Frozen dataclasses (immutable)
7
+ - Type-annotated fields
8
+ - Comprehensive docstrings
9
+ - No external dependencies (standard library only: dataclasses, pathlib, typing)
10
+
11
+ Requirements:
12
+ - Architecture: Module-local model ownership pattern
13
+ """
14
+
15
+ from dataclasses import dataclass
16
+ from pathlib import Path
17
+ from typing import List, Optional
18
+
19
+
20
@dataclass(frozen=True)
class DLCInferenceOptions:
    """Immutable bundle of settings controlling a DLC inference run.

    Every field has a default, so ``DLCInferenceOptions()`` is a valid
    configuration.

    Attributes:
        gputouse: Index of the GPU to run on (0, 1, ...); -1 selects the CPU
            and None requests auto-detection
        save_as_csv: Write a CSV file in addition to the H5 output
        allow_growth: Turn on TensorFlow GPU memory growth (prevents OOM)
        allow_fallback: Fall back to the CPU if the GPU fails with OOM
        batch_size: TensorFlow batch size used for inference

    Example:
        >>> options = DLCInferenceOptions(gputouse=0, save_as_csv=False)
        >>> options.gputouse
        0
    """

    # Device selection
    gputouse: Optional[int] = None

    # Output format
    save_as_csv: bool = False

    # Memory / resilience behaviour
    allow_growth: bool = True
    allow_fallback: bool = True

    # Throughput tuning
    batch_size: int = 1
42
+
43
+
44
@dataclass(frozen=True)
class DLCInferenceResult:
    """Immutable record describing how inference went for one video.

    Attributes:
        video_path: The video that was given as input
        h5_output_path: Path of the produced H5 file (None if failed)
        csv_output_path: Path of the produced CSV file (None if not
            requested or failed)
        model_config_path: The DLC model config.yaml that was used
        frame_count: How many frames were processed
        inference_time_s: Duration of inference, in seconds
        gpu_used: Index of the GPU that was used (None if CPU)
        success: True when inference succeeded
        error_message: Description of the error if failed (None if success)

    Example:
        >>> result = DLCInferenceResult(
        ...     video_path=Path("video.mp4"),
        ...     h5_output_path=Path("videoDLC_scorer.h5"),
        ...     csv_output_path=None,
        ...     model_config_path=Path("model/config.yaml"),
        ...     frame_count=1000,
        ...     inference_time_s=45.2,
        ...     gpu_used=0,
        ...     success=True,
        ...     error_message=None,
        ... )
        >>> result.success
        True
    """

    # Inputs and outputs on disk
    video_path: Path
    h5_output_path: Optional[Path]
    csv_output_path: Optional[Path]
    model_config_path: Path

    # Run statistics
    frame_count: int
    inference_time_s: float
    gpu_used: Optional[int]

    # Outcome
    success: bool
    error_message: Optional[str] = None
84
+
85
+
86
@dataclass(frozen=True)
class DLCModelInfo:
    """Immutable snapshot of validated DLC model metadata.

    The values originate from a DLC project config.yaml.

    Attributes:
        config_path: Location of config.yaml
        project_path: Directory containing config.yaml (project root)
        scorer: DLC scorer name derived from the config
        bodyparts: Bodypart names listed in the config
        num_outputs: Count of output values (len(bodyparts) * 3 for
            x, y, likelihood)
        skeleton: Skeleton edges from the config, each a
            [node_idx, node_idx] pair
        task: DLC task name from the config
        date: DLC project date from the config

    Example:
        >>> model_info = DLCModelInfo(
        ...     config_path=Path("model/config.yaml"),
        ...     project_path=Path("model"),
        ...     scorer="DLC_resnet50_BA_W2T_cam0shuffle1_150000",
        ...     bodyparts=["nose", "left_ear", "right_ear"],
        ...     num_outputs=9,
        ...     skeleton=[[0, 1], [1, 2]],
        ...     task="BA_W2T_cam0",
        ...     date="2024-01-01",
        ... )
        >>> model_info.num_outputs
        9
    """

    # Where the model lives
    config_path: Path
    project_path: Path

    # Identity used in output file names
    scorer: str

    # Pose layout
    bodyparts: List[str]
    num_outputs: int
    skeleton: List[List[int]]

    # Project descriptors
    task: str
    date: str