wavedl 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wavedl/models/base.py ADDED
@@ -0,0 +1,173 @@
1
+ """
2
+ Base Model Abstract Class
3
+ ==========================
4
+
5
+ Defines the interface contract that all models must implement for compatibility
6
+ with the training pipeline. Provides common utilities and enforces consistency.
7
+
8
+ Author: Ductho Le (ductho.le@outlook.com)
9
+ Version: 1.0.0
10
+ """
11
+
12
+ from abc import ABC, abstractmethod
13
+ from typing import Any
14
+
15
+ import torch
16
+ import torch.nn as nn
17
+
18
+
19
+ class BaseModel(nn.Module, ABC):
20
+ """
21
+ Abstract base class for all regression models.
22
+
23
+ All models in this framework must inherit from BaseModel and implement
24
+ the required abstract methods. This ensures compatibility with the
25
+ training pipeline and provides a consistent interface.
26
+
27
+ Supports any input dimensionality:
28
+ - 1D: in_shape = (L,) for signals/waveforms
29
+ - 2D: in_shape = (H, W) for images/spectrograms
30
+ - 3D: in_shape = (D, H, W) for volumes
31
+
32
+ Attributes:
33
+ in_shape: Input spatial dimensions (varies by dimensionality)
34
+ out_size: Number of output targets
35
+
36
+ Example:
37
+ from wavedl.models.base import BaseModel
38
+ from wavedl.models.registry import register_model
39
+
40
+ @register_model("my_model")
41
+ class MyModel(BaseModel):
42
+ def __init__(self, in_shape, out_size, **kwargs):
43
+ super().__init__(in_shape, out_size)
44
+ # Build layers...
45
+
46
+ def forward(self, x):
47
+ # Forward pass...
48
+ return output
49
+ """
50
+
51
+ @abstractmethod
52
+ def __init__(
53
+ self,
54
+ in_shape: tuple[int] | tuple[int, int] | tuple[int, int, int],
55
+ out_size: int,
56
+ **kwargs,
57
+ ):
58
+ """
59
+ Initialize the model.
60
+
61
+ Args:
62
+ in_shape: Input spatial dimensions, excluding batch and channel dims:
63
+ - 1D: (L,) for signal length
64
+ - 2D: (H, W) for image dimensions
65
+ - 3D: (D, H, W) for volume dimensions
66
+ out_size: Number of regression output targets
67
+ **kwargs: Model-specific hyperparameters
68
+ """
69
+ super().__init__()
70
+ self.in_shape = in_shape
71
+ self.out_size = out_size
72
+
73
+ @abstractmethod
74
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
75
+ """
76
+ Forward pass of the model.
77
+
78
+ Args:
79
+ x: Input tensor of shape (B, C, H, W)
80
+
81
+ Returns:
82
+ Output tensor of shape (B, out_size)
83
+ """
84
+ pass
85
+
86
+ def count_parameters(self, trainable_only: bool = True) -> int:
87
+ """
88
+ Count the number of parameters in the model.
89
+
90
+ Args:
91
+ trainable_only: If True, count only trainable parameters
92
+
93
+ Returns:
94
+ Number of parameters
95
+ """
96
+ if trainable_only:
97
+ return sum(p.numel() for p in self.parameters() if p.requires_grad)
98
+ return sum(p.numel() for p in self.parameters())
99
+
100
+ def parameter_summary(self) -> dict[str, Any]:
101
+ """
102
+ Generate a summary of model parameters.
103
+
104
+ Returns:
105
+ Dictionary with parameter statistics
106
+ """
107
+ total = self.count_parameters(trainable_only=False)
108
+ trainable = self.count_parameters(trainable_only=True)
109
+ return {
110
+ "total_parameters": total,
111
+ "trainable_parameters": trainable,
112
+ "frozen_parameters": total - trainable,
113
+ "total_mb": total * 4 / (1024 * 1024), # Assuming float32
114
+ }
115
+
116
+ @classmethod
117
+ def get_default_config(cls) -> dict[str, Any]:
118
+ """
119
+ Return default configuration for this model.
120
+ Override in subclasses to provide model-specific defaults.
121
+
122
+ Returns:
123
+ Dictionary of default hyperparameters
124
+ """
125
+ return {}
126
+
127
+ def get_optimizer_groups(self, base_lr: float, weight_decay: float = 1e-4) -> list:
128
+ """
129
+ Get parameter groups for optimizer with optional layer-wise learning rates.
130
+ Override in subclasses for custom parameter grouping (e.g., no decay on biases).
131
+
132
+ Args:
133
+ base_lr: Base learning rate
134
+ weight_decay: Weight decay coefficient
135
+
136
+ Returns:
137
+ List of parameter group dictionaries
138
+ """
139
+ # Default: no weight decay on bias and normalization layers
140
+ decay_params = []
141
+ no_decay_params = []
142
+
143
+ for name, param in self.named_parameters():
144
+ if not param.requires_grad:
145
+ continue
146
+ # Skip weight decay for bias and normalization parameters
147
+ if "bias" in name or "norm" in name or "bn" in name:
148
+ no_decay_params.append(param)
149
+ else:
150
+ decay_params.append(param)
151
+
152
+ # Handle empty parameter lists gracefully
153
+ groups = []
154
+ if decay_params:
155
+ groups.append(
156
+ {"params": decay_params, "lr": base_lr, "weight_decay": weight_decay}
157
+ )
158
+ if no_decay_params:
159
+ groups.append(
160
+ {"params": no_decay_params, "lr": base_lr, "weight_decay": 0.0}
161
+ )
162
+
163
+ return (
164
+ groups
165
+ if groups
166
+ else [
167
+ {
168
+ "params": self.parameters(),
169
+ "lr": base_lr,
170
+ "weight_decay": weight_decay,
171
+ }
172
+ ]
173
+ )
wavedl/models/cnn.py ADDED
@@ -0,0 +1,249 @@
1
+ """
2
+ CNN: A Dimension-Agnostic Convolutional Neural Network
3
+ ======================================================
4
+
5
+ A flexible CNN architecture that automatically adapts to 1D, 2D, or 3D inputs.
6
+ Dynamically selects appropriate convolution, pooling, and dropout layers based
7
+ on input dimensionality.
8
+
9
+ **Dimensionality Support**:
10
+ - 1D: Waveforms, signals, time-series (N, 1, L) → Conv1d
11
+ - 2D: Images, spectrograms (N, 1, H, W) → Conv2d
12
+ - 3D: Volumetric data, CT/MRI (N, 1, D, H, W) → Conv3d
13
+
14
+ Use this as:
15
+ - A baseline for comparing more complex architectures
16
+ - A lightweight option for any spatial data type
17
+ - A starting point for custom modifications
18
+
19
+ Author: Ductho Le (ductho.le@outlook.com)
20
+ Version: 1.0.0
21
+ """
22
+
23
+ from typing import Any
24
+
25
+ import torch
26
+ import torch.nn as nn
27
+
28
+ from wavedl.models.base import BaseModel
29
+ from wavedl.models.registry import register_model
30
+
31
+
32
+ # Type alias for spatial shapes
33
+ SpatialShape = tuple[int] | tuple[int, int] | tuple[int, int, int]
34
+
35
+
36
+ def _get_conv_layers(
37
+ dim: int,
38
+ ) -> tuple[type[nn.Module], type[nn.Module], type[nn.Module]]:
39
+ """
40
+ Get the appropriate Conv, MaxPool, and Dropout classes for a given dimensionality.
41
+
42
+ Args:
43
+ dim: Spatial dimensionality (1, 2, or 3)
44
+
45
+ Returns:
46
+ Tuple of (Conv, MaxPool, Dropout) layer classes
47
+
48
+ Raises:
49
+ ValueError: If dim is not 1, 2, or 3
50
+ """
51
+ if dim == 1:
52
+ return nn.Conv1d, nn.MaxPool1d, nn.Dropout1d
53
+ elif dim == 2:
54
+ return nn.Conv2d, nn.MaxPool2d, nn.Dropout2d
55
+ elif dim == 3:
56
+ return nn.Conv3d, nn.MaxPool3d, nn.Dropout3d
57
+ else:
58
+ raise ValueError(f"Unsupported dimensionality: {dim}D. Supported: 1D, 2D, 3D.")
59
+
60
+
61
@register_model("cnn")
class CNN(BaseModel):
    """
    Universal CNN: A dimension-agnostic convolutional network for regression.

    Automatically detects input dimensionality from in_shape and builds the
    appropriate architecture using Conv1d/2d/3d layers.

    Architecture:
        - 5 Encoder blocks: Conv → GroupNorm → LeakyReLU → MaxPool [→ Dropout]
          Channel progression: 32 → 64 → 128 → 256 → 512 (with base_channels=32)
        - Global adaptive pooling to 1×1 (handles variable spatial dimensions)
        - MLP regression head with 3 hidden layers: 512 → 256 → 128 → 64 → out_size

    Args:
        in_shape: Spatial dimensions as tuple:
            - 1D: (L,) for signals/waveforms
            - 2D: (H, W) for images
            - 3D: (D, H, W) for volumes
        out_size: Number of regression output targets
        base_channels: Base channel count, multiplied through encoder (default: 32)
        dropout_rate: Dropout rate for regularization (default: 0.1)
        in_channels: Number of input channels (default: 1, preserving the
            original single-channel behavior; set >1 for multi-channel data)

    Input Shape:
        (B, in_channels, *spatial_dims) where spatial_dims matches in_shape

    Output Shape:
        (B, out_size)

    Note:
        Each encoder block halves every spatial dimension (MaxPool stride 2),
        so each entry of in_shape should be at least 2**5 = 32 for all five
        pooling stages to operate cleanly.

    Example:
        >>> model = CNN(in_shape=(500, 500), out_size=3)  # 2D dispersion curve
        >>> x = torch.randn(4, 1, 500, 500)
        >>> out = model(x)  # Shape: (4, 3)

        >>> model = CNN(in_shape=(512,), out_size=5)  # 1D waveform input
        >>> x = torch.randn(4, 1, 512)
        >>> out = model(x)  # Shape: (4, 5)
    """

    def __init__(
        self,
        in_shape: SpatialShape,
        out_size: int,
        base_channels: int = 32,
        dropout_rate: float = 0.1,
        in_channels: int = 1,
        **kwargs,
    ):
        super().__init__(in_shape, out_size)

        self.base_channels = base_channels
        self.dropout_rate = dropout_rate
        self.in_channels = in_channels
        self.dim = len(in_shape)

        # Get dimension-appropriate layer classes (raises ValueError if
        # len(in_shape) is not 1, 2, or 3)
        self._Conv, self._MaxPool, self._Dropout = _get_conv_layers(self.dim)

        # Adaptive pooling for consistent feature size regardless of input resolution
        self._AdaptivePool = (
            nn.AdaptiveAvgPool1d
            if self.dim == 1
            else nn.AdaptiveAvgPool2d
            if self.dim == 2
            else nn.AdaptiveAvgPool3d
        )

        # Channel progression: 32 → 64 → 128 → 256 → 512 (5 blocks for deeper features)
        c1, c2, c3, c4, c5 = (
            base_channels,
            base_channels * 2,
            base_channels * 4,
            base_channels * 8,
            base_channels * 16,
        )

        # Encoder blocks with progressive dropout (5 blocks). First block is
        # parameterized by in_channels (previously hard-coded to 1).
        self.block1 = self._make_conv_block(in_channels, c1)
        self.block2 = self._make_conv_block(c1, c2)
        self.block3 = self._make_conv_block(c2, c3, dropout=0.05)
        self.block4 = self._make_conv_block(c3, c4, dropout=0.05)
        self.block5 = self._make_conv_block(c4, c5, dropout=dropout_rate)

        # Global average pooling (output size = 1) - ONNX compatible with any input size
        # Using output size 1 ensures compatibility regardless of encoder output dimensions
        self.adaptive_pool = self._AdaptivePool(1)

        # Compute flattened feature size (1 element per channel after global pooling)
        flat_size = c5  # 512 channels × 1 spatial element (with base_channels=32)

        # Regression head: 512 → 256 → 128 → 64 → out_size
        # (four Linear layers: three hidden layers plus the output projection)
        self.head = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(flat_size, 256),
            nn.LayerNorm(256),
            nn.LeakyReLU(0.01, inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 128),
            nn.LayerNorm(128),
            nn.LeakyReLU(0.01, inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(128, 64),
            nn.LayerNorm(64),
            nn.LeakyReLU(0.01, inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(64, out_size),
        )

    @staticmethod
    def _compute_num_groups(num_channels: int, target_groups: int = 4) -> int:
        """
        Compute valid num_groups for GroupNorm that divides num_channels.

        Finds the largest divisor of num_channels that is <= target_groups,
        or falls back to 1 if no suitable divisor exists.

        Args:
            num_channels: Number of channels (must be positive)
            target_groups: Desired number of groups (default: 4)

        Returns:
            Valid num_groups that satisfies num_channels % num_groups == 0
        """
        # Try target_groups down to 1, return first valid divisor
        for g in range(min(target_groups, num_channels), 0, -1):
            if num_channels % g == 0:
                return g
        return 1  # Fallback (always valid)

    def _make_conv_block(
        self, in_channels: int, out_channels: int, dropout: float = 0.0
    ) -> nn.Sequential:
        """
        Create a convolutional block with dimension-appropriate layers.

        Args:
            in_channels: Input channel count
            out_channels: Output channel count
            dropout: Dropout rate (0 to disable)

        Returns:
            Sequential block: Conv → GroupNorm → LeakyReLU → MaxPool [→ Dropout]
        """
        num_groups = self._compute_num_groups(out_channels, target_groups=4)

        layers = [
            self._Conv(in_channels, out_channels, kernel_size=3, padding=1),
            nn.GroupNorm(num_groups, out_channels),
            nn.LeakyReLU(0.01, inplace=True),
            self._MaxPool(2),
        ]

        if dropout > 0:
            layers.append(self._Dropout(dropout))

        return nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass through encoder and regression head.

        Args:
            x: Input tensor of shape (B, in_channels, *spatial_dims)

        Returns:
            Output tensor of shape (B, out_size)
        """
        # Encoder (5 blocks)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.block5(x)

        # Adaptive pooling ensures consistent feature size
        x = self.adaptive_pool(x)

        # Flatten and regress
        x = x.flatten(1)
        return self.head(x)

    @classmethod
    def get_default_config(cls) -> dict[str, Any]:
        """Return default configuration for CNN."""
        return {"base_channels": 32, "dropout_rate": 0.1, "in_channels": 1}

    def __repr__(self) -> str:
        return (
            f"CNN({self.dim}D, in_shape={self.in_shape}, out_size={self.out_size}, "
            f"channels={self.base_channels}, dropout={self.dropout_rate})"
        )