wavedl 1.5.6__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wavedl/__init__.py +1 -1
- wavedl/models/__init__.py +52 -4
- wavedl/models/_timm_utils.py +238 -0
- wavedl/models/caformer.py +270 -0
- wavedl/models/convnext.py +108 -33
- wavedl/models/convnext_v2.py +504 -0
- wavedl/models/densenet.py +5 -5
- wavedl/models/efficientnet.py +30 -13
- wavedl/models/efficientnetv2.py +32 -9
- wavedl/models/fastvit.py +285 -0
- wavedl/models/mamba.py +535 -0
- wavedl/models/maxvit.py +251 -0
- wavedl/models/mobilenetv3.py +35 -12
- wavedl/models/regnet.py +39 -16
- wavedl/models/resnet.py +5 -5
- wavedl/models/resnet3d.py +2 -2
- wavedl/models/swin.py +41 -9
- wavedl/models/tcn.py +25 -5
- wavedl/models/unet.py +1 -1
- wavedl/models/vit.py +6 -6
- wavedl/test.py +7 -3
- wavedl/train.py +57 -23
- wavedl/utils/constraints.py +11 -5
- wavedl/utils/data.py +120 -18
- wavedl/utils/metrics.py +287 -326
- {wavedl-1.5.6.dist-info → wavedl-1.6.0.dist-info}/METADATA +104 -67
- wavedl-1.6.0.dist-info/RECORD +44 -0
- wavedl-1.5.6.dist-info/RECORD +0 -38
- {wavedl-1.5.6.dist-info → wavedl-1.6.0.dist-info}/LICENSE +0 -0
- {wavedl-1.5.6.dist-info → wavedl-1.6.0.dist-info}/WHEEL +0 -0
- {wavedl-1.5.6.dist-info → wavedl-1.6.0.dist-info}/entry_points.txt +0 -0
- {wavedl-1.5.6.dist-info → wavedl-1.6.0.dist-info}/top_level.txt +0 -0
wavedl/models/maxvit.py
ADDED
@@ -0,0 +1,251 @@
+"""
+MaxViT: Multi-Axis Vision Transformer
+======================================
+
+MaxViT combines local and global attention with O(n) complexity using
+multi-axis attention: block attention (local) + grid attention (global sparse).
+
+**Key Features**:
+- Multi-axis attention for both local and global context
+- Hybrid design with MBConv + attention
+- Linear O(n) complexity
+- Hierarchical multi-scale features
+
+**Variants**:
+- maxvit_tiny: 31M params
+- maxvit_small: 69M params
+- maxvit_base: 120M params
+
+**Requirements**:
+- timm (for pretrained models and architecture)
+- torchvision (fallback, limited support)
+
+Reference:
+    Tu, Z., et al. (2022). MaxViT: Multi-Axis Vision Transformer.
+    ECCV 2022. https://arxiv.org/abs/2204.01697
+
+Author: Ductho Le (ductho.le@outlook.com)
+"""
+
+import torch
+import torch.nn as nn
+
+from wavedl.models._timm_utils import build_regression_head
+from wavedl.models.base import BaseModel
+from wavedl.models.registry import register_model
+
+
+__all__ = [
+    "MaxViTBase",
+    "MaxViTBaseLarge",
+    "MaxViTSmall",
+    "MaxViTTiny",
+]
+
+
+# =============================================================================
+# MAXVIT BASE CLASS
+# =============================================================================
+
+
+class MaxViTBase(BaseModel):
+    """
+    MaxViT base class wrapping timm implementation.
+
+    Multi-axis attention with local block and global grid attention.
+    2D only due to attention structure.
+    """
+
+    def __init__(
+        self,
+        in_shape: tuple[int, int],
+        out_size: int,
+        model_name: str = "maxvit_tiny_tf_224",
+        pretrained: bool = True,
+        freeze_backbone: bool = False,
+        dropout_rate: float = 0.3,
+        **kwargs,
+    ):
+        super().__init__(in_shape, out_size)
+
+        if len(in_shape) != 2:
+            raise ValueError(f"MaxViT requires 2D input (H, W), got {len(in_shape)}D")
+
+        self.pretrained = pretrained
+        self.freeze_backbone = freeze_backbone
+        self.model_name = model_name
+
+        # Try to load from timm
+        try:
+            import timm
+
+            self.backbone = timm.create_model(
+                model_name,
+                pretrained=pretrained,
+                num_classes=0,  # Remove classifier
+            )
+
+            # Get feature dimension
+            with torch.no_grad():
+                dummy = torch.zeros(1, 3, *in_shape)
+                features = self.backbone(dummy)
+                in_features = features.shape[-1]
+
+        except ImportError:
+            raise ImportError(
+                "timm is required for MaxViT. Install with: pip install timm"
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to load MaxViT model '{model_name}': {e}")
+
+        # Adapt input channels (3 -> 1)
+        self._adapt_input_channels()
+
+        # Regression head
+        self.head = build_regression_head(in_features, out_size, dropout_rate)
+
+        if freeze_backbone:
+            self._freeze_backbone()
+
+    def _adapt_input_channels(self):
+        """Adapt first conv layer for single-channel input."""
+        # MaxViT uses stem.conv1 (Conv2dSame from timm)
+        adapted = False
+
+        # Find the first Conv2d with 3 input channels
+        for name, module in self.backbone.named_modules():
+            if hasattr(module, "in_channels") and module.in_channels == 3:
+                # Get parent and child names
+                parts = name.split(".")
+                parent = self.backbone
+                for part in parts[:-1]:
+                    parent = getattr(parent, part)
+                child_name = parts[-1]
+
+                # Create new conv with 1 input channel
+                new_conv = self._make_new_conv(module)
+                setattr(parent, child_name, new_conv)
+                adapted = True
+                break
+
+        if not adapted:
+            import warnings
+
+            warnings.warn(
+                "Could not adapt MaxViT input channels. Model may fail.", stacklevel=2
+            )
+
+    def _make_new_conv(self, old_conv: nn.Module) -> nn.Module:
+        """Create new conv layer with 1 input channel."""
+        # Handle both Conv2d and Conv2dSame from timm
+        type(old_conv)
+
+        # Get common parameters
+        kwargs = {
+            "out_channels": old_conv.out_channels,
+            "kernel_size": old_conv.kernel_size,
+            "stride": old_conv.stride,
+            "padding": old_conv.padding if hasattr(old_conv, "padding") else 0,
+            "bias": old_conv.bias is not None,
+        }
+
+        # Create new conv (use regular Conv2d for simplicity)
+        new_conv = nn.Conv2d(1, **kwargs)
+
+        if self.pretrained:
+            with torch.no_grad():
+                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
+                if old_conv.bias is not None:
+                    new_conv.bias.copy_(old_conv.bias)
+        return new_conv
+
+    def _freeze_backbone(self):
+        """Freeze backbone parameters."""
+        for param in self.backbone.parameters():
+            param.requires_grad = False
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        features = self.backbone(x)
+        return self.head(features)
+
+
+# =============================================================================
+# REGISTERED VARIANTS
+# =============================================================================
+
+
+@register_model("maxvit_tiny")
+class MaxViTTiny(MaxViTBase):
+    """
+    MaxViT Tiny: ~30.1M backbone parameters.
+
+    Multi-axis attention with local+global context.
+    2D only.
+
+    Example:
+        >>> model = MaxViTTiny(in_shape=(224, 224), out_size=3)
+        >>> x = torch.randn(4, 1, 224, 224)
+        >>> out = model(x)  # (4, 3)
+    """
+
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="maxvit_tiny_tf_224",
+            **kwargs,
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"MaxViT_Tiny(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+
+
+@register_model("maxvit_small")
+class MaxViTSmall(MaxViTBase):
+    """
+    MaxViT Small: ~67.6M backbone parameters.
+
+    Multi-axis attention with local+global context.
+    2D only.
+    """
+
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="maxvit_small_tf_224",
+            **kwargs,
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"MaxViT_Small(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+
+
+@register_model("maxvit_base")
+class MaxViTBaseLarge(MaxViTBase):
+    """
+    MaxViT Base: ~118.1M backbone parameters.
+
+    Multi-axis attention with local+global context.
+    2D only.
+    """
+
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="maxvit_base_tf_224",
+            **kwargs,
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"MaxViT_Base(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
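For orientation, a minimal usage sketch of the new wrapper, following the Example block in the MaxViTTiny docstring above. It assumes timm is installed; pretrained=False is used here only to avoid a weight download, and the constructor rebuilds the stem conv so inputs are single-channel:

    import torch
    from wavedl.models.maxvit import MaxViTTiny

    # Stem conv is rebuilt for 1-channel input; the timm classifier is replaced
    # by the regression head from wavedl.models._timm_utils.
    model = MaxViTTiny(in_shape=(224, 224), out_size=3, pretrained=False)
    x = torch.randn(4, 1, 224, 224)  # single-channel wave "images"
    with torch.no_grad():
        out = model(x)
    print(out.shape)  # torch.Size([4, 3])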
wavedl/models/mobilenetv3.py
CHANGED
@@ -13,8 +13,8 @@ optimization to achieve excellent accuracy with minimal computational cost.
 - Designed for real-time inference on CPUs and edge devices
 
 **Variants**:
-- mobilenet_v3_small: Ultra-lightweight (~
-- mobilenet_v3_large: Balanced (~3.
+- mobilenet_v3_small: Ultra-lightweight (~0.9M backbone params) - Edge/embedded
+- mobilenet_v3_large: Balanced (~3.0M backbone params) - Mobile deployment
 
 **Use Cases**:
 - Real-time structural health monitoring on embedded systems
@@ -136,10 +136,37 @@ class MobileNetV3Base(BaseModel):
             nn.Linear(regression_hidden, out_size),
         )
 
-        #
+        # Adapt first conv for single-channel input (3× memory savings vs expand)
+        self._adapt_input_channels()
+
+        # Optionally freeze backbone for fine-tuning (after adaptation so new conv is frozen too)
         if freeze_backbone:
             self._freeze_backbone()
 
+    def _adapt_input_channels(self):
+        """Modify first conv to accept single-channel input.
+
+        Instead of expanding 1→3 channels in forward (which triples memory),
+        we replace the first conv layer with a 1-channel version and initialize
+        weights as the mean of the pretrained RGB filters.
+        """
+        old_conv = self.backbone.features[0][0]
+        new_conv = nn.Conv2d(
+            1,  # Single channel input
+            old_conv.out_channels,
+            kernel_size=old_conv.kernel_size,
+            stride=old_conv.stride,
+            padding=old_conv.padding,
+            dilation=old_conv.dilation,
+            groups=old_conv.groups,
+            padding_mode=old_conv.padding_mode,
+            bias=old_conv.bias is not None,
+        )
+        if self.pretrained:
+            with torch.no_grad():
+                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
+        self.backbone.features[0][0] = new_conv
+
     def _freeze_backbone(self):
         """Freeze all backbone parameters except the classifier."""
         for name, param in self.backbone.named_parameters():
@@ -151,15 +178,11 @@ class MobileNetV3Base(BaseModel):
         Forward pass.
 
         Args:
-            x: Input tensor of shape (B,
+            x: Input tensor of shape (B, 1, H, W)
 
         Returns:
             Output tensor of shape (B, out_size)
         """
-        # Expand single channel to 3 channels for pretrained weights compatibility
-        if x.size(1) == 1:
-            x = x.expand(-1, 3, -1, -1)
-
         return self.backbone(x)
 
     @classmethod
@@ -183,7 +206,7 @@ class MobileNetV3Small(MobileNetV3Base):
     """
     MobileNetV3-Small: Ultra-lightweight for edge deployment.
 
-    ~
+    ~0.9M backbone parameters. Designed for the most constrained environments.
     Achieves ~67% ImageNet accuracy with minimal compute.
 
     Recommended for:
@@ -194,7 +217,7 @@ class MobileNetV3Small(MobileNetV3Base):
 
     Performance (approximate):
     - CPU inference: ~6ms (single core)
-    - Parameters:
+    - Parameters: ~0.9M backbone
    - MAdds: 56M
 
     Args:
@@ -230,7 +253,7 @@ class MobileNetV3Large(MobileNetV3Base):
     """
     MobileNetV3-Large: Balanced efficiency and accuracy.
 
-    ~3.
+    ~3.0M backbone parameters. Best trade-off for mobile/portable deployment.
     Achieves ~75% ImageNet accuracy with efficient inference.
 
     Recommended for:
@@ -241,7 +264,7 @@ class MobileNetV3Large(MobileNetV3Base):
 
     Performance (approximate):
     - CPU inference: ~20ms (single core)
-    - Parameters:
+    - Parameters: ~3.0M backbone
     - MAdds: 219M
 
     Args:
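The first-conv replacement added in this file can be reproduced standalone against plain torchvision, which makes the "(3× memory savings vs expand)" comment concrete: the 1-channel conv is built once, so the forward pass never materializes a replicated 3-channel copy of the input. A minimal sketch, assuming torchvision ≥ 0.13 (string weights= API) and the current MobileNetV3 layout where the stem conv sits at features[0][0]:

    import torch
    import torch.nn as nn
    from torchvision.models import mobilenet_v3_large

    backbone = mobilenet_v3_large(weights="IMAGENET1K_V1")
    old_conv = backbone.features[0][0]  # 3-channel stem conv (3x3, stride 2, no bias)
    new_conv = nn.Conv2d(
        1,
        old_conv.out_channels,
        kernel_size=old_conv.kernel_size,
        stride=old_conv.stride,
        padding=old_conv.padding,
        bias=old_conv.bias is not None,
    )
    with torch.no_grad():
        # Collapse the pretrained RGB filters into one grayscale filter
        new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
    backbone.features[0][0] = new_conv

    y = backbone(torch.randn(2, 1, 224, 224))  # single-channel input now accepted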
wavedl/models/regnet.py
CHANGED
@@ -13,11 +13,11 @@ Models scale smoothly from mobile to server deployments.
 - Optional Squeeze-and-Excitation (SE) attention
 
 **Variants** (RegNetY includes SE attention):
-- regnet_y_400mf: Ultra-light (~
-- regnet_y_800mf: Light (~5.
-- regnet_y_1_6gf: Medium (~10.
-- regnet_y_3_2gf: Large (~
-- regnet_y_8gf: Very large (~37.
+- regnet_y_400mf: Ultra-light (~3.9M backbone params, 0.4 GFLOPs)
+- regnet_y_800mf: Light (~5.7M backbone params, 0.8 GFLOPs)
+- regnet_y_1_6gf: Medium (~10.3M backbone params, 1.6 GFLOPs) - Recommended
+- regnet_y_3_2gf: Large (~17.9M backbone params, 3.2 GFLOPs)
+- regnet_y_8gf: Very large (~37.4M backbone params, 8.0 GFLOPs)
 
 **When to Use RegNet**:
 - When you need predictable performance at a given compute budget
@@ -140,10 +140,37 @@ class RegNetBase(BaseModel):
             nn.Linear(regression_hidden, out_size),
         )
 
-        #
+        # Adapt first conv for single-channel input (3× memory savings vs expand)
+        self._adapt_input_channels()
+
+        # Optionally freeze backbone for fine-tuning (after adaptation so new conv is frozen too)
         if freeze_backbone:
             self._freeze_backbone()
 
+    def _adapt_input_channels(self):
+        """Modify first conv to accept single-channel input.
+
+        Instead of expanding 1→3 channels in forward (which triples memory),
+        we replace the first conv layer with a 1-channel version and initialize
+        weights as the mean of the pretrained RGB filters.
+        """
+        old_conv = self.backbone.stem[0]
+        new_conv = nn.Conv2d(
+            1,  # Single channel input
+            old_conv.out_channels,
+            kernel_size=old_conv.kernel_size,
+            stride=old_conv.stride,
+            padding=old_conv.padding,
+            dilation=old_conv.dilation,
+            groups=old_conv.groups,
+            padding_mode=old_conv.padding_mode,
+            bias=old_conv.bias is not None,
+        )
+        if self.pretrained:
+            with torch.no_grad():
+                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
+        self.backbone.stem[0] = new_conv
+
     def _freeze_backbone(self):
         """Freeze all backbone parameters except the fc layer."""
         for name, param in self.backbone.named_parameters():
@@ -155,15 +182,11 @@ class RegNetBase(BaseModel):
         Forward pass.
 
         Args:
-            x: Input tensor of shape (B,
+            x: Input tensor of shape (B, 1, H, W)
 
         Returns:
             Output tensor of shape (B, out_size)
         """
-        # Expand single channel to 3 channels for pretrained weights compatibility
-        if x.size(1) == 1:
-            x = x.expand(-1, 3, -1, -1)
-
         return self.backbone(x)
 
     @classmethod
@@ -187,7 +210,7 @@ class RegNetY400MF(RegNetBase):
     """
     RegNetY-400MF: Ultra-lightweight for constrained environments.
 
-    ~
+    ~3.9M backbone parameters, 0.4 GFLOPs. Smallest RegNet variant with SE attention.
 
     Recommended for:
     - Edge deployment with moderate accuracy needs
@@ -227,7 +250,7 @@ class RegNetY800MF(RegNetBase):
     """
     RegNetY-800MF: Light variant with good accuracy.
 
-    ~
+    ~5.7M backbone parameters, 0.8 GFLOPs. Good balance for mobile deployment.
 
     Recommended for:
     - Mobile/portable devices
@@ -267,7 +290,7 @@ class RegNetY1_6GF(RegNetBase):
     """
     RegNetY-1.6GF: Recommended default for balanced performance.
 
-    ~
+    ~10.3M backbone parameters, 1.6 GFLOPs. Best trade-off of accuracy and efficiency.
     Comparable to ResNet50 but more efficient.
 
     Recommended for:
@@ -308,7 +331,7 @@ class RegNetY3_2GF(RegNetBase):
     """
     RegNetY-3.2GF: Higher accuracy for demanding tasks.
 
-    ~
+    ~17.9M backbone parameters, 3.2 GFLOPs. Use when 1.6GF isn't sufficient.
 
     Recommended for:
     - Larger datasets requiring more capacity
@@ -348,7 +371,7 @@ class RegNetY8GF(RegNetBase):
     """
     RegNetY-8GF: High capacity for large-scale tasks.
 
-    ~
+    ~37.4M backbone parameters, 8.0 GFLOPs. Use for maximum accuracy needs.
 
     Recommended for:
     - Very large datasets (>50k samples)
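Since mobilenetv3.py, regnet.py, and swin.py all switch from channel replication to a mean-initialized single-channel stem, a quick numerical sketch of how the two paths relate may help. Layer sizes below are illustrative (torchvision's RegNet stem is a 3×3, stride-2 conv without bias); the point is that averaging the RGB filters yields exactly one third of the activation the removed expand-to-3-channels forward produced, a constant scale that fine-tuning can absorb:

    import torch
    import torch.nn as nn

    # Illustrative stem-like conv (sizes are examples, not the exact RegNet widths)
    old_conv = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False)
    new_conv = nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1, bias=False)
    with torch.no_grad():
        new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))

    x = torch.randn(2, 1, 64, 64)
    out_old = old_conv(x.expand(-1, 3, -1, -1))  # removed path: replicate the channel
    out_new = new_conv(x)                        # new path: 1-channel, mean-initialized
    print(torch.allclose(out_new, out_old / 3, atol=1e-5))  # True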
wavedl/models/resnet.py
CHANGED
@@ -11,9 +11,9 @@ Provides multiple depth variants (18, 34, 50) with optional pretrained weights f
 - 3D: Volumetric data, CT/MRI (N, 1, D, H, W) → Conv3d
 
 **Variants**:
-- resnet18: Lightweight, fast training (~
-- resnet34: Balanced capacity (~
-- resnet50: Higher capacity with bottleneck blocks (~
+- resnet18: Lightweight, fast training (~11.2M backbone params)
+- resnet34: Balanced capacity (~21.3M backbone params)
+- resnet50: Higher capacity with bottleneck blocks (~23.5M backbone params)
 
 References:
     He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning
@@ -534,7 +534,7 @@ class ResNet18Pretrained(PretrainedResNetBase):
     """
     ResNet-18 with ImageNet pretrained weights (2D only).
 
-    ~
+    ~11.2M backbone parameters. Good for: Transfer learning, faster convergence.
 
     Args:
         in_shape: (H, W) image dimensions
@@ -563,7 +563,7 @@ class ResNet50Pretrained(PretrainedResNetBase):
     """
     ResNet-50 with ImageNet pretrained weights (2D only).
 
-    ~
+    ~23.5M backbone parameters. Good for: High accuracy with transfer learning.
 
     Args:
         in_shape: (H, W) image dimensions
wavedl/models/resnet3d.py
CHANGED
@@ -179,7 +179,7 @@ class ResNet3D18(ResNet3DBase):
     """
     ResNet3D-18: Lightweight 3D ResNet for volumetric data.
 
-    ~
+    ~33.2M backbone parameters. Uses 3D convolutions throughout for true volumetric processing.
     Pretrained on Kinetics-400 (video action recognition).
 
     Recommended for:
@@ -221,7 +221,7 @@ class MC3_18(ResNet3DBase):
     """
     MC3-18: Mixed Convolution 3D ResNet (3D stem + 2D residual blocks).
 
-    ~
+    ~11.5M backbone parameters. More efficient than pure 3D ResNet while maintaining
     good spatiotemporal modeling. Uses 3D convolutions in early layers
     and 2D convolutions in later layers.
 
wavedl/models/swin.py
CHANGED
@@ -141,10 +141,46 @@ class SwinTransformerBase(BaseModel):
             nn.Linear(regression_hidden // 2, out_size),
         )
 
-        #
+        # Adapt patch embedding conv for single-channel input (3× memory savings vs expand)
+        self._adapt_input_channels()
+
+        # Optionally freeze backbone for fine-tuning (after adaptation so new conv is frozen too)
         if freeze_backbone:
             self._freeze_backbone()
 
+    def _adapt_input_channels(self):
+        """Modify patch embedding conv to accept single-channel input.
+
+        Instead of expanding 1→3 channels in forward (which triples memory),
+        we replace the patch embedding conv with a 1-channel version and
+        initialize weights as the mean of the pretrained RGB filters.
+        """
+        # Swin's patch embedding is at features[0][0]
+        try:
+            old_conv = self.backbone.features[0][0]
+        except (IndexError, AttributeError, TypeError) as e:
+            raise RuntimeError(
+                f"Swin patch embed structure changed in this torchvision version. "
+                f"Cannot adapt input channels. Error: {e}"
+            ) from e
+        new_conv = nn.Conv2d(
+            1,  # Single channel input
+            old_conv.out_channels,
+            kernel_size=old_conv.kernel_size,
+            stride=old_conv.stride,
+            padding=old_conv.padding,
+            dilation=old_conv.dilation,
+            groups=old_conv.groups,
+            padding_mode=old_conv.padding_mode,
+            bias=old_conv.bias is not None,
+        )
+        if self.pretrained:
+            with torch.no_grad():
+                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
+                if old_conv.bias is not None:
+                    new_conv.bias.copy_(old_conv.bias)
+        self.backbone.features[0][0] = new_conv
+
     def _freeze_backbone(self):
         """Freeze all backbone parameters except the head."""
         for name, param in self.backbone.named_parameters():
@@ -156,15 +192,11 @@ class SwinTransformerBase(BaseModel):
         Forward pass.
 
         Args:
-            x: Input tensor of shape (B,
+            x: Input tensor of shape (B, 1, H, W)
 
         Returns:
             Output tensor of shape (B, out_size)
         """
-        # Expand single channel to 3 channels for pretrained weights compatibility
-        if x.size(1) == 1:
-            x = x.expand(-1, 3, -1, -1)
-
         return self.backbone(x)
 
     @classmethod
@@ -272,7 +304,7 @@ class SwinTiny(SwinTransformerBase):
     """
     Swin-T (Tiny): Efficient default for most wave-based tasks.
 
-    ~
+    ~27.5M backbone parameters. Good balance of accuracy and computational cost.
     Outperforms ResNet50 while being more efficient.
 
     Recommended for:
@@ -321,7 +353,7 @@ class SwinSmall(SwinTransformerBase):
     """
     Swin-S (Small): Higher accuracy with moderate compute.
 
-    ~
+    ~48.8M backbone parameters. Better accuracy than Swin-T for larger datasets.
 
     Recommended for:
     - Larger datasets (>20k samples)
@@ -368,7 +400,7 @@ class SwinBase(SwinTransformerBase):
     """
     Swin-B (Base): Maximum accuracy for large-scale tasks.
 
-    ~
+    ~86.7M backbone parameters. Best accuracy but requires more compute and data.
 
     Recommended for:
     - Very large datasets (>50k samples)
wavedl/models/tcn.py
CHANGED
@@ -45,6 +45,26 @@ from wavedl.models.base import BaseModel
 from wavedl.models.registry import register_model
 
 
+def _find_group_count(channels: int, max_groups: int = 8) -> int:
+    """
+    Find largest valid group count for GroupNorm.
+
+    GroupNorm requires channels to be divisible by num_groups.
+    This finds the largest divisor up to max_groups.
+
+    Args:
+        channels: Number of channels
+        max_groups: Maximum group count to consider (default: 8)
+
+    Returns:
+        Largest valid group count (always >= 1)
+    """
+    for g in range(min(max_groups, channels), 0, -1):
+        if channels % g == 0:
+            return g
+    return 1
+
+
 class CausalConv1d(nn.Module):
     """
     Causal 1D convolution with dilation.
@@ -101,13 +121,13 @@ class TemporalBlock(nn.Module):
 
         # First causal convolution
         self.conv1 = CausalConv1d(in_channels, out_channels, kernel_size, dilation)
-        self.norm1 = nn.GroupNorm(
+        self.norm1 = nn.GroupNorm(_find_group_count(out_channels), out_channels)
         self.act1 = nn.GELU()
         self.dropout1 = nn.Dropout(dropout)
 
         # Second causal convolution
         self.conv2 = CausalConv1d(out_channels, out_channels, kernel_size, dilation)
-        self.norm2 = nn.GroupNorm(
+        self.norm2 = nn.GroupNorm(_find_group_count(out_channels), out_channels)
         self.act2 = nn.GELU()
         self.dropout2 = nn.Dropout(dropout)
 
@@ -276,7 +296,7 @@ class TCN(TCNBase):
     """
     TCN: Standard Temporal Convolutional Network.
 
-    ~
+    ~6.9M backbone parameters. 8 temporal blocks with channels [64→128→256→256→512→512→512→512].
     Receptive field: 511 samples with kernel_size=3.
 
     Recommended for:
@@ -318,7 +338,7 @@ class TCNSmall(TCNBase):
     """
     TCN-Small: Lightweight variant for quick experiments.
 
-    ~
+    ~0.9M backbone parameters. 6 temporal blocks with channels [32→64→128→128→256→256].
     Receptive field: 127 samples with kernel_size=3.
 
     Recommended for:
@@ -356,7 +376,7 @@ class TCNLarge(TCNBase):
     """
     TCN-Large: High-capacity variant for complex patterns.
 
-    ~10.
+    ~10.0M backbone parameters. 10 temporal blocks with channels [64→128→256→256→512→512→512→512→512→512].
     Receptive field: 2047 samples with kernel_size=3.
 
     Recommended for:
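The new _find_group_count helper exists because nn.GroupNorm raises when the channel count is not divisible by num_groups; it simply walks down from max_groups to the largest divisor. A small sketch of its behaviour (the channel counts below are arbitrary examples, not the TCN's actual widths):

    import torch.nn as nn

    def _find_group_count(channels: int, max_groups: int = 8) -> int:
        # Largest divisor of `channels` that is <= max_groups (always >= 1)
        for g in range(min(max_groups, channels), 0, -1):
            if channels % g == 0:
                return g
        return 1

    for c in (64, 48, 10, 7, 3):
        print(c, "->", _find_group_count(c))
    # 64 -> 8, 48 -> 8, 10 -> 5, 7 -> 7, 3 -> 3

    norm = nn.GroupNorm(_find_group_count(10), 10)  # valid: 10 % 5 == 0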
wavedl/models/unet.py
CHANGED
@@ -119,7 +119,7 @@ class UNetRegression(BaseModel):
     Uses U-Net encoder-decoder architecture with skip connections,
     then applies global pooling for standard vector regression output.
 
-    ~31.
+    ~31.0M backbone parameters (2D). Good for leveraging multi-scale features
     and skip connections for regression tasks.
 
     Args: