wavedl 1.5.7__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wavedl/models/convnext.py CHANGED
@@ -11,9 +11,9 @@ Features: inverted bottleneck, LayerNorm, GELU activation, depthwise convolution
11
11
  - 3D: Volumetric data, CT/MRI (N, 1, D, H, W) → Conv3d
12
12
 
13
13
  **Variants**:
14
- - convnext_tiny: Smallest (~28M params for 2D)
15
- - convnext_small: Medium (~50M params for 2D)
16
- - convnext_base: Standard (~89M params for 2D)
14
+ - convnext_tiny: Smallest (~27.8M backbone params for 2D)
15
+ - convnext_small: Medium (~49.5M backbone params for 2D)
16
+ - convnext_base: Standard (~87.6M backbone params for 2D)
17
17
 
18
18
  References:
19
19
  Liu, Z., et al. (2022). A ConvNet for the 2020s.
@@ -26,6 +26,7 @@ from typing import Any
26
26
 
27
27
  import torch
28
28
  import torch.nn as nn
29
+ import torch.nn.functional as F
29
30
 
30
31
  from wavedl.models.base import BaseModel
31
32
  from wavedl.models.registry import register_model
@@ -51,40 +52,75 @@ class LayerNormNd(nn.Module):
51
52
  """
52
53
  LayerNorm for N-dimensional tensors (channels-first format).
53
54
 
54
- Normalizes over the channel dimension, supporting Conv1d/2d/3d outputs.
55
+ Implements channels-last LayerNorm as used in the original ConvNeXt paper.
56
+ Permutes data to channels-last, applies LayerNorm per-channel over spatial
57
+ dimensions, and permutes back to channels-first format.
58
+
59
+ This matches PyTorch's nn.LayerNorm behavior when applied to the channel
60
+ dimension, providing stable gradients for deep ConvNeXt networks.
61
+
62
+ References:
63
+ Liu, Z., et al. (2022). A ConvNet for the 2020s. CVPR 2022.
64
+ https://github.com/facebookresearch/ConvNeXt
55
65
  """
56
66
 
57
67
  def __init__(self, num_channels: int, dim: int, eps: float = 1e-6):
58
68
  super().__init__()
59
69
  self.dim = dim
70
+ self.num_channels = num_channels
60
71
  self.weight = nn.Parameter(torch.ones(num_channels))
61
72
  self.bias = nn.Parameter(torch.zeros(num_channels))
62
73
  self.eps = eps
63
74
 
64
75
  def forward(self, x: torch.Tensor) -> torch.Tensor:
65
- # x: (B, C, ..spatial..)
66
- # Normalize over channel dimension
67
- mean = x.mean(dim=1, keepdim=True)
68
- var = x.var(dim=1, keepdim=True, unbiased=False)
69
- x = (x - mean) / (var + self.eps).sqrt()
70
-
71
- # Apply learnable parameters
72
- shape = [1, -1] + [1] * self.dim # (1, C, 1, ...) for broadcasting
73
- x = x * self.weight.view(*shape) + self.bias.view(*shape)
76
+ """
77
+ Apply LayerNorm in channels-last format.
78
+
79
+ Args:
80
+ x: Input tensor in channels-first format
81
+ - 1D: (B, C, L)
82
+ - 2D: (B, C, H, W)
83
+ - 3D: (B, C, D, H, W)
84
+
85
+ Returns:
86
+ Normalized tensor in same format as input
87
+ """
88
+ if self.dim == 1:
89
+ # (B, C, L) -> (B, L, C) -> LayerNorm -> (B, C, L)
90
+ x = x.permute(0, 2, 1)
91
+ x = F.layer_norm(x, (self.num_channels,), self.weight, self.bias, self.eps)
92
+ x = x.permute(0, 2, 1)
93
+ elif self.dim == 2:
94
+ # (B, C, H, W) -> (B, H, W, C) -> LayerNorm -> (B, C, H, W)
95
+ x = x.permute(0, 2, 3, 1)
96
+ x = F.layer_norm(x, (self.num_channels,), self.weight, self.bias, self.eps)
97
+ x = x.permute(0, 3, 1, 2)
98
+ else:
99
+ # (B, C, D, H, W) -> (B, D, H, W, C) -> LayerNorm -> (B, C, D, H, W)
100
+ x = x.permute(0, 2, 3, 4, 1)
101
+ x = F.layer_norm(x, (self.num_channels,), self.weight, self.bias, self.eps)
102
+ x = x.permute(0, 4, 1, 2, 3)
74
103
  return x
75
104
 
76
105
 
77
106
  class ConvNeXtBlock(nn.Module):
78
107
  """
79
- ConvNeXt block with inverted bottleneck design.
80
-
81
- Architecture:
82
- - 7x7 depthwise conv
83
- - LayerNorm
84
- - 1x1 conv (expand by 4x)
85
- - GELU
86
- - 1x1 conv (reduce back)
87
- - Residual connection
108
+ ConvNeXt block matching the official Facebook implementation.
109
+
110
+ Uses the second variant from the paper, which is slightly faster in PyTorch:
111
+ 1. DwConv (channels-first)
112
+ 2. Permute to channels-last
113
+ 3. LayerNorm → Linear → GELU → Linear (all channels-last)
114
+ 4. LayerScale (gamma * x)
115
+ 5. Permute back to channels-first
116
+ 6. Residual connection
117
+
118
+ The LayerScale mechanism is critical for stable training in deep networks.
119
+ It scales the output by a learnable parameter initialized to 1e-6.
120
+
121
+ References:
122
+ Liu, Z., et al. (2022). A ConvNet for the 2020s. CVPR 2022.
123
+ https://github.com/facebookresearch/ConvNeXt
88
124
  """
89
125
 
90
126
  def __init__(
@@ -93,21 +129,36 @@ class ConvNeXtBlock(nn.Module):
93
129
  dim: int = 2,
94
130
  expansion_ratio: float = 4.0,
95
131
  drop_path: float = 0.0,
132
+ layer_scale_init_value: float = 1e-6,
96
133
  ):
97
134
  super().__init__()
135
+ self.dim = dim
98
136
  Conv = _get_conv_layer(dim)
99
137
  hidden_dim = int(channels * expansion_ratio)
100
138
 
101
- # Depthwise conv (7x7)
139
+ # Depthwise conv (7x7) - operates in channels-first
102
140
  self.dwconv = Conv(
103
141
  channels, channels, kernel_size=7, padding=3, groups=channels
104
142
  )
105
- self.norm = LayerNormNd(channels, dim)
106
143
 
107
- # Pointwise convs (1x1)
108
- self.pwconv1 = Conv(channels, hidden_dim, kernel_size=1)
144
+ # LayerNorm (channels-last format, using standard nn.LayerNorm)
145
+ self.norm = nn.LayerNorm(channels, eps=1e-6)
146
+
147
+ # Pointwise convs implemented with Linear layers (channels-last)
148
+ # This matches the official implementation and is slightly faster
149
+ self.pwconv1 = nn.Linear(channels, hidden_dim)
109
150
  self.act = nn.GELU()
110
- self.pwconv2 = Conv(hidden_dim, channels, kernel_size=1)
151
+ self.pwconv2 = nn.Linear(hidden_dim, channels)
152
+
153
+ # LayerScale: learnable per-channel scaling (critical for deep networks)
154
+ # Initialized to a small value (1e-6) to prevent gradient explosion
155
+ self.gamma = (
156
+ nn.Parameter(
157
+ layer_scale_init_value * torch.ones(channels), requires_grad=True
158
+ )
159
+ if layer_scale_init_value > 0
160
+ else None
161
+ )
111
162
 
112
163
  # Stochastic depth (drop path) - simplified version
113
164
  self.drop_path = nn.Identity() # Can be replaced with DropPath if needed
@@ -115,14 +166,38 @@ class ConvNeXtBlock(nn.Module):
115
166
  def forward(self, x: torch.Tensor) -> torch.Tensor:
116
167
  residual = x
117
168
 
169
+ # Depthwise conv in channels-first format
118
170
  x = self.dwconv(x)
171
+
172
+ # Permute to channels-last for LayerNorm and Linear layers
173
+ if self.dim == 1:
174
+ x = x.permute(0, 2, 1) # (B, C, L) -> (B, L, C)
175
+ elif self.dim == 2:
176
+ x = x.permute(0, 2, 3, 1) # (B, C, H, W) -> (B, H, W, C)
177
+ else:
178
+ x = x.permute(0, 2, 3, 4, 1) # (B, C, D, H, W) -> (B, D, H, W, C)
179
+
180
+ # LayerNorm + MLP (all in channels-last)
119
181
  x = self.norm(x)
120
182
  x = self.pwconv1(x)
121
183
  x = self.act(x)
122
184
  x = self.pwconv2(x)
123
- x = self.drop_path(x)
124
185
 
125
- return residual + x
186
+ # Apply LayerScale
187
+ if self.gamma is not None:
188
+ x = self.gamma * x
189
+
190
+ # Permute back to channels-first
191
+ if self.dim == 1:
192
+ x = x.permute(0, 2, 1) # (B, L, C) -> (B, C, L)
193
+ elif self.dim == 2:
194
+ x = x.permute(0, 3, 1, 2) # (B, H, W, C) -> (B, C, H, W)
195
+ else:
196
+ x = x.permute(0, 4, 1, 2, 3) # (B, D, H, W, C) -> (B, C, D, H, W)
197
+
198
+ # Residual connection with drop path
199
+ x = residual + self.drop_path(x)
200
+ return x
126
201
 
127
202
 
128
203
  class ConvNeXtBase(BaseModel):
@@ -244,7 +319,7 @@ class ConvNeXtTiny(ConvNeXtBase):
244
319
  """
245
320
  ConvNeXt-Tiny: Smallest variant.
246
321
 
247
- ~28M parameters (2D). Good for: Limited compute, fast training.
322
+ ~27.8M backbone parameters (2D). Good for: Limited compute, fast training.
248
323
 
249
324
  Args:
250
325
  in_shape: (L,), (H, W), or (D, H, W)
@@ -270,7 +345,7 @@ class ConvNeXtSmall(ConvNeXtBase):
270
345
  """
271
346
  ConvNeXt-Small: Medium variant.
272
347
 
273
- ~50M parameters (2D). Good for: Balanced performance.
348
+ ~49.5M backbone parameters (2D). Good for: Balanced performance.
274
349
 
275
350
  Args:
276
351
  in_shape: (L,), (H, W), or (D, H, W)
@@ -296,7 +371,7 @@ class ConvNeXtBase_(ConvNeXtBase):
296
371
  """
297
372
  ConvNeXt-Base: Standard variant.
298
373
 
299
- ~89M parameters (2D). Good for: High accuracy, larger datasets.
374
+ ~87.6M backbone parameters (2D). Good for: High accuracy, larger datasets.
300
375
 
301
376
  Args:
302
377
  in_shape: (L,), (H, W), or (D, H, W)
@@ -337,7 +412,7 @@ class ConvNeXtTinyPretrained(BaseModel):
337
412
  """
338
413
  ConvNeXt-Tiny with ImageNet pretrained weights (2D only).
339
414
 
340
- ~28M parameters. Good for: Transfer learning with modern CNN.
415
+ ~27.8M backbone parameters. Good for: Transfer learning with modern CNN.
341
416
 
342
417
  Args:
343
418
  in_shape: (H, W) image dimensions