wavedl 1.5.7__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wavedl/__init__.py +1 -1
- wavedl/hpo.py +451 -451
- wavedl/models/__init__.py +80 -4
- wavedl/models/_pretrained_utils.py +366 -0
- wavedl/models/base.py +48 -0
- wavedl/models/caformer.py +270 -0
- wavedl/models/cnn.py +2 -27
- wavedl/models/convnext.py +113 -51
- wavedl/models/convnext_v2.py +488 -0
- wavedl/models/densenet.py +10 -23
- wavedl/models/efficientnet.py +6 -6
- wavedl/models/efficientnetv2.py +315 -315
- wavedl/models/efficientvit.py +398 -0
- wavedl/models/fastvit.py +252 -0
- wavedl/models/mamba.py +555 -0
- wavedl/models/maxvit.py +254 -0
- wavedl/models/mobilenetv3.py +295 -295
- wavedl/models/regnet.py +406 -406
- wavedl/models/resnet.py +19 -61
- wavedl/models/resnet3d.py +258 -258
- wavedl/models/swin.py +443 -443
- wavedl/models/tcn.py +393 -409
- wavedl/models/unet.py +2 -6
- wavedl/models/unireplknet.py +491 -0
- wavedl/models/vit.py +9 -9
- wavedl/train.py +1430 -1425
- wavedl/utils/config.py +367 -367
- wavedl/utils/cross_validation.py +530 -530
- wavedl/utils/data.py +39 -6
- wavedl/utils/losses.py +216 -216
- wavedl/utils/optimizers.py +216 -216
- wavedl/utils/schedulers.py +251 -251
- {wavedl-1.5.7.dist-info → wavedl-1.6.1.dist-info}/METADATA +150 -82
- wavedl-1.6.1.dist-info/RECORD +46 -0
- wavedl-1.5.7.dist-info/RECORD +0 -38
- {wavedl-1.5.7.dist-info → wavedl-1.6.1.dist-info}/LICENSE +0 -0
- {wavedl-1.5.7.dist-info → wavedl-1.6.1.dist-info}/WHEEL +0 -0
- {wavedl-1.5.7.dist-info → wavedl-1.6.1.dist-info}/entry_points.txt +0 -0
- {wavedl-1.5.7.dist-info → wavedl-1.6.1.dist-info}/top_level.txt +0 -0
wavedl/models/caformer.py
ADDED
@@ -0,0 +1,270 @@
+"""
+CaFormer: MetaFormer with Convolution and Attention
+====================================================
+
+CaFormer implements the MetaFormer architecture using depthwise separable
+convolutions in early stages and vanilla self-attention in later stages.
+
+**Key Features**:
+- MetaFormer principle: architecture > token mixer
+- Hybrid: Conv (early) + Attention (late)
+- StarReLU activation for efficiency
+- State-of-the-art on ImageNet (85.5%)
+
+**Variants**:
+- caformer_s18: 26M params
+- caformer_s36: 39M params
+- caformer_m36: 56M params
+
+**Related Models**:
+- PoolFormer: Uses pooling instead of attention
+- ConvFormer: Uses only convolutions
+
+**Requirements**:
+- timm >= 0.9.0 (for CaFormer models)
+
+Reference:
+    Yu, W., et al. (2023). MetaFormer Baselines for Vision.
+    TPAMI 2023. https://arxiv.org/abs/2210.13452
+
+Author: Ductho Le (ductho.le@outlook.com)
+"""
+
+import torch
+import torch.nn as nn
+
+from wavedl.models._pretrained_utils import build_regression_head
+from wavedl.models.base import BaseModel
+from wavedl.models.registry import register_model
+
+
+__all__ = [
+    "CaFormerBase",
+    "CaFormerM36",
+    "CaFormerS18",
+    "CaFormerS36",
+    "PoolFormerS12",
+]
+
+
+# =============================================================================
+# CAFORMER BASE CLASS
+# =============================================================================
+
+
+class CaFormerBase(BaseModel):
+    """
+    CaFormer base class wrapping timm implementation.
+
+    MetaFormer with conv (early) + attention (late) token mixing.
+    2D only.
+    """
+
+    def __init__(
+        self,
+        in_shape: tuple[int, int],
+        out_size: int,
+        model_name: str = "caformer_s18",
+        pretrained: bool = True,
+        freeze_backbone: bool = False,
+        dropout_rate: float = 0.3,
+        **kwargs,
+    ):
+        super().__init__(in_shape, out_size)
+
+        if len(in_shape) != 2:
+            raise ValueError(f"CaFormer requires 2D input (H, W), got {len(in_shape)}D")
+
+        self.pretrained = pretrained
+        self.freeze_backbone = freeze_backbone
+        self.model_name = model_name
+
+        # Try to load from timm
+        try:
+            import timm
+
+            self.backbone = timm.create_model(
+                model_name,
+                pretrained=pretrained,
+                num_classes=0,  # Remove classifier
+            )
+
+            # Get feature dimension
+            with torch.no_grad():
+                dummy = torch.zeros(1, 3, *in_shape)
+                features = self.backbone(dummy)
+                in_features = features.shape[-1]
+
+        except ImportError:
+            raise ImportError(
+                "timm >= 0.9.0 is required for CaFormer. "
+                "Install with: pip install timm>=0.9.0"
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to load CaFormer model '{model_name}': {e}")
+
+        # Adapt input channels (3 -> 1)
+        self._adapt_input_channels()
+
+        # Regression head
+        self.head = build_regression_head(in_features, out_size, dropout_rate)
+
+        if freeze_backbone:
+            self._freeze_backbone()
+
+    def _adapt_input_channels(self):
+        """Adapt first conv layer for single-channel input."""
+        # CaFormer uses stem for first layer
+        if hasattr(self.backbone, "stem"):
+            first_conv = None
+            # Find first conv in stem
+            for name, module in self.backbone.stem.named_modules():
+                if isinstance(module, nn.Conv2d):
+                    first_conv = (name, module)
+                    break
+
+            if first_conv is not None:
+                name, old_conv = first_conv
+                new_conv = self._make_new_conv(old_conv)
+                # Set the new conv (handle nested structure)
+                self._set_module(self.backbone.stem, name, new_conv)
+
+    def _make_new_conv(self, old_conv: nn.Conv2d) -> nn.Conv2d:
+        """Create new conv layer with 1 input channel."""
+        new_conv = nn.Conv2d(
+            1,
+            old_conv.out_channels,
+            kernel_size=old_conv.kernel_size,
+            stride=old_conv.stride,
+            padding=old_conv.padding,
+            bias=old_conv.bias is not None,
+        )
+        if self.pretrained:
+            with torch.no_grad():
+                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
+                if old_conv.bias is not None:
+                    new_conv.bias.copy_(old_conv.bias)
+        return new_conv
+
+    def _set_module(self, parent: nn.Module, name: str, module: nn.Module):
+        """Set a nested module by name."""
+        parts = name.split(".")
+        for part in parts[:-1]:
+            parent = getattr(parent, part)
+        setattr(parent, parts[-1], module)
+
+    def _freeze_backbone(self):
+        """Freeze backbone parameters."""
+        for param in self.backbone.parameters():
+            param.requires_grad = False
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        features = self.backbone(x)
+        return self.head(features)
+
+
+# =============================================================================
+# REGISTERED VARIANTS
+# =============================================================================
+
+
+@register_model("caformer_s18")
+class CaFormerS18(CaFormerBase):
+    """
+    CaFormer-S18: ~23.2M backbone parameters.
+
+    MetaFormer with conv + attention.
+    2D only.
+
+    Example:
+        >>> model = CaFormerS18(in_shape=(224, 224), out_size=3)
+        >>> x = torch.randn(4, 1, 224, 224)
+        >>> out = model(x)  # (4, 3)
+    """
+
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="caformer_s18",
+            **kwargs,
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"CaFormer_S18(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+
+
+@register_model("caformer_s36")
+class CaFormerS36(CaFormerBase):
+    """
+    CaFormer-S36: ~36.2M backbone parameters.
+
+    Deeper MetaFormer variant.
+    2D only.
+    """
+
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="caformer_s36",
+            **kwargs,
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"CaFormer_S36(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+
+
+@register_model("caformer_m36")
+class CaFormerM36(CaFormerBase):
+    """
+    CaFormer-M36: ~52.6M backbone parameters.
+
+    Medium-size MetaFormer variant.
+    2D only.
+    """
+
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="caformer_m36",
+            **kwargs,
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"CaFormer_M36(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+
+
+@register_model("poolformer_s12")
+class PoolFormerS12(CaFormerBase):
+    """
+    PoolFormer-S12: ~11.4M backbone parameters.
+
+    MetaFormer with simple pooling token mixer.
+    Proves that architecture matters more than complex attention.
+    2D only.
+    """
+
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="poolformer_s12",
+            **kwargs,
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"PoolFormer_S12(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
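Because `_make_new_conv` averages the pretrained 3-channel stem kernel over its input dimension, the registered variants take single-channel tensors directly, as the S18 docstring example shows. A minimal usage sketch under the same assumptions (timm installed; `pretrained=False` here simply skips the weight download):

import torch
from wavedl.models.caformer import CaFormerS18

# 2D-only model: in_shape is (H, W); inputs are (B, 1, H, W)
model = CaFormerS18(in_shape=(224, 224), out_size=3, pretrained=False)
x = torch.randn(4, 1, 224, 224)
out = model(x)  # (4, 3) output from build_regression_head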
wavedl/models/cnn.py
CHANGED
@@ -24,14 +24,10 @@ from typing import Any
 import torch
 import torch.nn as nn
 
-from wavedl.models.base import BaseModel
+from wavedl.models.base import BaseModel, SpatialShape, compute_num_groups
 from wavedl.models.registry import register_model
 
 
-# Type alias for spatial shapes
-SpatialShape = tuple[int] | tuple[int, int] | tuple[int, int, int]
-
-
 def _get_conv_layers(
     dim: int,
 ) -> tuple[type[nn.Module], type[nn.Module], type[nn.Module]]:
@@ -163,27 +159,6 @@ class CNN(BaseModel):
             nn.Linear(64, out_size),
         )
 
-    @staticmethod
-    def _compute_num_groups(num_channels: int, target_groups: int = 4) -> int:
-        """
-        Compute valid num_groups for GroupNorm that divides num_channels.
-
-        Finds the largest divisor of num_channels that is <= target_groups,
-        or falls back to 1 if no suitable divisor exists.
-
-        Args:
-            num_channels: Number of channels (must be positive)
-            target_groups: Desired number of groups (default: 4)
-
-        Returns:
-            Valid num_groups that satisfies num_channels % num_groups == 0
-        """
-        # Try target_groups down to 1, return first valid divisor
-        for g in range(min(target_groups, num_channels), 0, -1):
-            if num_channels % g == 0:
-                return g
-        return 1  # Fallback (always valid)
-
     def _make_conv_block(
         self, in_channels: int, out_channels: int, dropout: float = 0.0
     ) -> nn.Sequential:
@@ -198,7 +173,7 @@ class CNN(BaseModel):
         Returns:
             Sequential block: Conv → GroupNorm → LeakyReLU → MaxPool [→ Dropout]
         """
-        num_groups =
+        num_groups = compute_num_groups(out_channels, preferred_groups=4)
 
         layers = [
             self._Conv(in_channels, out_channels, kernel_size=3, padding=1),
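The removed `_compute_num_groups` duplicated logic that now lives in `wavedl.models.base` as the shared `compute_num_groups` helper. The shared helper's body is not shown in this diff; the sketch below reconstructs the intended behavior from the deleted docstring and the new call site (`preferred_groups` replaces the old `target_groups` name, and the shared version may differ in detail):

def compute_num_groups(num_channels: int, preferred_groups: int = 4) -> int:
    # Largest divisor of num_channels that is <= preferred_groups;
    # 1 always divides, so GroupNorm(num_groups, num_channels) stays valid.
    for g in range(min(preferred_groups, num_channels), 0, -1):
        if num_channels % g == 0:
            return g
    return 1

compute_num_groups(64)  # -> 4
compute_num_groups(6)   # -> 3 (4 does not divide 6)
compute_num_groups(7)   # -> 1 (prime channel count)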
wavedl/models/convnext.py
CHANGED
@@ -11,9 +11,9 @@ Features: inverted bottleneck, LayerNorm, GELU activation, depthwise convolution
 - 3D: Volumetric data, CT/MRI (N, 1, D, H, W) → Conv3d
 
 **Variants**:
-- convnext_tiny: Smallest (~
-- convnext_small: Medium (~
-- convnext_base: Standard (~
+- convnext_tiny: Smallest (~27.8M backbone params for 2D)
+- convnext_small: Medium (~49.5M backbone params for 2D)
+- convnext_base: Standard (~87.6M backbone params for 2D)
 
 References:
     Liu, Z., et al. (2022). A ConvNet for the 2020s.
@@ -26,15 +26,12 @@ from typing import Any
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 
-from wavedl.models.base import BaseModel
+from wavedl.models.base import BaseModel, SpatialShape
 from wavedl.models.registry import register_model
 
 
-# Type alias for spatial shapes
-SpatialShape = tuple[int] | tuple[int, int] | tuple[int, int, int]
-
-
 def _get_conv_layer(dim: int) -> type[nn.Module]:
     """Get dimension-appropriate Conv class."""
     if dim == 1:
@@ -51,40 +48,75 @@ class LayerNormNd(nn.Module):
     """
     LayerNorm for N-dimensional tensors (channels-first format).
 
-
+    Implements channels-last LayerNorm as used in the original ConvNeXt paper.
+    Permutes data to channels-last, applies LayerNorm over the channel
+    dimension at each spatial location, and permutes back to channels-first.
+
+    This matches PyTorch's nn.LayerNorm behavior when applied to the channel
+    dimension, providing stable gradients for deep ConvNeXt networks.
+
+    References:
+        Liu, Z., et al. (2022). A ConvNet for the 2020s. CVPR 2022.
+        https://github.com/facebookresearch/ConvNeXt
     """
 
     def __init__(self, num_channels: int, dim: int, eps: float = 1e-6):
         super().__init__()
         self.dim = dim
+        self.num_channels = num_channels
         self.weight = nn.Parameter(torch.ones(num_channels))
         self.bias = nn.Parameter(torch.zeros(num_channels))
         self.eps = eps
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-
-
-
-
-
-
-
-
-
+        """
+        Apply LayerNorm in channels-last format.
+
+        Args:
+            x: Input tensor in channels-first format
+                - 1D: (B, C, L)
+                - 2D: (B, C, H, W)
+                - 3D: (B, C, D, H, W)
+
+        Returns:
+            Normalized tensor in same format as input
+        """
+        if self.dim == 1:
+            # (B, C, L) -> (B, L, C) -> LayerNorm -> (B, C, L)
+            x = x.permute(0, 2, 1)
+            x = F.layer_norm(x, (self.num_channels,), self.weight, self.bias, self.eps)
+            x = x.permute(0, 2, 1)
+        elif self.dim == 2:
+            # (B, C, H, W) -> (B, H, W, C) -> LayerNorm -> (B, C, H, W)
+            x = x.permute(0, 2, 3, 1)
+            x = F.layer_norm(x, (self.num_channels,), self.weight, self.bias, self.eps)
+            x = x.permute(0, 3, 1, 2)
+        else:
+            # (B, C, D, H, W) -> (B, D, H, W, C) -> LayerNorm -> (B, C, D, H, W)
+            x = x.permute(0, 2, 3, 4, 1)
+            x = F.layer_norm(x, (self.num_channels,), self.weight, self.bias, self.eps)
+            x = x.permute(0, 4, 1, 2, 3)
         return x
 
 
 class ConvNeXtBlock(nn.Module):
     """
-    ConvNeXt block
-
-
-
-    -
-
-
-
+    ConvNeXt block matching the official Facebook implementation.
+
+    Uses the second variant from the paper, which is slightly faster in PyTorch:
+    1. DwConv (channels-first)
+    2. Permute to channels-last
+    3. LayerNorm → Linear → GELU → Linear (all channels-last)
+    4. LayerScale (gamma * x)
+    5. Permute back to channels-first
+    6. Residual connection
+
+    The LayerScale mechanism is critical for stable training in deep networks.
+    It scales the output by a learnable parameter initialized to 1e-6.
+
+    References:
+        Liu, Z., et al. (2022). A ConvNet for the 2020s. CVPR 2022.
+        https://github.com/facebookresearch/ConvNeXt
     """
 
     def __init__(
@@ -93,21 +125,36 @@ class ConvNeXtBlock(nn.Module):
         dim: int = 2,
         expansion_ratio: float = 4.0,
         drop_path: float = 0.0,
+        layer_scale_init_value: float = 1e-6,
     ):
         super().__init__()
+        self.dim = dim
         Conv = _get_conv_layer(dim)
         hidden_dim = int(channels * expansion_ratio)
 
-        # Depthwise conv (7x7)
+        # Depthwise conv (7x7) - operates in channels-first
         self.dwconv = Conv(
             channels, channels, kernel_size=7, padding=3, groups=channels
        )
-        self.norm = LayerNormNd(channels, dim)
 
-        #
-        self.
+        # LayerNorm (channels-last format, using standard nn.LayerNorm)
+        self.norm = nn.LayerNorm(channels, eps=1e-6)
+
+        # Pointwise convs implemented with Linear layers (channels-last)
+        # This matches the official implementation and is slightly faster
+        self.pwconv1 = nn.Linear(channels, hidden_dim)
         self.act = nn.GELU()
-        self.pwconv2 =
+        self.pwconv2 = nn.Linear(hidden_dim, channels)
+
+        # LayerScale: learnable per-channel scaling (critical for deep networks)
+        # Initialized to small value (1e-6) to prevent gradient explosion
+        self.gamma = (
+            nn.Parameter(
+                layer_scale_init_value * torch.ones(channels), requires_grad=True
+            )
+            if layer_scale_init_value > 0
+            else None
+        )
 
         # Stochastic depth (drop path) - simplified version
         self.drop_path = nn.Identity()  # Can be replaced with DropPath if needed
@@ -115,14 +162,38 @@ class ConvNeXtBlock(nn.Module):
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         residual = x
 
+        # Depthwise conv in channels-first format
         x = self.dwconv(x)
+
+        # Permute to channels-last for LayerNorm and Linear layers
+        if self.dim == 1:
+            x = x.permute(0, 2, 1)  # (B, C, L) -> (B, L, C)
+        elif self.dim == 2:
+            x = x.permute(0, 2, 3, 1)  # (B, C, H, W) -> (B, H, W, C)
+        else:
+            x = x.permute(0, 2, 3, 4, 1)  # (B, C, D, H, W) -> (B, D, H, W, C)
+
+        # LayerNorm + MLP (all in channels-last)
         x = self.norm(x)
         x = self.pwconv1(x)
         x = self.act(x)
         x = self.pwconv2(x)
-        x = self.drop_path(x)
 
-
+        # Apply LayerScale
+        if self.gamma is not None:
+            x = self.gamma * x
+
+        # Permute back to channels-first
+        if self.dim == 1:
+            x = x.permute(0, 2, 1)  # (B, L, C) -> (B, C, L)
+        elif self.dim == 2:
+            x = x.permute(0, 3, 1, 2)  # (B, H, W, C) -> (B, C, H, W)
+        else:
+            x = x.permute(0, 4, 1, 2, 3)  # (B, D, H, W, C) -> (B, C, D, H, W)
+
+        # Residual connection with drop path
+        x = residual + self.drop_path(x)
+        return x
 
 
 class ConvNeXtBase(BaseModel):
@@ -244,7 +315,7 @@ class ConvNeXtTiny(ConvNeXtBase):
     """
     ConvNeXt-Tiny: Smallest variant.
 
-    ~
+    ~27.8M backbone parameters (2D). Good for: Limited compute, fast training.
 
     Args:
         in_shape: (L,), (H, W), or (D, H, W)
@@ -270,7 +341,7 @@ class ConvNeXtSmall(ConvNeXtBase):
     """
     ConvNeXt-Small: Medium variant.
 
-    ~
+    ~49.5M backbone parameters (2D). Good for: Balanced performance.
 
     Args:
         in_shape: (L,), (H, W), or (D, H, W)
@@ -296,7 +367,7 @@ class ConvNeXtBase_(ConvNeXtBase):
     """
     ConvNeXt-Base: Standard variant.
 
-    ~
+    ~87.6M backbone parameters (2D). Good for: High accuracy, larger datasets.
 
     Args:
         in_shape: (L,), (H, W), or (D, H, W)
@@ -337,7 +408,7 @@ class ConvNeXtTinyPretrained(BaseModel):
     """
     ConvNeXt-Tiny with ImageNet pretrained weights (2D only).
 
-    ~
+    ~27.8M backbone parameters. Good for: Transfer learning with modern CNN.
 
     Args:
         in_shape: (H, W) image dimensions
@@ -393,20 +464,11 @@ class ConvNeXtTinyPretrained(BaseModel):
         )
 
         # Modify first conv for single-channel input
-
-
-
-
-            kernel_size=old_conv.kernel_size,
-            stride=old_conv.stride,
-            padding=old_conv.padding,
-            bias=old_conv.bias is not None,
+        from wavedl.models._pretrained_utils import adapt_first_conv_for_single_channel
+
+        adapt_first_conv_for_single_channel(
+            self.backbone, "features.0.0", pretrained=pretrained
         )
-        if pretrained:
-            with torch.no_grad():
-                self.backbone.features[0][0].weight = nn.Parameter(
-                    old_conv.weight.mean(dim=1, keepdim=True)
-                )
 
         if freeze_backbone:
             self._freeze_backbone()