wavedl 1.6.0__py3-none-any.whl → 1.6.1__py3-none-any.whl

wavedl/models/maxvit.py CHANGED
@@ -28,9 +28,9 @@ Author: Ductho Le (ductho.le@outlook.com)
 """
 
 import torch
-import torch.nn as nn
+import torch.nn.functional as F
 
-from wavedl.models._timm_utils import build_regression_head
+from wavedl.models._pretrained_utils import build_regression_head
 from wavedl.models.base import BaseModel
 from wavedl.models.registry import register_model
 
@@ -54,8 +54,16 @@ class MaxViTBase(BaseModel):
 
     Multi-axis attention with local block and global grid attention.
     2D only due to attention structure.
+
+    Note:
+        MaxViT requires input dimensions divisible by 28 (4x stem downsample × 7 window).
+        This implementation automatically resizes inputs to the nearest compatible size.
     """
 
+    # MaxViT stem downsamples by 4x, then requires divisibility by 7 (window size)
+    # So original input must be divisible by 4 * 7 = 28
+    _DIVISOR = 28
+
     def __init__(
         self,
         in_shape: tuple[int, int],
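The new `_DIVISOR` constant and the `Note:` above encode the constraint driving this change: MaxViT's stem downsamples by 4x and its attention windows span 7 cells, so each spatial dimension must be a multiple of 4 × 7 = 28. A minimal standalone sketch of the round-up logic this diff adds later in `_compute_compatible_size` (names here are illustrative, stdlib only):

    import math

    DIVISOR = 28  # 4x stem downsample × 7-cell attention window

    def compatible_size(h: int, w: int) -> tuple[int, int]:
        # Round each dimension up to the next multiple of 28
        return (math.ceil(h / DIVISOR) * DIVISOR, math.ceil(w / DIVISOR) * DIVISOR)

    print(compatible_size(100, 100))  # (112, 112): rounded up
    print(compatible_size(224, 224))  # (224, 224): already compatible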
@@ -75,6 +83,9 @@ class MaxViTBase(BaseModel):
         self.freeze_backbone = freeze_backbone
         self.model_name = model_name
 
+        # Compute compatible input size for MaxViT attention windows
+        self._target_size = self._compute_compatible_size(in_shape)
+
         # Try to load from timm
         try:
             import timm
@@ -85,9 +96,9 @@ class MaxViTBase(BaseModel):
                 num_classes=0,  # Remove classifier
             )
 
-            # Get feature dimension
+            # Get feature dimension using compatible size
             with torch.no_grad():
-                dummy = torch.zeros(1, 3, *in_shape)
+                dummy = torch.zeros(1, 3, *self._target_size)
                 features = self.backbone(dummy)
                 in_features = features.shape[-1]
 
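The dummy probe must now run at the compatible size: with `num_classes=0`, timm returns pooled backbone features whose width feeds the regression head, but MaxViT's attention layers raise a shape error if the probe's spatial size is not divisible by 28. A hedged sketch of the probing pattern, assuming a timm MaxViT variant is installed (the model name is illustrative):

    import timm
    import torch

    backbone = timm.create_model("maxvit_tiny_tf_224", pretrained=False, num_classes=0)
    with torch.no_grad():
        # Spatial size must already be divisible by 28, e.g. 224 = 8 * 28
        dummy = torch.zeros(1, 3, 224, 224)
        in_features = backbone(dummy).shape[-1]  # feature width for the head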
@@ -109,62 +120,54 @@ class MaxViTBase(BaseModel):
 
     def _adapt_input_channels(self):
         """Adapt first conv layer for single-channel input."""
-        # MaxViT uses stem.conv1 (Conv2dSame from timm)
-        adapted = False
-
-        # Find the first Conv2d with 3 input channels
-        for name, module in self.backbone.named_modules():
-            if hasattr(module, "in_channels") and module.in_channels == 3:
-                # Get parent and child names
-                parts = name.split(".")
-                parent = self.backbone
-                for part in parts[:-1]:
-                    parent = getattr(parent, part)
-                child_name = parts[-1]
-
-                # Create new conv with 1 input channel
-                new_conv = self._make_new_conv(module)
-                setattr(parent, child_name, new_conv)
-                adapted = True
-                break
-
-        if not adapted:
+        from wavedl.models._pretrained_utils import find_and_adapt_input_convs
+
+        adapted_count = find_and_adapt_input_convs(
+            self.backbone, pretrained=self.pretrained, adapt_all=False
+        )
+
+        if adapted_count == 0:
             import warnings
 
             warnings.warn(
                 "Could not adapt MaxViT input channels. Model may fail.", stacklevel=2
             )
 
-    def _make_new_conv(self, old_conv: nn.Module) -> nn.Module:
-        """Create new conv layer with 1 input channel."""
-        # Handle both Conv2d and Conv2dSame from timm
-        type(old_conv)
-
-        # Get common parameters
-        kwargs = {
-            "out_channels": old_conv.out_channels,
-            "kernel_size": old_conv.kernel_size,
-            "stride": old_conv.stride,
-            "padding": old_conv.padding if hasattr(old_conv, "padding") else 0,
-            "bias": old_conv.bias is not None,
-        }
-
-        # Create new conv (use regular Conv2d for simplicity)
-        new_conv = nn.Conv2d(1, **kwargs)
-
-        if self.pretrained:
-            with torch.no_grad():
-                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
-                if old_conv.bias is not None:
-                    new_conv.bias.copy_(old_conv.bias)
-        return new_conv
-
     def _freeze_backbone(self):
         """Freeze backbone parameters."""
         for param in self.backbone.parameters():
             param.requires_grad = False
 
+    def _compute_compatible_size(self, in_shape: tuple[int, int]) -> tuple[int, int]:
+        """
+        Compute the nearest input size compatible with MaxViT attention windows.
+
+        MaxViT requires input dimensions divisible by 28 (4x stem downsample × 7 window).
+        This rounds up to the nearest compatible size.
+
+        Args:
+            in_shape: Original (H, W) input shape
+
+        Returns:
+            Compatible (H, W) shape divisible by 28
+        """
+        import math
+
+        h, w = in_shape
+        target_h = math.ceil(h / self._DIVISOR) * self._DIVISOR
+        target_w = math.ceil(w / self._DIVISOR) * self._DIVISOR
+        return (target_h, target_w)
+
     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # Resize input to compatible size if needed
+        _, _, h, w = x.shape
+        if (h, w) != self._target_size:
+            x = F.interpolate(
+                x,
+                size=self._target_size,
+                mode="bilinear",
+                align_corners=False,
+            )
         features = self.backbone(x)
         return self.head(features)
 
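Taken together, `_compute_compatible_size` and the new `forward` guard mean callers can pass arbitrary (H, W) inputs: anything off the 28-grid is bilinearly resized once before the backbone runs. A self-contained sketch of the same guard in isolation (the function name is illustrative):

    import torch
    import torch.nn.functional as F

    def resize_if_needed(x: torch.Tensor, target: tuple[int, int]) -> torch.Tensor:
        # Bilinearly resize an NCHW batch to the MaxViT-compatible size, if needed
        _, _, h, w = x.shape
        if (h, w) != target:
            x = F.interpolate(x, size=target, mode="bilinear", align_corners=False)
        return x

    x = torch.randn(2, 1, 100, 100)
    print(resize_if_needed(x, (112, 112)).shape)  # torch.Size([2, 1, 112, 112])

Bilinear resizing arguably preserves pretrained behavior better than zero-padding for image backbones, at the cost of a small aspect-ratio distortion when H and W round up by different amounts.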