PyPI - wavedl - Versions diffs - 1.6.0__py3-none-any.whl → 1.6.1__py3-none-any.whl - Mend

wavedl 1.6.0py3-none-any.whl → 1.6.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

wavedl/__init__.py +1 -1
wavedl/hpo.py +451 -451
wavedl/models/__init__.py +28 -0
wavedl/models/{_timm_utils.py → _pretrained_utils.py} +128 -0
wavedl/models/base.py +48 -0
wavedl/models/caformer.py +1 -1
wavedl/models/cnn.py +2 -27
wavedl/models/convnext.py +5 -18
wavedl/models/convnext_v2.py +6 -22
wavedl/models/densenet.py +5 -18
wavedl/models/efficientnetv2.py +315 -315
wavedl/models/efficientvit.py +398 -0
wavedl/models/fastvit.py +6 -39
wavedl/models/mamba.py +44 -24
wavedl/models/maxvit.py +51 -48
wavedl/models/mobilenetv3.py +295 -295
wavedl/models/regnet.py +406 -406
wavedl/models/resnet.py +14 -56
wavedl/models/resnet3d.py +258 -258
wavedl/models/swin.py +443 -443
wavedl/models/tcn.py +393 -409
wavedl/models/unet.py +1 -5
wavedl/models/unireplknet.py +491 -0
wavedl/models/vit.py +3 -3
wavedl/train.py +1430 -1430
wavedl/utils/config.py +367 -367
wavedl/utils/cross_validation.py +530 -530
wavedl/utils/losses.py +216 -216
wavedl/utils/optimizers.py +216 -216
wavedl/utils/schedulers.py +251 -251
{wavedl-1.6.0.dist-info → wavedl-1.6.1.dist-info}/METADATA +93 -53
wavedl-1.6.1.dist-info/RECORD +46 -0
wavedl-1.6.0.dist-info/RECORD +0 -44
{wavedl-1.6.0.dist-info → wavedl-1.6.1.dist-info}/LICENSE +0 -0
{wavedl-1.6.0.dist-info → wavedl-1.6.1.dist-info}/WHEEL +0 -0
{wavedl-1.6.0.dist-info → wavedl-1.6.1.dist-info}/entry_points.txt +0 -0
{wavedl-1.6.0.dist-info → wavedl-1.6.1.dist-info}/top_level.txt +0 -0

wavedl/models/efficientvit.py ADDED Viewed

@@ -0,0 +1,398 @@
+"""
+EfficientViT: Memory-Efficient Vision Transformer with Cascaded Group Attention
+================================================================================
+EfficientViT (MIT) achieves state-of-the-art speed-accuracy trade-off by using
+cascaded group attention (CGA) which reduces computational redundancy in
+multi-head self-attention while maintaining model capability.
+**Key Features**:
+    - Cascaded Group Attention (CGA): Linear complexity attention
+    - Memory-efficient design for edge deployment
+    - Faster than Swin Transformer with similar accuracy
+    - Excellent for real-time NDE applications
+**Variants**:
+    - efficientvit_m0: 2.3M params (mobile, fastest)
+    - efficientvit_m1: 2.9M params (mobile)
+    - efficientvit_m2: 4.2M params (mobile)
+    - efficientvit_b0: 3.4M params (balanced)
+    - efficientvit_b1: 9.1M params (balanced)
+    - efficientvit_b2: 24M params (balanced)
+    - efficientvit_b3: 49M params (balanced)
+    - efficientvit_l1: 53M params (large)
+    - efficientvit_l2: 64M params (large)
+**Requirements**:
+    - timm >= 0.9.0 (for EfficientViT models)
+Reference:
+    Liu, X., et al. (2023). EfficientViT: Memory Efficient Vision Transformer
+    with Cascaded Group Attention. CVPR 2023.
+    https://arxiv.org/abs/2305.07027
+Author: Ductho Le (ductho.le@outlook.com)
+"""
+import torch
+from wavedl.models._pretrained_utils import build_regression_head
+from wavedl.models.base import BaseModel
+from wavedl.models.registry import register_model
+__all__ = [
+    "EfficientViTB0",
+    "EfficientViTB1",
+    "EfficientViTB2",
+    "EfficientViTB3",
+    "EfficientViTBase",
+    "EfficientViTL1",
+    "EfficientViTL2",
+    "EfficientViTM0",
+    "EfficientViTM1",
+    "EfficientViTM2",
+]
+# =============================================================================
+# EFFICIENTVIT BASE CLASS
+# =============================================================================
+class EfficientViTBase(BaseModel):
+    """
+    EfficientViT base class wrapping timm implementation.
+    Uses Cascaded Group Attention for efficient multi-head attention with
+    linear complexity. 2D only due to attention structure.
+    Args:
+        in_shape: (H, W) input shape (2D only)
+        out_size: Number of regression targets
+        model_name: timm model name
+        pretrained: Whether to load pretrained weights
+        freeze_backbone: Whether to freeze backbone for fine-tuning
+        dropout_rate: Dropout rate for regression head
+    """
+    def __init__(
+        self,
+        in_shape: tuple[int, int],
+        out_size: int,
+        model_name: str = "efficientvit_b0",
+        pretrained: bool = True,
+        freeze_backbone: bool = False,
+        dropout_rate: float = 0.3,
+        **kwargs,
+    ):
+        super().__init__(in_shape, out_size)
+        if len(in_shape) != 2:
+            raise ValueError(
+                f"EfficientViT requires 2D input (H, W), got {len(in_shape)}D"
+            )
+        self.pretrained = pretrained
+        self.freeze_backbone = freeze_backbone
+        self.model_name = model_name
+        # Load from timm
+        try:
+            import timm
+            self.backbone = timm.create_model(
+                model_name,
+                pretrained=pretrained,
+                num_classes=0,  # Remove classifier
+            )
+            # Get feature dimension
+            with torch.no_grad():
+                dummy = torch.zeros(1, 3, *in_shape)
+                features = self.backbone(dummy)
+                in_features = features.shape[-1]
+        except ImportError:
+            raise ImportError(
+                "timm >= 0.9.0 is required for EfficientViT. "
+                "Install with: pip install timm>=0.9.0"
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to load EfficientViT model '{model_name}': {e}")
+        # Adapt input channels (3 -> 1)
+        self._adapt_input_channels()
+        # Regression head
+        self.head = build_regression_head(in_features, out_size, dropout_rate)
+        if freeze_backbone:
+            self._freeze_backbone()
+    def _adapt_input_channels(self):
+        """Adapt first conv layer for single-channel input."""
+        from wavedl.models._pretrained_utils import find_and_adapt_input_convs
+        adapted_count = find_and_adapt_input_convs(
+            self.backbone, pretrained=self.pretrained, adapt_all=False
+        )
+        if adapted_count == 0:
+            import warnings
+            warnings.warn(
+                "Could not adapt EfficientViT input channels. Model may fail.",
+                stacklevel=2,
+            )
+    def _freeze_backbone(self):
+        """Freeze backbone parameters."""
+        for param in self.backbone.parameters():
+            param.requires_grad = False
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        features = self.backbone(x)
+        return self.head(features)
+# =============================================================================
+# MOBILE VARIANTS (Ultra-lightweight)
+# =============================================================================
+@register_model("efficientvit_m0")
+class EfficientViTM0(EfficientViTBase):
+    """
+    EfficientViT-M0: ~2.2M backbone parameters (fastest mobile variant).
+    Cascaded group attention for efficient inference.
+    Ideal for edge deployment and real-time NDE applications.
+    2D only.
+    Example:
+        >>> model = EfficientViTM0(in_shape=(224, 224), out_size=3)
+        >>> x = torch.randn(4, 1, 224, 224)
+        >>> out = model(x)  # (4, 3)
+    """
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="efficientvit_m0",
+            **kwargs,
+        )
+    def __repr__(self) -> str:
+        return (
+            f"EfficientViT_M0(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+@register_model("efficientvit_m1")
+class EfficientViTM1(EfficientViTBase):
+    """
+    EfficientViT-M1: ~2.6M backbone parameters.
+    Slightly larger mobile variant with better accuracy.
+    2D only.
+    """
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="efficientvit_m1",
+            **kwargs,
+        )
+    def __repr__(self) -> str:
+        return (
+            f"EfficientViT_M1(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+@register_model("efficientvit_m2")
+class EfficientViTM2(EfficientViTBase):
+    """
+    EfficientViT-M2: ~3.8M backbone parameters.
+    Largest mobile variant, best accuracy among M-series.
+    2D only.
+    """
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="efficientvit_m2",
+            **kwargs,
+        )
+    def __repr__(self) -> str:
+        return (
+            f"EfficientViT_M2(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+# =============================================================================
+# BALANCED VARIANTS (B-series)
+# =============================================================================
+@register_model("efficientvit_b0")
+class EfficientViTB0(EfficientViTBase):
+    """
+    EfficientViT-B0: ~2.1M backbone parameters.
+    Smallest balanced variant. Good accuracy-speed trade-off.
+    2D only.
+    Example:
+        >>> model = EfficientViTB0(in_shape=(224, 224), out_size=3)
+        >>> x = torch.randn(4, 1, 224, 224)
+        >>> out = model(x)  # (4, 3)
+    """
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="efficientvit_b0",
+            **kwargs,
+        )
+    def __repr__(self) -> str:
+        return (
+            f"EfficientViT_B0(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+@register_model("efficientvit_b1")
+class EfficientViTB1(EfficientViTBase):
+    """
+    EfficientViT-B1: ~7.5M backbone parameters.
+    Medium balanced variant with improved capacity.
+    2D only.
+    """
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="efficientvit_b1",
+            **kwargs,
+        )
+    def __repr__(self) -> str:
+        return (
+            f"EfficientViT_B1(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+@register_model("efficientvit_b2")
+class EfficientViTB2(EfficientViTBase):
+    """
+    EfficientViT-B2: ~21.8M backbone parameters.
+    Larger balanced variant for complex patterns.
+    2D only.
+    """
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="efficientvit_b2",
+            **kwargs,
+        )
+    def __repr__(self) -> str:
+        return (
+            f"EfficientViT_B2(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+@register_model("efficientvit_b3")
+class EfficientViTB3(EfficientViTBase):
+    """
+    EfficientViT-B3: ~46.1M backbone parameters.
+    Largest balanced variant, highest accuracy in B-series.
+    2D only.
+    """
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="efficientvit_b3",
+            **kwargs,
+        )
+    def __repr__(self) -> str:
+        return (
+            f"EfficientViT_B3(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+# =============================================================================
+# LARGE VARIANTS (L-series)
+# =============================================================================
+@register_model("efficientvit_l1")
+class EfficientViTL1(EfficientViTBase):
+    """
+    EfficientViT-L1: ~49.5M backbone parameters.
+    Large variant for maximum accuracy.
+    2D only.
+    """
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="efficientvit_l1",
+            **kwargs,
+        )
+    def __repr__(self) -> str:
+        return (
+            f"EfficientViT_L1(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+@register_model("efficientvit_l2")
+class EfficientViTL2(EfficientViTBase):
+    """
+    EfficientViT-L2: ~60.5M backbone parameters.
+    Largest variant, best accuracy.
+    2D only.
+    """
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="efficientvit_l2",
+            **kwargs,
+        )
+    def __repr__(self) -> str:
+        return (
+            f"EfficientViT_L2(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )

wavedl/models/fastvit.py CHANGED Viewed

@@ -29,9 +29,8 @@ Author: Ductho Le (ductho.le@outlook.com)
 """
 import torch
-import torch.nn as nn
-from wavedl.models._timm_utils import build_regression_head
+from wavedl.models._pretrained_utils import build_regression_head
 from wavedl.models.base import BaseModel
 from wavedl.models.registry import register_model
@@ -114,26 +113,11 @@ class FastViTBase(BaseModel):
         """Adapt all conv layers with 3 input channels for single-channel input."""
         # FastViT may have multiple modules with 3 input channels (e.g., conv_kxk, conv_scale)
         # We need to adapt all of them
-        adapted_count = 0
-        for name, module in self.backbone.named_modules():
-            if hasattr(module, "in_channels") and module.in_channels == 3:
-                # Check if this is a wrapper (e.g., ConvNormAct) with inner .conv
-                if hasattr(module, "conv") and isinstance(module.conv, nn.Conv2d):
-                    # Adapt the inner conv layer
-                    old_conv = module.conv
-                    module.conv = self._make_new_conv(old_conv)
-                    adapted_count += 1
-                elif isinstance(module, nn.Conv2d):
-                    # Direct Conv2d - replace it
-                    parts = name.split(".")
-                    parent = self.backbone
-                    for part in parts[:-1]:
-                        parent = getattr(parent, part)
-                    child_name = parts[-1]
-                    new_conv = self._make_new_conv(module)
-                    setattr(parent, child_name, new_conv)
-                    adapted_count += 1
+        from wavedl.models._pretrained_utils import find_and_adapt_input_convs
+        adapted_count = find_and_adapt_input_convs(
+            self.backbone, pretrained=self.pretrained, adapt_all=True
+        )
         if adapted_count == 0:
             import warnings
@@ -142,23 +126,6 @@ class FastViTBase(BaseModel):
                 "Could not adapt FastViT input channels. Model may fail.", stacklevel=2
             )
-    def _make_new_conv(self, old_conv: nn.Conv2d) -> nn.Conv2d:
-        """Create new conv layer with 1 input channel."""
-        new_conv = nn.Conv2d(
-            1,
-            old_conv.out_channels,
-            kernel_size=old_conv.kernel_size,
-            stride=old_conv.stride,
-            padding=old_conv.padding,
-            bias=old_conv.bias is not None,
-        )
-        if self.pretrained:
-            with torch.no_grad():
-                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
-                if old_conv.bias is not None:
-                    new_conv.bias.copy_(old_conv.bias)
-        return new_conv
     def _freeze_backbone(self):
         """Freeze backbone parameters."""
         for param in self.backbone.parameters():

wavedl/models/mamba.py CHANGED Viewed

@@ -34,12 +34,12 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from wavedl.models.base import BaseModel
+from wavedl.models.base import BaseModel, SpatialShape1D, SpatialShape2D
 from wavedl.models.registry import register_model
-# Type aliases
-SpatialShape = tuple[int] | tuple[int, int]
+# Type alias for Mamba models (1D and 2D only)
+SpatialShape = SpatialShape1D | SpatialShape2D
 __all__ = [
     "Mamba1D",
@@ -154,35 +154,55 @@ class SelectiveSSM(nn.Module):
         D: torch.Tensor,
     ) -> torch.Tensor:
         """
-        Simplified selective scan.
+        Vectorized selective scan using parallel associative scan.
-        For real applications, use the CUDA-optimized version from mamba-ssm.
-        This implementation is for understanding and testing only.
+        This implementation avoids the sequential for-loop by computing
+        all timesteps in parallel using cumulative products and sums.
+        ~100x faster than the naive sequential implementation.
         """
-        B_batch, L, d_inner = x.shape
-        d_state = A.shape[0]
-        # Initialize state
-        h = torch.zeros(B_batch, d_inner, d_state, device=x.device, dtype=x.dtype)
+        # Compute discretized A_bar for all timesteps: (B, L, d_inner, d_state)
+        A_bar = torch.exp(delta.unsqueeze(-1) * A)  # (B, L, d_inner, d_state)
-        outputs = []
-        for t in range(L):
-            x_t = x[:, t, :]  # (B, d_inner)
-            delta_t = delta[:, t, :]  # (B, d_inner)
-            B_t = B[:, t, :]  # (B, d_state)
-            C_t = C[:, t, :]  # (B, d_state)
+        # Compute input contribution: delta * B * x for all timesteps
+        # B: (B, L, d_state), x: (B, L, d_inner), delta: (B, L, d_inner)
+        # Result: (B, L, d_inner, d_state)
+        BX = delta.unsqueeze(-1) * B.unsqueeze(2) * x.unsqueeze(-1)
-            # Discretize: A_bar = exp(delta * A)
-            A_bar = torch.exp(delta_t.unsqueeze(-1) * A)  # (B, d_inner, d_state)
+        # Parallel scan using log-space cumulative products for numerical stability
+        # For SSM: h[t] = A_bar[t] * h[t-1] + BX[t]
+        # This is a linear recurrence that can be solved with associative scan
-            # Update state: h = A_bar * h + delta * B * x
-            h = A_bar * h + delta_t.unsqueeze(-1) * B_t.unsqueeze(1) * x_t.unsqueeze(-1)
+        # Use chunked approach for memory efficiency with parallel scan
+        # Compute cumulative product of A_bar (in log space for stability)
+        log_A_bar = torch.log(A_bar.clamp(min=1e-10))
+        log_A_cumsum = torch.cumsum(log_A_bar, dim=1)  # (B, L, d_inner, d_state)
+        A_cumsum = torch.exp(log_A_cumsum)
-            # Output: y = C * h + D * x
-            y_t = (C_t.unsqueeze(1) * h).sum(-1) + D * x_t  # (B, d_inner)
-            outputs.append(y_t)
+        # For each timestep t, we need: sum_{s=0}^{t} (prod_{k=s+1}^{t} A_bar[k]) * BX[s]
+        # = sum_{s=0}^{t} (A_cumsum[t] / A_cumsum[s]) * BX[s]
+        # = A_cumsum[t] * sum_{s=0}^{t} (BX[s] / A_cumsum[s])
-        return torch.stack(outputs, dim=1)  # (B, L, d_inner)
+        # Compute BX / A_cumsum (use A_cumsum shifted by 1 for proper indexing)
+        # A_cumsum[s] represents prod_{k=0}^{s} A_bar[k], but we need prod_{k=0}^{s-1}
+        # So we shift: use A_cumsum from previous timestep
+        A_cumsum_shifted = F.pad(A_cumsum[:, :-1], (0, 0, 0, 0, 1, 0), value=1.0)
+        # Weighted input: BX[s] / A_cumsum[s-1] = BX[s] * exp(-log_A_cumsum[s-1])
+        weighted_BX = BX / A_cumsum_shifted.clamp(min=1e-10)
+        # Cumulative sum of weighted inputs
+        weighted_BX_cumsum = torch.cumsum(weighted_BX, dim=1)
+        # Final state at each timestep: h[t] = A_cumsum[t] * weighted_BX_cumsum[t]
+        # But A_cumsum includes A_bar[0], so adjust
+        h = A_cumsum * weighted_BX_cumsum / A_bar.clamp(min=1e-10)
+        # Output: y = C * h + D * x
+        # h: (B, L, d_inner, d_state), C: (B, L, d_state)
+        y = (C.unsqueeze(2) * h).sum(-1) + D * x  # (B, L, d_inner)
+        return y
 # =============================================================================

wavedl 1.6.0__py3-none-any.whl → 1.6.1__py3-none-any.whl

wavedl 1.6.0py3-none-any.whl → 1.6.1py3-none-any.whl