wavedl 1.5.5__py3-none-any.whl → 1.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wavedl/__init__.py +1 -1
- wavedl/models/efficientnet.py +24 -7
- wavedl/models/efficientnetv2.py +29 -6
- wavedl/models/mobilenetv3.py +31 -8
- wavedl/models/regnet.py +29 -6
- wavedl/models/swin.py +38 -6
- wavedl/models/tcn.py +22 -2
- wavedl/models/vit.py +85 -25
- wavedl/test.py +7 -3
- wavedl/train.py +79 -18
- wavedl/utils/constraints.py +11 -5
- wavedl/utils/data.py +130 -39
- wavedl/utils/metrics.py +287 -326
- {wavedl-1.5.5.dist-info → wavedl-1.5.7.dist-info}/METADATA +37 -27
- {wavedl-1.5.5.dist-info → wavedl-1.5.7.dist-info}/RECORD +19 -19
- {wavedl-1.5.5.dist-info → wavedl-1.5.7.dist-info}/LICENSE +0 -0
- {wavedl-1.5.5.dist-info → wavedl-1.5.7.dist-info}/WHEEL +0 -0
- {wavedl-1.5.5.dist-info → wavedl-1.5.7.dist-info}/entry_points.txt +0 -0
- {wavedl-1.5.5.dist-info → wavedl-1.5.7.dist-info}/top_level.txt +0 -0
wavedl/__init__.py
CHANGED
wavedl/models/efficientnet.py
CHANGED
```diff
@@ -110,9 +110,30 @@ class EfficientNetBase(BaseModel):
             self._freeze_backbone()
 
     def _adapt_input_channels(self):
-        """Modify first conv to
-
-
+        """Modify first conv to accept single-channel input.
+
+        Instead of expanding 1→3 channels in forward (which triples memory),
+        we replace the first conv layer with a 1-channel version and initialize
+        weights as the mean of the pretrained RGB filters.
+        """
+        # EfficientNet stem conv is at: features[0][0]
+        old_conv = self.backbone.features[0][0]
+        new_conv = nn.Conv2d(
+            1,  # Single channel input
+            old_conv.out_channels,
+            kernel_size=old_conv.kernel_size,
+            stride=old_conv.stride,
+            padding=old_conv.padding,
+            dilation=old_conv.dilation,
+            groups=old_conv.groups,
+            padding_mode=old_conv.padding_mode,
+            bias=old_conv.bias is not None,
+        )
+        if self.pretrained:
+            # Initialize with mean of pretrained RGB weights
+            with torch.no_grad():
+                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
+        self.backbone.features[0][0] = new_conv
 
     def _freeze_backbone(self):
         """Freeze all backbone parameters except the classifier."""
@@ -130,10 +151,6 @@ class EfficientNetBase(BaseModel):
         Returns:
             Output tensor of shape (B, out_size)
         """
-        # Expand single channel to 3 channels for pretrained weights
-        if x.size(1) == 1:
-            x = x.expand(-1, 3, -1, -1)
-
         return self.backbone(x)
 
     @classmethod
```
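For orientation, the adaptation in this hunk can be reproduced outside the package on a stock torchvision EfficientNet-B0. The helper name `adapt_first_conv` below is illustrative rather than part of wavedl's API, but it mirrors the mean-of-RGB-filters initialization shown above:

```python
import torch
import torch.nn as nn
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights


def adapt_first_conv(model: nn.Module) -> None:
    """Replace the 3-channel stem conv with a 1-channel copy whose weights
    are the channel-wise mean of the pretrained RGB filters."""
    old = model.features[0][0]  # stem conv of torchvision EfficientNet
    new = nn.Conv2d(
        1,
        old.out_channels,
        kernel_size=old.kernel_size,
        stride=old.stride,
        padding=old.padding,
        bias=old.bias is not None,
    )
    with torch.no_grad():
        new.weight.copy_(old.weight.mean(dim=1, keepdim=True))
    model.features[0][0] = new


model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)
adapt_first_conv(model)
out = model(torch.randn(2, 1, 224, 224))  # single-channel input now works directly
print(out.shape)  # torch.Size([2, 1000])
```

In torchvision's EfficientNet the stem conv feeds a BatchNorm layer and carries no bias, so only the weight tensor needs transferring.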
wavedl/models/efficientnetv2.py
CHANGED
```diff
@@ -129,10 +129,37 @@ class EfficientNetV2Base(BaseModel):
             nn.Linear(regression_hidden // 2, out_size),
         )
 
-        #
+        # Adapt first conv for single-channel input (3× memory savings vs expand)
+        self._adapt_input_channels()
+
+        # Optionally freeze backbone for fine-tuning (after adaptation so new conv is frozen too)
         if freeze_backbone:
             self._freeze_backbone()
 
+    def _adapt_input_channels(self):
+        """Modify first conv to accept single-channel input.
+
+        Instead of expanding 1→3 channels in forward (which triples memory),
+        we replace the first conv layer with a 1-channel version and initialize
+        weights as the mean of the pretrained RGB filters.
+        """
+        old_conv = self.backbone.features[0][0]
+        new_conv = nn.Conv2d(
+            1,  # Single channel input
+            old_conv.out_channels,
+            kernel_size=old_conv.kernel_size,
+            stride=old_conv.stride,
+            padding=old_conv.padding,
+            dilation=old_conv.dilation,
+            groups=old_conv.groups,
+            padding_mode=old_conv.padding_mode,
+            bias=old_conv.bias is not None,
+        )
+        if self.pretrained:
+            with torch.no_grad():
+                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
+        self.backbone.features[0][0] = new_conv
+
     def _freeze_backbone(self):
         """Freeze all backbone parameters except the classifier."""
         for name, param in self.backbone.named_parameters():
@@ -144,15 +171,11 @@ class EfficientNetV2Base(BaseModel):
         Forward pass.
 
         Args:
-            x: Input tensor of shape (B,
+            x: Input tensor of shape (B, 1, H, W)
 
         Returns:
            Output tensor of shape (B, out_size)
         """
-        # Expand single channel to 3 channels for pretrained weights compatibility
-        if x.size(1) == 1:
-            x = x.expand(-1, 3, -1, -1)
-
         return self.backbone(x)
 
     @classmethod
```
wavedl/models/mobilenetv3.py
CHANGED
```diff
@@ -136,10 +136,37 @@ class MobileNetV3Base(BaseModel):
             nn.Linear(regression_hidden, out_size),
         )
 
-        #
+        # Adapt first conv for single-channel input (3× memory savings vs expand)
+        self._adapt_input_channels()
+
+        # Optionally freeze backbone for fine-tuning (after adaptation so new conv is frozen too)
         if freeze_backbone:
             self._freeze_backbone()
 
+    def _adapt_input_channels(self):
+        """Modify first conv to accept single-channel input.
+
+        Instead of expanding 1→3 channels in forward (which triples memory),
+        we replace the first conv layer with a 1-channel version and initialize
+        weights as the mean of the pretrained RGB filters.
+        """
+        old_conv = self.backbone.features[0][0]
+        new_conv = nn.Conv2d(
+            1,  # Single channel input
+            old_conv.out_channels,
+            kernel_size=old_conv.kernel_size,
+            stride=old_conv.stride,
+            padding=old_conv.padding,
+            dilation=old_conv.dilation,
+            groups=old_conv.groups,
+            padding_mode=old_conv.padding_mode,
+            bias=old_conv.bias is not None,
+        )
+        if self.pretrained:
+            with torch.no_grad():
+                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
+        self.backbone.features[0][0] = new_conv
+
     def _freeze_backbone(self):
         """Freeze all backbone parameters except the classifier."""
         for name, param in self.backbone.named_parameters():
@@ -151,15 +178,11 @@ class MobileNetV3Base(BaseModel):
         Forward pass.
 
         Args:
-            x: Input tensor of shape (B,
+            x: Input tensor of shape (B, 1, H, W)
 
         Returns:
             Output tensor of shape (B, out_size)
         """
-        # Expand single channel to 3 channels for pretrained weights compatibility
-        if x.size(1) == 1:
-            x = x.expand(-1, 3, -1, -1)
-
         return self.backbone(x)
 
     @classmethod
@@ -194,7 +217,7 @@ class MobileNetV3Small(MobileNetV3Base):
 
     Performance (approximate):
     - CPU inference: ~6ms (single core)
-    - Parameters:
+    - Parameters: ~1.1M
    - MAdds: 56M
 
     Args:
@@ -241,7 +264,7 @@ class MobileNetV3Large(MobileNetV3Base):
 
     Performance (approximate):
     - CPU inference: ~20ms (single core)
-    - Parameters:
+    - Parameters: ~3.2M
     - MAdds: 219M
 
     Args:
```
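Each backbone now performs the channel adaptation before the optional freeze, as the repeated comment notes. The ordering matters because a module swapped in after the freeze pass keeps its default `requires_grad=True`; a minimal sketch with a hypothetical two-layer backbone:

```python
import torch.nn as nn

backbone = nn.Sequential(nn.Conv2d(3, 8, 3), nn.Conv2d(8, 8, 3))
for p in backbone.parameters():
    p.requires_grad = False            # freeze first (wrong order)
backbone[0] = nn.Conv2d(1, 8, 3)       # then swap in the 1-channel conv
print(any(p.requires_grad for p in backbone.parameters()))  # True: the new conv would still train
```

Running the adaptation first means the replacement conv already exists when `_freeze_backbone()` sets `requires_grad = False`, so a frozen backbone stays fully frozen.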
wavedl/models/regnet.py
CHANGED
```diff
@@ -140,10 +140,37 @@ class RegNetBase(BaseModel):
             nn.Linear(regression_hidden, out_size),
         )
 
-        #
+        # Adapt first conv for single-channel input (3× memory savings vs expand)
+        self._adapt_input_channels()
+
+        # Optionally freeze backbone for fine-tuning (after adaptation so new conv is frozen too)
         if freeze_backbone:
             self._freeze_backbone()
 
+    def _adapt_input_channels(self):
+        """Modify first conv to accept single-channel input.
+
+        Instead of expanding 1→3 channels in forward (which triples memory),
+        we replace the first conv layer with a 1-channel version and initialize
+        weights as the mean of the pretrained RGB filters.
+        """
+        old_conv = self.backbone.stem[0]
+        new_conv = nn.Conv2d(
+            1,  # Single channel input
+            old_conv.out_channels,
+            kernel_size=old_conv.kernel_size,
+            stride=old_conv.stride,
+            padding=old_conv.padding,
+            dilation=old_conv.dilation,
+            groups=old_conv.groups,
+            padding_mode=old_conv.padding_mode,
+            bias=old_conv.bias is not None,
+        )
+        if self.pretrained:
+            with torch.no_grad():
+                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
+        self.backbone.stem[0] = new_conv
+
     def _freeze_backbone(self):
         """Freeze all backbone parameters except the fc layer."""
         for name, param in self.backbone.named_parameters():
@@ -155,15 +182,11 @@ class RegNetBase(BaseModel):
         Forward pass.
 
         Args:
-            x: Input tensor of shape (B,
+            x: Input tensor of shape (B, 1, H, W)
 
         Returns:
             Output tensor of shape (B, out_size)
         """
-        # Expand single channel to 3 channels for pretrained weights compatibility
-        if x.size(1) == 1:
-            x = x.expand(-1, 3, -1, -1)
-
         return self.backbone(x)
 
     @classmethod
```
wavedl/models/swin.py
CHANGED
```diff
@@ -141,10 +141,46 @@ class SwinTransformerBase(BaseModel):
             nn.Linear(regression_hidden // 2, out_size),
         )
 
-        #
+        # Adapt patch embedding conv for single-channel input (3× memory savings vs expand)
+        self._adapt_input_channels()
+
+        # Optionally freeze backbone for fine-tuning (after adaptation so new conv is frozen too)
         if freeze_backbone:
             self._freeze_backbone()
 
+    def _adapt_input_channels(self):
+        """Modify patch embedding conv to accept single-channel input.
+
+        Instead of expanding 1→3 channels in forward (which triples memory),
+        we replace the patch embedding conv with a 1-channel version and
+        initialize weights as the mean of the pretrained RGB filters.
+        """
+        # Swin's patch embedding is at features[0][0]
+        try:
+            old_conv = self.backbone.features[0][0]
+        except (IndexError, AttributeError, TypeError) as e:
+            raise RuntimeError(
+                f"Swin patch embed structure changed in this torchvision version. "
+                f"Cannot adapt input channels. Error: {e}"
+            ) from e
+        new_conv = nn.Conv2d(
+            1,  # Single channel input
+            old_conv.out_channels,
+            kernel_size=old_conv.kernel_size,
+            stride=old_conv.stride,
+            padding=old_conv.padding,
+            dilation=old_conv.dilation,
+            groups=old_conv.groups,
+            padding_mode=old_conv.padding_mode,
+            bias=old_conv.bias is not None,
+        )
+        if self.pretrained:
+            with torch.no_grad():
+                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
+                if old_conv.bias is not None:
+                    new_conv.bias.copy_(old_conv.bias)
+        self.backbone.features[0][0] = new_conv
+
     def _freeze_backbone(self):
         """Freeze all backbone parameters except the head."""
         for name, param in self.backbone.named_parameters():
@@ -156,15 +192,11 @@ class SwinTransformerBase(BaseModel):
         Forward pass.
 
         Args:
-            x: Input tensor of shape (B,
+            x: Input tensor of shape (B, 1, H, W)
 
         Returns:
             Output tensor of shape (B, out_size)
         """
-        # Expand single channel to 3 channels for pretrained weights compatibility
-        if x.size(1) == 1:
-            x = x.expand(-1, 3, -1, -1)
-
         return self.backbone(x)
 
     @classmethod
```
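The Swin variant differs from the CNN backbones in two ways: the patch-embedding lookup is wrapped in a try/except because torchvision's internal module layout is not a stable contract, and the bias is carried over because the patch-embedding Conv2d in torchvision's Swin models has one (unlike the stem convs above, which feed BatchNorm). A quick structural check, assuming the current `swin_t` layout:

```python
import torch.nn as nn
from torchvision.models import swin_t

model = swin_t(weights=None)        # random init is enough to inspect the structure
patch_conv = model.features[0][0]   # patch embedding conv, e.g. Conv2d(3, 96, kernel_size=4, stride=4)
assert isinstance(patch_conv, nn.Conv2d)
print(patch_conv.bias is not None)  # True, so the bias must be copied to the 1-channel replacement
```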
wavedl/models/tcn.py
CHANGED
```diff
@@ -45,6 +45,26 @@ from wavedl.models.base import BaseModel
 from wavedl.models.registry import register_model
 
 
+def _find_group_count(channels: int, max_groups: int = 8) -> int:
+    """
+    Find largest valid group count for GroupNorm.
+
+    GroupNorm requires channels to be divisible by num_groups.
+    This finds the largest divisor up to max_groups.
+
+    Args:
+        channels: Number of channels
+        max_groups: Maximum group count to consider (default: 8)
+
+    Returns:
+        Largest valid group count (always >= 1)
+    """
+    for g in range(min(max_groups, channels), 0, -1):
+        if channels % g == 0:
+            return g
+    return 1
+
+
 class CausalConv1d(nn.Module):
     """
     Causal 1D convolution with dilation.
@@ -101,13 +121,13 @@ class TemporalBlock(nn.Module):
 
         # First causal convolution
         self.conv1 = CausalConv1d(in_channels, out_channels, kernel_size, dilation)
-        self.norm1 = nn.GroupNorm(
+        self.norm1 = nn.GroupNorm(_find_group_count(out_channels), out_channels)
         self.act1 = nn.GELU()
         self.dropout1 = nn.Dropout(dropout)
 
         # Second causal convolution
         self.conv2 = CausalConv1d(out_channels, out_channels, kernel_size, dilation)
-        self.norm2 = nn.GroupNorm(
+        self.norm2 = nn.GroupNorm(_find_group_count(out_channels), out_channels)
         self.act2 = nn.GELU()
         self.dropout2 = nn.Dropout(dropout)
 
```
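The helper simply picks the largest divisor of `channels` that does not exceed `max_groups`, which keeps `nn.GroupNorm` constructible for any channel count (a fixed group count raises `ValueError` whenever it does not divide the channels). Re-implemented verbatim here for illustration:

```python
def _find_group_count(channels: int, max_groups: int = 8) -> int:
    """Largest divisor of `channels` that is <= max_groups (always >= 1)."""
    for g in range(min(max_groups, channels), 0, -1):
        if channels % g == 0:
            return g
    return 1


print(_find_group_count(64))  # 8  -> nn.GroupNorm(8, 64)
print(_find_group_count(30))  # 6  (neither 8 nor 7 divides 30)
print(_find_group_count(7))   # 7  (search is capped at min(8, 7))
print(_find_group_count(1))   # 1  (a single group is always valid)
```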
wavedl/models/vit.py
CHANGED
```diff
@@ -42,47 +42,89 @@ class PatchEmbed(nn.Module):
     Supports 1D and 2D inputs:
     - 1D: Input (B, 1, L) → (B, num_patches, embed_dim)
     - 2D: Input (B, 1, H, W) → (B, num_patches, embed_dim)
+
+    Args:
+        in_shape: Spatial shape (L,) for 1D or (H, W) for 2D
+        patch_size: Size of each patch
+        embed_dim: Embedding dimension
+        pad_if_needed: If True, pad input to nearest patch-aligned size instead of
+            dropping edge pixels. Important for NDE/QUS applications where edge
+            effects matter. Default: False (original behavior with warning).
     """
 
-    def __init__(
+    def __init__(
+        self,
+        in_shape: SpatialShape,
+        patch_size: int,
+        embed_dim: int,
+        pad_if_needed: bool = False,
+    ):
         super().__init__()
 
         self.dim = len(in_shape)
         self.patch_size = patch_size
         self.embed_dim = embed_dim
+        self.pad_if_needed = pad_if_needed
+        self._padding = None  # Will be set if padding is needed
 
         if self.dim == 1:
             # 1D: segment patches
             L = in_shape[0]
-
-
-
-
-
-
-
-
-
-
-
+            remainder = L % patch_size
+            if remainder != 0:
+                if pad_if_needed:
+                    # Pad to next multiple of patch_size
+                    pad_amount = patch_size - remainder
+                    self._padding = (0, pad_amount)  # (left, right)
+                    L_padded = L + pad_amount
+                    self.num_patches = L_padded // patch_size
+                else:
+                    import warnings
+
+                    warnings.warn(
+                        f"Input length {L} not divisible by patch_size {patch_size}. "
+                        f"Last {remainder} elements will be dropped. "
+                        f"Consider using pad_if_needed=True or padding input to "
+                        f"{((L // patch_size) + 1) * patch_size}.",
+                        UserWarning,
+                        stacklevel=2,
+                    )
+                    self.num_patches = L // patch_size
+            else:
+                self.num_patches = L // patch_size
             self.proj = nn.Conv1d(
                 1, embed_dim, kernel_size=patch_size, stride=patch_size
             )
         elif self.dim == 2:
             # 2D: grid patches
             H, W = in_shape
-
-
-
-
-
-
-
-
-
-
-
-
+            h_rem, w_rem = H % patch_size, W % patch_size
+            if h_rem != 0 or w_rem != 0:
+                if pad_if_needed:
+                    # Pad to next multiple of patch_size
+                    h_pad = (patch_size - h_rem) % patch_size
+                    w_pad = (patch_size - w_rem) % patch_size
+                    # Padding format: (left, right, top, bottom)
+                    self._padding = (0, w_pad, 0, h_pad)
+                    H_padded, W_padded = H + h_pad, W + w_pad
+                    self.num_patches = (H_padded // patch_size) * (
+                        W_padded // patch_size
+                    )
+                else:
+                    import warnings
+
+                    warnings.warn(
+                        f"Input shape ({H}, {W}) not divisible by patch_size {patch_size}. "
+                        f"Border pixels will be dropped (H: {h_rem}, W: {w_rem}). "
+                        f"Consider using pad_if_needed=True or padding to "
+                        f"({((H // patch_size) + 1) * patch_size}, "
+                        f"{((W // patch_size) + 1) * patch_size}).",
+                        UserWarning,
+                        stacklevel=2,
+                    )
+                    self.num_patches = (H // patch_size) * (W // patch_size)
+            else:
+                self.num_patches = (H // patch_size) * (W // patch_size)
             self.proj = nn.Conv2d(
                 1, embed_dim, kernel_size=patch_size, stride=patch_size
             )
@@ -97,6 +139,10 @@ class PatchEmbed(nn.Module):
         Returns:
             Patch embeddings (B, num_patches, embed_dim)
         """
+        # Apply padding if configured
+        if self._padding is not None:
+            x = nn.functional.pad(x, self._padding, mode="constant", value=0)
+
         x = self.proj(x)  # (B, embed_dim, ..reduced_spatial..)
         x = x.flatten(2)  # (B, embed_dim, num_patches)
         x = x.transpose(1, 2)  # (B, num_patches, embed_dim)
@@ -185,6 +231,18 @@ class ViTBase(BaseModel):
     3. Transformer encoder blocks
     4. Extract CLS token
     5. Regression head
+
+    Args:
+        in_shape: Spatial shape (L,) for 1D or (H, W) for 2D
+        out_size: Number of regression targets
+        patch_size: Size of each patch (default: 16)
+        embed_dim: Embedding dimension (default: 768)
+        depth: Number of transformer blocks (default: 12)
+        num_heads: Number of attention heads (default: 12)
+        mlp_ratio: MLP hidden dim multiplier (default: 4.0)
+        dropout_rate: Dropout rate (default: 0.1)
+        pad_if_needed: If True, pad input to nearest patch-aligned size instead
+            of dropping edge pixels. Important for NDE/QUS applications.
     """
 
     def __init__(
@@ -197,6 +255,7 @@ class ViTBase(BaseModel):
         num_heads: int = 12,
         mlp_ratio: float = 4.0,
         dropout_rate: float = 0.1,
+        pad_if_needed: bool = False,
         **kwargs,
     ):
         super().__init__(in_shape, out_size)
@@ -207,9 +266,10 @@ class ViTBase(BaseModel):
         self.num_heads = num_heads
         self.dropout_rate = dropout_rate
         self.dim = len(in_shape)
+        self.pad_if_needed = pad_if_needed
 
         # Patch embedding
-        self.patch_embed = PatchEmbed(in_shape, patch_size, embed_dim)
+        self.patch_embed = PatchEmbed(in_shape, patch_size, embed_dim, pad_if_needed)
         num_patches = self.patch_embed.num_patches
 
         # Learnable CLS token and position embeddings
```
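The patch-count arithmetic behind `pad_if_needed` is easiest to see with concrete numbers (chosen purely for illustration, with `patch_size=16`):

```python
L, patch_size = 100, 16

# pad_if_needed=False: the trailing remainder is dropped
print(L // patch_size)                       # 6 patches, covering 96 of 100 samples

# pad_if_needed=True: zero-pad up to the next multiple of 16
pad_amount = patch_size - (L % patch_size)   # 12
print((L + pad_amount) // patch_size)        # 7 patches, covering all 100 samples

# 2D case, e.g. a 250x250 map: pad 6 rows and 6 columns of zeros
H = W = 250
h_pad = (patch_size - H % patch_size) % patch_size
print(((H + h_pad) // patch_size) ** 2)      # 256 patches vs. 15*15 = 225 without padding
```

Padding before the projection adds at most one extra row and column of patches while keeping edge samples inside the receptive field, which is the stated motivation for NDE/QUS data.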
wavedl/test.py
CHANGED
```diff
@@ -311,7 +311,7 @@ def load_data_for_inference(
 # ==============================================================================
 def load_checkpoint(
     checkpoint_dir: str,
-    in_shape: tuple[int,
+    in_shape: tuple[int, ...],
     out_size: int,
     model_name: str | None = None,
 ) -> tuple[nn.Module, any]:
@@ -320,7 +320,7 @@ def load_checkpoint(
 
     Args:
         checkpoint_dir: Path to checkpoint directory
-        in_shape: Input
+        in_shape: Input spatial shape - (L,) for 1D, (H, W) for 2D, or (D, H, W) for 3D
         out_size: Number of output parameters
         model_name: Model architecture name (auto-detect if None)
 
@@ -376,7 +376,11 @@ def load_checkpoint(
     )
 
     logging.info(f"  Building model: {model_name}")
-
+    # Use pretrained=False: checkpoint weights will overwrite any pretrained weights,
+    # so downloading ImageNet weights is wasteful and breaks offline/HPC inference.
+    model = build_model(
+        model_name, in_shape=in_shape, out_size=out_size, pretrained=False
+    )
 
     # Load weights (check multiple formats in order of preference)
     weight_path = None
```
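Downstream of this hunk, inference therefore builds the bare architecture and restores the checkpoint on top of it. A rough sketch of that path — the import location, model name, shapes, and checkpoint filename are placeholders, since only part of `load_checkpoint` appears in this diff:

```python
import torch
from wavedl.models import build_model  # assumed import path

# Architecture only: no ImageNet download, so it also works on offline HPC nodes.
model = build_model("efficientnet_b0", in_shape=(224, 224), out_size=4, pretrained=False)

# The trained weights overwrite everything, including the adapted 1-channel stem conv.
state = torch.load("checkpoint_dir/model_weights.pt", map_location="cpu")  # placeholder path/format
model.load_state_dict(state)
model.eval()
```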