wavedl 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

wavedl/models/tcn.py ADDED
@@ -0,0 +1,389 @@
+"""
+Temporal Convolutional Network (TCN): Dilated Causal Convolutions for 1D Signals
+=================================================================================
+
+A dedicated 1D architecture using dilated causal convolutions to capture
+long-range temporal dependencies in waveforms and time-series data.
+Provides an exponentially growing receptive field with linear parameter growth.
+
+**Key Features**:
+- Dilated convolutions: Exponentially growing receptive field
+- Causal padding: No information leakage from the future
+- Residual connections: Stable gradient flow
+- Group normalization: Batch-size-independent training stability
+
+**Variants**:
+- tcn: Standard TCN with configurable depth and channels
+- tcn_small: Lightweight variant for quick experiments
+- tcn_large: Higher capacity for complex patterns
+
+**Receptive Field Calculation** (each block applies two dilated convolutions):
+    RF = 1 + 2 * (kernel_size - 1) * sum(dilation[i] for i in layers)
+With default settings (kernel=3, 8 layers, dilation=2^i):
+    RF = 1 + 4 * (1+2+4+8+16+32+64+128) = 1021 samples
+
+**Note**: TCN is 1D-only. For 2D/3D data, use ResNet, EfficientNet, or Swin.
+
+References:
+    Bai, S., Kolter, J.Z., & Koltun, V. (2018). An Empirical Evaluation of
+    Generic Convolutional and Recurrent Networks for Sequence Modeling.
+    arXiv:1803.01271. https://arxiv.org/abs/1803.01271
+
+    van den Oord, A., et al. (2016). WaveNet: A Generative Model for Raw Audio.
+    arXiv:1609.03499. https://arxiv.org/abs/1609.03499
+
+Author: Ductho Le (ductho.le@outlook.com)
+"""
+
+from typing import Any
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from wavedl.models.base import BaseModel
+from wavedl.models.registry import register_model
+
+
+class CausalConv1d(nn.Module):
+    """
+    Causal 1D convolution with dilation.
+
+    Ensures output at time t only depends on inputs at times <= t.
+    Uses left-side padding to achieve causal behavior.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        kernel_size: int,
+        dilation: int = 1,
+    ):
+        super().__init__()
+        self.kernel_size = kernel_size
+        self.dilation = dilation
+        # Causal padding: only pad on the left
+        self.padding = (kernel_size - 1) * dilation
+
+        self.conv = nn.Conv1d(
+            in_channels,
+            out_channels,
+            kernel_size,
+            dilation=dilation,
+            padding=0,  # We handle padding manually for causality
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # Pad on the left only (causal)
+        x = F.pad(x, (self.padding, 0))
+        return self.conv(x)
+
+
+class TemporalBlock(nn.Module):
+    """
+    Temporal block with two causal dilated convolutions and a residual connection.
+
+    Architecture:
+        Input → CausalConv → GroupNorm → GELU → Dropout →
+        CausalConv → GroupNorm → GELU → Dropout → (+Input) → Output
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        kernel_size: int,
+        dilation: int,
+        dropout: float = 0.1,
+    ):
+        super().__init__()
+
+        # First causal convolution
+        self.conv1 = CausalConv1d(in_channels, out_channels, kernel_size, dilation)
+        self.norm1 = nn.GroupNorm(min(8, out_channels), out_channels)
+        self.act1 = nn.GELU()
+        self.dropout1 = nn.Dropout(dropout)
+
+        # Second causal convolution
+        self.conv2 = CausalConv1d(out_channels, out_channels, kernel_size, dilation)
+        self.norm2 = nn.GroupNorm(min(8, out_channels), out_channels)
+        self.act2 = nn.GELU()
+        self.dropout2 = nn.Dropout(dropout)
+
+        # Residual connection (1x1 conv if channels change)
+        self.downsample = (
+            nn.Conv1d(in_channels, out_channels, 1)
+            if in_channels != out_channels
+            else nn.Identity()
+        )
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        residual = self.downsample(x)
+
+        # First conv block
+        out = self.conv1(x)
+        out = self.norm1(out)
+        out = self.act1(out)
+        out = self.dropout1(out)
+
+        # Second conv block
+        out = self.conv2(out)
+        out = self.norm2(out)
+        out = self.act2(out)
+        out = self.dropout2(out)
+
+        return out + residual
+
+
+class TCNBase(BaseModel):
+    """
+    Base Temporal Convolutional Network for 1D regression.
+
+    Architecture:
+        1. Stack of temporal blocks with exponentially increasing dilation
+           (the first block expands the single input channel)
+        2. Global average pooling
+        3. Regression head
+
+    The receptive field grows exponentially with depth (2 convs per block):
+        RF = 1 + 2 * (kernel_size - 1) * sum(2^i for i in range(num_layers))
+    """
+
+    def __init__(
+        self,
+        in_shape: tuple[int],
+        out_size: int,
+        num_channels: list[int],
+        kernel_size: int = 3,
+        dropout_rate: float = 0.1,
+        **kwargs,
+    ):
+        """
+        Initialize TCN for regression.
+
+        Args:
+            in_shape: (L,) input signal length
+            out_size: Number of regression output targets
+            num_channels: List of channel sizes for each temporal block
+            kernel_size: Convolution kernel size (default: 3)
+            dropout_rate: Dropout rate (default: 0.1)
+        """
+        super().__init__(in_shape, out_size)
+
+        if len(in_shape) != 1:
+            raise ValueError(
+                f"TCN requires 1D input (L,), got {len(in_shape)}D. "
+                "For 2D/3D data, use ResNet, EfficientNet, or Swin."
+            )
+
+        self.num_channels = num_channels
+        self.kernel_size = kernel_size
+        self.dropout_rate = dropout_rate
+
+        # Build temporal blocks with exponentially increasing dilation
+        layers = []
+        num_levels = len(num_channels)
+
+        for i in range(num_levels):
+            dilation = 2**i
+            in_ch = 1 if i == 0 else num_channels[i - 1]
+            out_ch = num_channels[i]
+            layers.append(
+                TemporalBlock(in_ch, out_ch, kernel_size, dilation, dropout_rate)
+            )
+
+        self.network = nn.Sequential(*layers)
+
+        # Global pooling
+        self.global_pool = nn.AdaptiveAvgPool1d(1)
+
+        # Regression head
+        final_channels = num_channels[-1]
+        self.head = nn.Sequential(
+            nn.Dropout(dropout_rate),
+            nn.Linear(final_channels, 256),
+            nn.GELU(),
+            nn.Dropout(dropout_rate * 0.5),
+            nn.Linear(256, 128),
+            nn.GELU(),
+            nn.Linear(128, out_size),
+        )
+
+        # Calculate and store receptive field
+        self.receptive_field = self._compute_receptive_field()
+
+        # Initialize weights
+        self._init_weights()
+
+    def _compute_receptive_field(self) -> int:
+        """Compute the receptive field of the network."""
+        rf = 1
+        for i in range(len(self.num_channels)):
+            dilation = 2**i
+            # Each temporal block has 2 convolutions
+            rf += 2 * (self.kernel_size - 1) * dilation
+        return rf
+
+    def _init_weights(self):
+        """Initialize weights using Kaiming initialization."""
+        for m in self.modules():
+            if isinstance(m, (nn.Conv1d, nn.Linear)):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.GroupNorm):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Forward pass.
+
+        Args:
+            x: Input tensor of shape (B, 1, L)
+
+        Returns:
+            Output tensor of shape (B, out_size)
+        """
+        # Temporal blocks
+        x = self.network(x)
+
+        # Global pooling
+        x = self.global_pool(x)
+        x = x.flatten(1)
+
+        # Regression head
+        return self.head(x)
+
+    @classmethod
+    def get_default_config(cls) -> dict[str, Any]:
+        """Return default configuration for TCN."""
+        return {
+            "num_channels": [64, 128, 256, 256, 512, 512, 512, 512],
+            "kernel_size": 3,
+            "dropout_rate": 0.1,
+        }
+
+
+# =============================================================================
+# REGISTERED MODEL VARIANTS
+# =============================================================================
+
+
+@register_model("tcn")
+class TCN(TCNBase):
+    """
+    TCN: Standard Temporal Convolutional Network.
+
+    ~7.0M parameters. 8 temporal blocks with channels [64→128→256→256→512→512→512→512].
+    Receptive field: 1021 samples with kernel_size=3.
+
+    Recommended for:
+    - Ultrasonic A-scan processing
+    - Acoustic emission signals
+    - Seismic waveform analysis
+    - Any 1D time-series regression
+
+    Args:
+        in_shape: (L,) input signal length
+        out_size: Number of regression targets
+        kernel_size: Convolution kernel size (default: 3)
+        dropout_rate: Dropout rate (default: 0.1)
+
+    Example:
+        >>> model = TCN(in_shape=(4096,), out_size=3)
+        >>> x = torch.randn(4, 1, 4096)
+        >>> out = model(x)  # (4, 3)
+    """
+
+    def __init__(self, in_shape: tuple[int], out_size: int, **kwargs):
+        # Default: 8 layers, 64→512 channels
+        num_channels = kwargs.pop(
+            "num_channels", [64, 128, 256, 256, 512, 512, 512, 512]
+        )
+        super().__init__(
+            in_shape=in_shape, out_size=out_size, num_channels=num_channels, **kwargs
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"TCN(in_shape={self.in_shape}, out={self.out_size}, "
+            f"RF={self.receptive_field})"
+        )
+
+
+@register_model("tcn_small")
+class TCNSmall(TCNBase):
+    """
+    TCN-Small: Lightweight variant for quick experiments.
+
+    ~1.0M parameters. 6 temporal blocks with channels [32→64→128→128→256→256].
+    Receptive field: 253 samples with kernel_size=3.
+
+    Recommended for:
+    - Quick prototyping
+    - Smaller datasets
+    - Real-time inference on edge devices
+
+    Args:
+        in_shape: (L,) input signal length
+        out_size: Number of regression targets
+        kernel_size: Convolution kernel size (default: 3)
+        dropout_rate: Dropout rate (default: 0.1)
+
+    Example:
+        >>> model = TCNSmall(in_shape=(1024,), out_size=3)
+        >>> x = torch.randn(4, 1, 1024)
+        >>> out = model(x)  # (4, 3)
+    """
+
+    def __init__(self, in_shape: tuple[int], out_size: int, **kwargs):
+        num_channels = [32, 64, 128, 128, 256, 256]
+        super().__init__(
+            in_shape=in_shape, out_size=out_size, num_channels=num_channels, **kwargs
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"TCN_Small(in_shape={self.in_shape}, out={self.out_size}, "
+            f"RF={self.receptive_field})"
+        )
+
+
+@register_model("tcn_large")
+class TCNLarge(TCNBase):
+    """
+    TCN-Large: High-capacity variant for complex patterns.
+
+    ~10.2M parameters. 10 temporal blocks with channels [64→128→256→256→512→512→512→512→512→512].
+    Receptive field: 4093 samples with kernel_size=3.
+
+    Recommended for:
+    - Long sequences (>4096 samples)
+    - Complex temporal patterns
+    - Large datasets with sufficient compute
+
+    Args:
+        in_shape: (L,) input signal length
+        out_size: Number of regression targets
+        kernel_size: Convolution kernel size (default: 3)
+        dropout_rate: Dropout rate (default: 0.1)
+
+    Example:
+        >>> model = TCNLarge(in_shape=(8192,), out_size=3)
+        >>> x = torch.randn(4, 1, 8192)
+        >>> out = model(x)  # (4, 3)
+    """
+
+    def __init__(self, in_shape: tuple[int], out_size: int, **kwargs):
+        num_channels = [64, 128, 256, 256, 512, 512, 512, 512, 512, 512]
+        super().__init__(
+            in_shape=in_shape, out_size=out_size, num_channels=num_channels, **kwargs
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"TCN_Large(in_shape={self.in_shape}, out={self.out_size}, "
+            f"RF={self.receptive_field})"
+        )
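
The new module can be smoke-tested directly against its own receptive-field formula. A minimal sketch, assuming wavedl 1.4.0 is installed and using only the classes added above (the expected-RF arithmetic mirrors `_compute_receptive_field`):

    import torch

    from wavedl.models.tcn import TCN, CausalConv1d, TCNLarge, TCNSmall

    # Causal left-padding preserves sequence length: (B, C_in, L) -> (B, C_out, L)
    conv = CausalConv1d(1, 4, kernel_size=3, dilation=2)
    assert conv(torch.randn(1, 1, 100)).shape == (1, 4, 100)

    # Each variant's stored RF matches 1 + 2*(k-1)*sum(2^i): two convs per block,
    # dilation doubling per block, default kernel_size k=3
    for cls, n_blocks in [(TCNSmall, 6), (TCN, 8), (TCNLarge, 10)]:
        model = cls(in_shape=(4096,), out_size=3)
        assert model.receptive_field == 1 + 4 * sum(2**i for i in range(n_blocks))
        assert model(torch.randn(2, 1, 4096)).shape == (2, 3)
        print(model)  # e.g. TCN(in_shape=(4096,), out=3, RF=1021)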
wavedl/models/unet.py CHANGED
@@ -1,10 +1,10 @@
 """
-U-Net: Encoder-Decoder Architecture for Regression
-====================================================
+U-Net Regression: Encoder-Decoder Architecture for Vector Regression
+=====================================================================
 
-A dimension-agnostic U-Net implementation for tasks requiring either:
-- Spatial output (e.g., velocity field prediction)
-- Vector output (global pooling → regression head)
+A dimension-agnostic U-Net implementation adapted for vector regression output.
+Uses encoder-decoder architecture with skip connections, then applies global
+pooling to produce a regression vector.
 
 **Dimensionality Support**:
 - 1D: Waveforms, signals (N, 1, L) → Conv1d
@@ -12,11 +12,9 @@ A dimension-agnostic U-Net implementation for tasks requiring either:
 - 3D: Volumetric data (N, 1, D, H, W) → Conv3d
 
 **Variants**:
-- unet: Full encoder-decoder with spatial output capability
 - unet_regression: U-Net with global pooling for vector regression
 
 Author: Ductho Le (ductho.le@outlook.com)
-Version: 1.0.0
 """
 
 from typing import Any
@@ -90,10 +88,6 @@ class Up(nn.Module):
         super().__init__()
         _, ConvTranspose, _, _ = _get_layers(dim)
 
-        # in_channels comes from previous layer
-        # After upconv: in_channels // 2
-        # After concat with skip (out_channels): in_channels // 2 + out_channels = in_channels
-        # Then DoubleConv: in_channels -> out_channels
         self.up = ConvTranspose(in_channels, in_channels // 2, kernel_size=2, stride=2)
         self.conv = DoubleConv(in_channels, out_channels, dim)
 
@@ -103,7 +97,6 @@ class Up(nn.Module):
         # Handle size mismatch (pad x1 to match x2)
         if x1.shape[2:] != x2.shape[2:]:
             diff = [x2.size(i + 2) - x1.size(i + 2) for i in range(len(x1.shape) - 2)]
-            # Pad x1 to match x2
             pad = []
             for d in reversed(diff):
                 pad.extend([d // 2, d - d // 2])
@@ -113,14 +106,33 @@ class Up(nn.Module):
         return self.conv(x)
 
 
-class UNetBase(BaseModel):
+# =============================================================================
+# REGISTERED MODEL
+# =============================================================================
+
+
+@register_model("unet_regression")
+class UNetRegression(BaseModel):
     """
-    Base U-Net class for regression tasks.
+    U-Net for vector regression output.
+
+    Uses U-Net encoder-decoder architecture with skip connections,
+    then applies global pooling for standard vector regression output.
+
+    ~31.1M parameters (2D). Good for leveraging multi-scale features
+    and skip connections for regression tasks.
+
+    Args:
+        in_shape: (L,), (H, W), or (D, H, W)
+        out_size: Number of regression targets
+        base_channels: Base channel count (default: 64)
+        depth: Number of encoder/decoder levels (default: 4)
+        dropout_rate: Dropout rate (default: 0.1)
 
-    Standard U-Net architecture:
-    - Encoder path with downsampling
-    - Decoder path with upsampling and skip connections
-    - Optional spatial or vector output
+    Example:
+        >>> model = UNetRegression(in_shape=(224, 224), out_size=3)
+        >>> x = torch.randn(4, 1, 224, 224)
+        >>> out = model(x)  # (4, 3)
     """
 
     def __init__(
@@ -130,7 +142,6 @@ class UNetBase(BaseModel):
         base_channels: int = 64,
         depth: int = 4,
         dropout_rate: float = 0.1,
-        spatial_output: bool = False,
         **kwargs,
     ):
         super().__init__(in_shape, out_size)
@@ -139,12 +150,10 @@ class UNetBase(BaseModel):
         self.base_channels = base_channels
         self.depth = depth
         self.dropout_rate = dropout_rate
-        self.spatial_output = spatial_output
 
-        Conv, _, _, AdaptivePool = _get_layers(self.dim)
+        _, _, _, AdaptivePool = _get_layers(self.dim)
 
         # Channel progression: 64 -> 128 -> 256 -> 512 (for depth=4)
-        # features[i] = base_channels * 2^i
         features = [base_channels * (2**i) for i in range(depth + 1)]
 
         # Initial double conv (1 -> features[0])
@@ -158,22 +167,17 @@ class UNetBase(BaseModel):
         # Decoder (up path)
         self.ups = nn.ModuleList()
         for i in range(depth):
-            # Input: features[depth - i], Skip: features[depth - 1 - i], Output: features[depth - 1 - i]
             self.ups.append(Up(features[depth - i], features[depth - 1 - i], self.dim))
 
-        if spatial_output:
-            # Spatial output: 1x1 conv to out_size channels
-            self.outc = Conv(features[0], out_size, kernel_size=1)
-        else:
-            # Vector output: global pooling + regression head
-            self.global_pool = AdaptivePool(1)
-            self.head = nn.Sequential(
-                nn.Dropout(dropout_rate),
-                nn.Linear(features[0], 256),
-                nn.ReLU(inplace=True),
-                nn.Dropout(dropout_rate),
-                nn.Linear(256, out_size),
-            )
+        # Vector output: global pooling + regression head
+        self.global_pool = AdaptivePool(1)
+        self.head = nn.Sequential(
+            nn.Dropout(dropout_rate),
+            nn.Linear(features[0], 256),
+            nn.ReLU(inplace=True),
+            nn.Dropout(dropout_rate),
+            nn.Linear(256, out_size),
+        )
 
         self._init_weights()
 
@@ -220,85 +224,15 @@ class UNetBase(BaseModel):
         for up, skip in zip(self.ups, reversed(skips)):
             x = up(x, skip)
 
-        if self.spatial_output:
-            return self.outc(x)
-        else:
-            x = self.global_pool(x)
-            x = x.flatten(1)
-            return self.head(x)
+        # Global pooling + regression head
+        x = self.global_pool(x)
+        x = x.flatten(1)
+        return self.head(x)
 
     @classmethod
     def get_default_config(cls) -> dict[str, Any]:
         """Return default configuration."""
         return {"base_channels": 64, "depth": 4, "dropout_rate": 0.1}
 
-
-# =============================================================================
-# REGISTERED MODEL VARIANTS
-# =============================================================================
-
-
-@register_model("unet")
-class UNet(UNetBase):
-    """
-    U-Net with spatial output capability.
-
-    Good for: Pixel/voxel-wise regression (velocity fields, spatial maps).
-
-    Note: For spatial output, out_size is the number of output channels.
-    Output shape: (B, out_size, *spatial_dims) for spatial_output=True.
-
-    Args:
-        in_shape: (L,), (H, W), or (D, H, W)
-        out_size: Number of output channels for spatial output
-        base_channels: Base channel count (default: 64)
-        depth: Number of encoder/decoder levels (default: 4)
-        spatial_output: If True, output spatial map; if False, output vector
-        dropout_rate: Dropout rate (default: 0.1)
-    """
-
-    def __init__(
-        self,
-        in_shape: SpatialShape,
-        out_size: int,
-        spatial_output: bool = True,
-        **kwargs,
-    ):
-        super().__init__(
-            in_shape=in_shape,
-            out_size=out_size,
-            spatial_output=spatial_output,
-            **kwargs,
-        )
-
-    def __repr__(self) -> str:
-        mode = "spatial" if self.spatial_output else "vector"
-        return f"UNet({self.dim}D, {mode}, in_shape={self.in_shape}, out_size={self.out_size})"
-
-
-@register_model("unet_regression")
-class UNetRegression(UNetBase):
-    """
-    U-Net for vector regression output.
-
-    Uses U-Net encoder-decoder but applies global pooling at the end
-    for standard vector regression output.
-
-    Good for: Leveraging U-Net features (multi-scale, skip connections)
-    for standard regression tasks.
-
-    Args:
-        in_shape: (L,), (H, W), or (D, H, W)
-        out_size: Number of regression targets
-        base_channels: Base channel count (default: 64)
-        depth: Number of encoder/decoder levels (default: 4)
-        dropout_rate: Dropout rate (default: 0.1)
-    """
-
-    def __init__(self, in_shape: SpatialShape, out_size: int, **kwargs):
-        super().__init__(
-            in_shape=in_shape, out_size=out_size, spatial_output=False, **kwargs
-        )
-
     def __repr__(self) -> str:
         return f"UNet_Regression({self.dim}D, in_shape={self.in_shape}, out_size={self.out_size})"
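
With the spatial-output `unet` variant removed, `unet_regression` is the only U-Net registered in 1.4.0, and its forward pass always ends in global pooling. A minimal sketch of the surviving code path, reusing the docstring example above (assuming wavedl 1.4.0 is installed):

    import torch

    from wavedl.models.unet import UNetRegression

    # 2D input (H, W): Conv2d encoder/decoder with skip connections,
    # then AdaptiveAvgPool2d(1) feeds the Linear regression head
    model = UNetRegression(in_shape=(224, 224), out_size=3)
    out = model(torch.randn(4, 1, 224, 224))
    assert out.shape == (4, 3)  # vector output only; no spatial maps in 1.4.0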
wavedl/models/vit.py CHANGED
@@ -10,12 +10,16 @@ Supports both 1D (signals) and 2D (images) inputs via configurable patch embeddi
 - 2D: Images/spectrograms → patches are grid squares
 
 **Variants**:
-- vit_tiny: Smallest (embed_dim=192, depth=12, heads=3)
-- vit_small: Light (embed_dim=384, depth=12, heads=6)
-- vit_base: Standard (embed_dim=768, depth=12, heads=12)
+- vit_tiny: Smallest (~5.7M params, embed_dim=192, depth=12, heads=3)
+- vit_small: Light (~22M params, embed_dim=384, depth=12, heads=6)
+- vit_base: Standard (~86M params, embed_dim=768, depth=12, heads=12)
+
+References:
+    Dosovitskiy, A., et al. (2021). An Image is Worth 16x16 Words:
+    Transformers for Image Recognition at Scale. ICLR 2021.
+    https://arxiv.org/abs/2010.11929
 
 Author: Ductho Le (ductho.le@outlook.com)
-Version: 1.0.0
 """
 
 from typing import Any
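
The parameter counts added to the variant list can be sanity-checked from the configurations alone: each transformer block carries roughly 12·embed_dim² weights (4·d² in the attention projections plus 8·d² in an MLP with a 4× hidden ratio). A rough sketch; the 4× MLP ratio and the embedding/head overhead are assumptions from the standard ViT recipe, not read from this diff:

    # Encoder-only estimate: depth * (4*d^2 attention + 8*d^2 MLP) weights.
    # Patch embedding, position embeddings, and the head add the remainder.
    def approx_vit_params_m(embed_dim: int, depth: int = 12) -> float:
        return depth * 12 * embed_dim**2 / 1e6

    for name, d in [("vit_tiny", 192), ("vit_small", 384), ("vit_base", 768)]:
        print(f"{name}: ~{approx_vit_params_m(d):.1f}M encoder params")
    # vit_tiny: ~5.3M, vit_small: ~21.2M, vit_base: ~84.9M, consistent with
    # the ~5.7M / ~22M / ~86M totals once embeddings and head are included.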