wavedl 1.5.7-py3-none-any.whl → 1.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wavedl/__init__.py +1 -1
- wavedl/models/__init__.py +52 -4
- wavedl/models/_timm_utils.py +238 -0
- wavedl/models/caformer.py +270 -0
- wavedl/models/convnext.py +108 -33
- wavedl/models/convnext_v2.py +504 -0
- wavedl/models/densenet.py +5 -5
- wavedl/models/efficientnet.py +6 -6
- wavedl/models/efficientnetv2.py +3 -3
- wavedl/models/fastvit.py +285 -0
- wavedl/models/mamba.py +535 -0
- wavedl/models/maxvit.py +251 -0
- wavedl/models/mobilenetv3.py +6 -6
- wavedl/models/regnet.py +10 -10
- wavedl/models/resnet.py +5 -5
- wavedl/models/resnet3d.py +2 -2
- wavedl/models/swin.py +3 -3
- wavedl/models/tcn.py +3 -3
- wavedl/models/unet.py +1 -1
- wavedl/models/vit.py +6 -6
- wavedl/train.py +21 -16
- wavedl/utils/data.py +39 -6
- {wavedl-1.5.7.dist-info → wavedl-1.6.0.dist-info}/METADATA +90 -62
- wavedl-1.6.0.dist-info/RECORD +44 -0
- wavedl-1.5.7.dist-info/RECORD +0 -38
- {wavedl-1.5.7.dist-info → wavedl-1.6.0.dist-info}/LICENSE +0 -0
- {wavedl-1.5.7.dist-info → wavedl-1.6.0.dist-info}/WHEEL +0 -0
- {wavedl-1.5.7.dist-info → wavedl-1.6.0.dist-info}/entry_points.txt +0 -0
- {wavedl-1.5.7.dist-info → wavedl-1.6.0.dist-info}/top_level.txt +0 -0
wavedl/models/maxvit.py
ADDED
@@ -0,0 +1,251 @@
+"""
+MaxViT: Multi-Axis Vision Transformer
+======================================
+
+MaxViT combines local and global attention with O(n) complexity using
+multi-axis attention: block attention (local) + grid attention (global sparse).
+
+**Key Features**:
+- Multi-axis attention for both local and global context
+- Hybrid design with MBConv + attention
+- Linear O(n) complexity
+- Hierarchical multi-scale features
+
+**Variants**:
+- maxvit_tiny: 31M params
+- maxvit_small: 69M params
+- maxvit_base: 120M params
+
+**Requirements**:
+- timm (for pretrained models and architecture)
+- torchvision (fallback, limited support)
+
+Reference:
+    Tu, Z., et al. (2022). MaxViT: Multi-Axis Vision Transformer.
+    ECCV 2022. https://arxiv.org/abs/2204.01697
+
+Author: Ductho Le (ductho.le@outlook.com)
+"""
+
+import torch
+import torch.nn as nn
+
+from wavedl.models._timm_utils import build_regression_head
+from wavedl.models.base import BaseModel
+from wavedl.models.registry import register_model
+
+
+__all__ = [
+    "MaxViTBase",
+    "MaxViTBaseLarge",
+    "MaxViTSmall",
+    "MaxViTTiny",
+]
+
+
+# =============================================================================
+# MAXVIT BASE CLASS
+# =============================================================================
+
+
+class MaxViTBase(BaseModel):
+    """
+    MaxViT base class wrapping timm implementation.
+
+    Multi-axis attention with local block and global grid attention.
+    2D only due to attention structure.
+    """
+
+    def __init__(
+        self,
+        in_shape: tuple[int, int],
+        out_size: int,
+        model_name: str = "maxvit_tiny_tf_224",
+        pretrained: bool = True,
+        freeze_backbone: bool = False,
+        dropout_rate: float = 0.3,
+        **kwargs,
+    ):
+        super().__init__(in_shape, out_size)
+
+        if len(in_shape) != 2:
+            raise ValueError(f"MaxViT requires 2D input (H, W), got {len(in_shape)}D")
+
+        self.pretrained = pretrained
+        self.freeze_backbone = freeze_backbone
+        self.model_name = model_name
+
+        # Try to load from timm
+        try:
+            import timm
+
+            self.backbone = timm.create_model(
+                model_name,
+                pretrained=pretrained,
+                num_classes=0,  # Remove classifier
+            )
+
+            # Get feature dimension
+            with torch.no_grad():
+                dummy = torch.zeros(1, 3, *in_shape)
+                features = self.backbone(dummy)
+                in_features = features.shape[-1]
+
+        except ImportError:
+            raise ImportError(
+                "timm is required for MaxViT. Install with: pip install timm"
+            )
+        except Exception as e:
+            raise RuntimeError(f"Failed to load MaxViT model '{model_name}': {e}")
+
+        # Adapt input channels (3 -> 1)
+        self._adapt_input_channels()
+
+        # Regression head
+        self.head = build_regression_head(in_features, out_size, dropout_rate)
+
+        if freeze_backbone:
+            self._freeze_backbone()
+
+    def _adapt_input_channels(self):
+        """Adapt first conv layer for single-channel input."""
+        # MaxViT uses stem.conv1 (Conv2dSame from timm)
+        adapted = False
+
+        # Find the first Conv2d with 3 input channels
+        for name, module in self.backbone.named_modules():
+            if hasattr(module, "in_channels") and module.in_channels == 3:
+                # Get parent and child names
+                parts = name.split(".")
+                parent = self.backbone
+                for part in parts[:-1]:
+                    parent = getattr(parent, part)
+                child_name = parts[-1]
+
+                # Create new conv with 1 input channel
+                new_conv = self._make_new_conv(module)
+                setattr(parent, child_name, new_conv)
+                adapted = True
+                break
+
+        if not adapted:
+            import warnings
+
+            warnings.warn(
+                "Could not adapt MaxViT input channels. Model may fail.", stacklevel=2
+            )
+
+    def _make_new_conv(self, old_conv: nn.Module) -> nn.Module:
+        """Create new conv layer with 1 input channel."""
+        # Handle both Conv2d and Conv2dSame from timm
+        type(old_conv)
+
+        # Get common parameters
+        kwargs = {
+            "out_channels": old_conv.out_channels,
+            "kernel_size": old_conv.kernel_size,
+            "stride": old_conv.stride,
+            "padding": old_conv.padding if hasattr(old_conv, "padding") else 0,
+            "bias": old_conv.bias is not None,
+        }
+
+        # Create new conv (use regular Conv2d for simplicity)
+        new_conv = nn.Conv2d(1, **kwargs)
+
+        if self.pretrained:
+            with torch.no_grad():
+                new_conv.weight.copy_(old_conv.weight.mean(dim=1, keepdim=True))
+                if old_conv.bias is not None:
+                    new_conv.bias.copy_(old_conv.bias)
+        return new_conv
+
+    def _freeze_backbone(self):
+        """Freeze backbone parameters."""
+        for param in self.backbone.parameters():
+            param.requires_grad = False
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        features = self.backbone(x)
+        return self.head(features)
+
+
+# =============================================================================
+# REGISTERED VARIANTS
+# =============================================================================
+
+
+@register_model("maxvit_tiny")
+class MaxViTTiny(MaxViTBase):
+    """
+    MaxViT Tiny: ~30.1M backbone parameters.
+
+    Multi-axis attention with local+global context.
+    2D only.
+
+    Example:
+        >>> model = MaxViTTiny(in_shape=(224, 224), out_size=3)
+        >>> x = torch.randn(4, 1, 224, 224)
+        >>> out = model(x)  # (4, 3)
+    """
+
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="maxvit_tiny_tf_224",
+            **kwargs,
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"MaxViT_Tiny(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+
+
+@register_model("maxvit_small")
+class MaxViTSmall(MaxViTBase):
+    """
+    MaxViT Small: ~67.6M backbone parameters.
+
+    Multi-axis attention with local+global context.
+    2D only.
+    """
+
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="maxvit_small_tf_224",
+            **kwargs,
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"MaxViT_Small(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
+
+
+@register_model("maxvit_base")
+class MaxViTBaseLarge(MaxViTBase):
+    """
+    MaxViT Base: ~118.1M backbone parameters.
+
+    Multi-axis attention with local+global context.
+    2D only.
+    """
+
+    def __init__(self, in_shape: tuple[int, int], out_size: int, **kwargs):
+        super().__init__(
+            in_shape=in_shape,
+            out_size=out_size,
+            model_name="maxvit_base_tf_224",
+            **kwargs,
+        )
+
+    def __repr__(self) -> str:
+        return (
+            f"MaxViT_Base(in_shape={self.in_shape}, out_size={self.out_size}, "
+            f"pretrained={self.pretrained})"
+        )
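The `_make_new_conv` adaptation above reuses pretrained RGB stem weights for single-channel wavefield input by averaging over the input-channel dimension. A minimal standalone sketch of that weight-collapsing step (plain PyTorch; the layer shapes are chosen for illustration only and are not taken from the package):

import torch
import torch.nn as nn

# Stand-in for a pretrained stem conv that expects 3-channel (RGB) input
rgb_conv = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)

# Replacement conv accepting 1-channel input, same output channels/kernel/stride
mono_conv = nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1, bias=False)

with torch.no_grad():
    # (64, 3, 3, 3) -> (64, 1, 3, 3): average the three per-channel filters
    mono_conv.weight.copy_(rgb_conv.weight.mean(dim=1, keepdim=True))

x = torch.randn(4, 1, 224, 224)  # single-channel batch, as in the MaxViTTiny docstring
print(mono_conv(x).shape)        # torch.Size([4, 64, 112, 112])

Averaging keeps the spatial structure of the pretrained filters; it is an approximation rather than an exact equivalence for grayscale inputs.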
wavedl/models/mobilenetv3.py
CHANGED
@@ -13,8 +13,8 @@ optimization to achieve excellent accuracy with minimal computational cost.
 - Designed for real-time inference on CPUs and edge devices
 
 **Variants**:
-- mobilenet_v3_small: Ultra-lightweight (~
-- mobilenet_v3_large: Balanced (~3.
+- mobilenet_v3_small: Ultra-lightweight (~0.9M backbone params) - Edge/embedded
+- mobilenet_v3_large: Balanced (~3.0M backbone params) - Mobile deployment
 
 **Use Cases**:
 - Real-time structural health monitoring on embedded systems
@@ -206,7 +206,7 @@ class MobileNetV3Small(MobileNetV3Base):
     """
     MobileNetV3-Small: Ultra-lightweight for edge deployment.
 
-    ~
+    ~0.9M backbone parameters. Designed for the most constrained environments.
     Achieves ~67% ImageNet accuracy with minimal compute.
 
     Recommended for:
@@ -217,7 +217,7 @@ class MobileNetV3Small(MobileNetV3Base):
 
     Performance (approximate):
     - CPU inference: ~6ms (single core)
-    - Parameters: ~
+    - Parameters: ~0.9M backbone
     - MAdds: 56M
 
     Args:
@@ -253,7 +253,7 @@ class MobileNetV3Large(MobileNetV3Base):
     """
     MobileNetV3-Large: Balanced efficiency and accuracy.
 
-    ~3.
+    ~3.0M backbone parameters. Best trade-off for mobile/portable deployment.
     Achieves ~75% ImageNet accuracy with efficient inference.
 
     Recommended for:
@@ -264,7 +264,7 @@ class MobileNetV3Large(MobileNetV3Base):
 
     Performance (approximate):
     - CPU inference: ~20ms (single core)
-    - Parameters: ~3.
+    - Parameters: ~3.0M backbone
     - MAdds: 219M
 
     Args:
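The parameter figures in these docstring updates count the backbone only (the ImageNet classifier is replaced by the regression head). A hedged sketch of how such a figure can be reproduced for torchvision's MobileNetV3, assuming the stock `classifier` attribute; `backbone_millions` is a hypothetical helper, not part of wavedl:

import torch.nn as nn
from torchvision.models import mobilenet_v3_small

def backbone_millions(model: nn.Module, head_attr: str = "classifier") -> float:
    """Parameter count in millions, excluding the named classification head."""
    total = sum(p.numel() for p in model.parameters())
    head = sum(p.numel() for p in getattr(model, head_attr).parameters())
    return (total - head) / 1e6

print(f"{backbone_millions(mobilenet_v3_small()):.1f}M backbone params")  # ~0.9M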
wavedl/models/regnet.py
CHANGED
@@ -13,11 +13,11 @@ Models scale smoothly from mobile to server deployments.
 - Optional Squeeze-and-Excitation (SE) attention
 
 **Variants** (RegNetY includes SE attention):
-- regnet_y_400mf: Ultra-light (~
-- regnet_y_800mf: Light (~5.
-- regnet_y_1_6gf: Medium (~10.
-- regnet_y_3_2gf: Large (~
-- regnet_y_8gf: Very large (~37.
+- regnet_y_400mf: Ultra-light (~3.9M backbone params, 0.4 GFLOPs)
+- regnet_y_800mf: Light (~5.7M backbone params, 0.8 GFLOPs)
+- regnet_y_1_6gf: Medium (~10.3M backbone params, 1.6 GFLOPs) - Recommended
+- regnet_y_3_2gf: Large (~17.9M backbone params, 3.2 GFLOPs)
+- regnet_y_8gf: Very large (~37.4M backbone params, 8.0 GFLOPs)
 
 **When to Use RegNet**:
 - When you need predictable performance at a given compute budget
@@ -210,7 +210,7 @@ class RegNetY400MF(RegNetBase):
     """
     RegNetY-400MF: Ultra-lightweight for constrained environments.
 
-    ~
+    ~3.9M backbone parameters, 0.4 GFLOPs. Smallest RegNet variant with SE attention.
 
     Recommended for:
     - Edge deployment with moderate accuracy needs
@@ -250,7 +250,7 @@ class RegNetY800MF(RegNetBase):
     """
     RegNetY-800MF: Light variant with good accuracy.
 
-    ~
+    ~5.7M backbone parameters, 0.8 GFLOPs. Good balance for mobile deployment.
 
     Recommended for:
     - Mobile/portable devices
@@ -290,7 +290,7 @@ class RegNetY1_6GF(RegNetBase):
     """
     RegNetY-1.6GF: Recommended default for balanced performance.
 
-    ~
+    ~10.3M backbone parameters, 1.6 GFLOPs. Best trade-off of accuracy and efficiency.
     Comparable to ResNet50 but more efficient.
 
     Recommended for:
@@ -331,7 +331,7 @@ class RegNetY3_2GF(RegNetBase):
     """
     RegNetY-3.2GF: Higher accuracy for demanding tasks.
 
-    ~
+    ~17.9M backbone parameters, 3.2 GFLOPs. Use when 1.6GF isn't sufficient.
 
     Recommended for:
     - Larger datasets requiring more capacity
@@ -371,7 +371,7 @@ class RegNetY8GF(RegNetBase):
     """
     RegNetY-8GF: High capacity for large-scale tasks.
 
-    ~
+    ~37.4M backbone parameters, 8.0 GFLOPs. Use for maximum accuracy needs.
 
     Recommended for:
     - Very large datasets (>50k samples)
wavedl/models/resnet.py
CHANGED
@@ -11,9 +11,9 @@ Provides multiple depth variants (18, 34, 50) with optional pretrained weights f
 - 3D: Volumetric data, CT/MRI (N, 1, D, H, W) → Conv3d
 
 **Variants**:
-- resnet18: Lightweight, fast training (~
-- resnet34: Balanced capacity (~
-- resnet50: Higher capacity with bottleneck blocks (~
+- resnet18: Lightweight, fast training (~11.2M backbone params)
+- resnet34: Balanced capacity (~21.3M backbone params)
+- resnet50: Higher capacity with bottleneck blocks (~23.5M backbone params)
 
 References:
     He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep Residual Learning
@@ -534,7 +534,7 @@ class ResNet18Pretrained(PretrainedResNetBase):
     """
     ResNet-18 with ImageNet pretrained weights (2D only).
 
-    ~
+    ~11.2M backbone parameters. Good for: Transfer learning, faster convergence.
 
     Args:
         in_shape: (H, W) image dimensions
@@ -563,7 +563,7 @@ class ResNet50Pretrained(PretrainedResNetBase):
     """
    ResNet-50 with ImageNet pretrained weights (2D only).
 
-    ~
+    ~23.5M backbone parameters. Good for: High accuracy with transfer learning.
 
     Args:
         in_shape: (H, W) image dimensions
wavedl/models/resnet3d.py
CHANGED
@@ -179,7 +179,7 @@ class ResNet3D18(ResNet3DBase):
     """
     ResNet3D-18: Lightweight 3D ResNet for volumetric data.
 
-    ~
+    ~33.2M backbone parameters. Uses 3D convolutions throughout for true volumetric processing.
     Pretrained on Kinetics-400 (video action recognition).
 
     Recommended for:
@@ -221,7 +221,7 @@ class MC3_18(ResNet3DBase):
     """
     MC3-18: Mixed Convolution 3D ResNet (3D stem + 2D residual blocks).
 
-    ~
+    ~11.5M backbone parameters. More efficient than pure 3D ResNet while maintaining
     good spatiotemporal modeling. Uses 3D convolutions in early layers
     and 2D convolutions in later layers.
 
wavedl/models/swin.py
CHANGED
@@ -304,7 +304,7 @@ class SwinTiny(SwinTransformerBase):
     """
     Swin-T (Tiny): Efficient default for most wave-based tasks.
 
-    ~
+    ~27.5M backbone parameters. Good balance of accuracy and computational cost.
     Outperforms ResNet50 while being more efficient.
 
     Recommended for:
@@ -353,7 +353,7 @@ class SwinSmall(SwinTransformerBase):
     """
     Swin-S (Small): Higher accuracy with moderate compute.
 
-    ~
+    ~48.8M backbone parameters. Better accuracy than Swin-T for larger datasets.
 
     Recommended for:
     - Larger datasets (>20k samples)
@@ -400,7 +400,7 @@ class SwinBase(SwinTransformerBase):
     """
     Swin-B (Base): Maximum accuracy for large-scale tasks.
 
-    ~
+    ~86.7M backbone parameters. Best accuracy but requires more compute and data.
 
     Recommended for:
     - Very large datasets (>50k samples)
wavedl/models/tcn.py
CHANGED
@@ -296,7 +296,7 @@ class TCN(TCNBase):
     """
     TCN: Standard Temporal Convolutional Network.
 
-    ~
+    ~6.9M backbone parameters. 8 temporal blocks with channels [64→128→256→256→512→512→512→512].
     Receptive field: 511 samples with kernel_size=3.
 
     Recommended for:
@@ -338,7 +338,7 @@ class TCNSmall(TCNBase):
     """
     TCN-Small: Lightweight variant for quick experiments.
 
-    ~
+    ~0.9M backbone parameters. 6 temporal blocks with channels [32→64→128→128→256→256].
     Receptive field: 127 samples with kernel_size=3.
 
     Recommended for:
@@ -376,7 +376,7 @@ class TCNLarge(TCNBase):
     """
     TCN-Large: High-capacity variant for complex patterns.
 
-    ~10.
+    ~10.0M backbone parameters. 10 temporal blocks with channels [64→128→256→256→512→512→512→512→512→512].
     Receptive field: 2047 samples with kernel_size=3.
 
     Recommended for:
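The receptive-field figures quoted above (511, 127, and 2047 samples) are consistent with a dilated stack whose dilation doubles every block. A small sketch of that arithmetic, assuming one dilated convolution per temporal block; `tcn_receptive_field` is a hypothetical helper for illustration:

def tcn_receptive_field(num_blocks: int, kernel_size: int = 3) -> int:
    """Receptive field of a dilated conv stack with dilations 1, 2, 4, ..., 2**(L-1)."""
    # Each block adds (kernel_size - 1) * dilation samples of context.
    return 1 + (kernel_size - 1) * (2**num_blocks - 1)

print(tcn_receptive_field(8))   # 511  -> TCN (8 blocks)
print(tcn_receptive_field(6))   # 127  -> TCN-Small (6 blocks)
print(tcn_receptive_field(10))  # 2047 -> TCN-Large (10 blocks)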
wavedl/models/unet.py
CHANGED
@@ -119,7 +119,7 @@ class UNetRegression(BaseModel):
     Uses U-Net encoder-decoder architecture with skip connections,
     then applies global pooling for standard vector regression output.
 
-    ~31.
+    ~31.0M backbone parameters (2D). Good for leveraging multi-scale features
     and skip connections for regression tasks.
 
     Args:
wavedl/models/vit.py
CHANGED
@@ -10,9 +10,9 @@ Supports both 1D (signals) and 2D (images) inputs via configurable patch embeddi
 - 2D: Images/spectrograms → patches are grid squares
 
 **Variants**:
-- vit_tiny: Smallest (~5.
-- vit_small: Light (~
-- vit_base: Standard (~
+- vit_tiny: Smallest (~5.4M backbone params, embed_dim=192, depth=12, heads=3)
+- vit_small: Light (~21.4M backbone params, embed_dim=384, depth=12, heads=6)
+- vit_base: Standard (~85.3M backbone params, embed_dim=768, depth=12, heads=12)
 
 References:
     Dosovitskiy, A., et al. (2021). An Image is Worth 16x16 Words:
@@ -365,7 +365,7 @@ class ViTTiny(ViTBase):
     """
     ViT-Tiny: Smallest Vision Transformer variant.
 
-    ~5.
+    ~5.4M backbone parameters. Good for: Quick experiments, smaller datasets.
 
     Args:
         in_shape: (L,) for 1D or (H, W) for 2D
@@ -398,7 +398,7 @@ class ViTSmall(ViTBase):
     """
     ViT-Small: Light Vision Transformer variant.
 
-    ~
+    ~21.4M backbone parameters. Good for: Balanced performance.
 
     Args:
         in_shape: (L,) for 1D or (H, W) for 2D
@@ -429,7 +429,7 @@ class ViTBase_(ViTBase):
     """
     ViT-Base: Standard Vision Transformer variant.
 
-    ~
+    ~85.3M backbone parameters. Good for: High accuracy, larger datasets.
 
     Args:
         in_shape: (L,) for 1D or (H, W) for 2D
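The ViT figures line up with the usual back-of-envelope estimate of ~12·d² weights per transformer block (4·d² in attention, 8·d² in an MLP with expansion ratio 4). A sketch of that estimate which ignores patch/positional embeddings and layer norms; `vit_encoder_millions` is a hypothetical helper, not a wavedl function:

def vit_encoder_millions(embed_dim: int, depth: int) -> float:
    """Rough transformer-encoder parameter count in millions (~12 * d^2 per block)."""
    return depth * 12 * embed_dim**2 / 1e6

print(f"{vit_encoder_millions(192, 12):.1f}M")  # ~5.3M  (vit_tiny)
print(f"{vit_encoder_millions(384, 12):.1f}M")  # ~21.2M (vit_small)
print(f"{vit_encoder_millions(768, 12):.1f}M")  # ~84.9M (vit_base)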
wavedl/train.py
CHANGED
@@ -239,11 +239,12 @@ def parse_args() -> argparse.Namespace:
         help="Python modules to import before training (for custom models)",
     )
     parser.add_argument(
-        "--
-
-
-        help="
+        "--no_pretrained",
+        dest="pretrained",
+        action="store_false",
+        help="Train from scratch without pretrained weights (default: use pretrained)",
     )
+    parser.set_defaults(pretrained=True)
 
     # Configuration File
     parser.add_argument(
@@ -1028,12 +1029,14 @@ def main():
 
         for x, y in pbar:
             with accelerator.accumulate(model):
-
-
-
-
-
-
+                # Use mixed precision for forward pass (respects --precision flag)
+                with accelerator.autocast():
+                    pred = model(x)
+                    # Pass inputs for input-dependent constraints (x_mean, x[...], etc.)
+                    if isinstance(criterion, PhysicsConstrainedLoss):
+                        loss = criterion(pred, y, x)
+                    else:
+                        loss = criterion(pred, y)
 
                 accelerator.backward(loss)
 
@@ -1082,12 +1085,14 @@ def main():
 
         with torch.inference_mode():
             for x, y in val_dl:
-
-
-
-
-
-
+                # Use mixed precision for validation (consistent with training)
+                with accelerator.autocast():
+                    pred = model(x)
+                    # Pass inputs for input-dependent constraints
+                    if isinstance(criterion, PhysicsConstrainedLoss):
+                        loss = criterion(pred, y, x)
+                    else:
+                        loss = criterion(pred, y)
 
                 val_loss_sum += loss.detach() * x.size(0)
                 val_samples += x.size(0)
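The new CLI option follows argparse's inverted-flag pattern: the user passes a negative switch (`--no_pretrained`) while the code keeps reading a positive attribute (`args.pretrained`). A self-contained sketch of the same pattern, outside the wavedl parser:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--no_pretrained",
    dest="pretrained",          # stored attribute stays positive
    action="store_false",       # passing the flag sets it to False
    help="Train from scratch without pretrained weights (default: use pretrained)",
)
parser.set_defaults(pretrained=True)

print(parser.parse_args([]).pretrained)                   # True
print(parser.parse_args(["--no_pretrained"]).pretrained)  # False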
wavedl/utils/data.py
CHANGED
@@ -474,9 +474,18 @@ class _TransposedH5Dataset:
         self.shape = tuple(reversed(h5_dataset.shape))
         self.dtype = h5_dataset.dtype
 
-
-
-
+    @property
+    def ndim(self) -> int:
+        """Number of dimensions (derived from shape for numpy compatibility)."""
+        return len(self.shape)
+
+    @property
+    def _transpose_axes(self) -> tuple[int, ...]:
+        """Transpose axis order for reversing dimensions.
+
+        For shape (A, B, C) -> reversed (C, B, A), transpose axes are (2, 1, 0).
+        """
+        return tuple(range(len(self._dataset.shape) - 1, -1, -1))
 
     def __len__(self) -> int:
         return self.shape[0]
@@ -965,8 +974,17 @@ def load_test_data(
         else:
            # Fallback to default source.load() for unknown formats
             inp, outp = source.load(path)
-    except KeyError:
-        #
+    except KeyError as e:
+        # IMPORTANT: Only fall back to inference-only mode if outputs are
+        # genuinely missing (auto-detection failed). If user explicitly
+        # provided --output_key, they expect it to exist - don't silently drop.
+        if output_key is not None:
+            raise KeyError(
+                f"Explicit --output_key '{output_key}' not found in file. "
+                f"Available keys depend on file format. Original error: {e}"
+            ) from e
+
+        # Legitimate fallback: no explicit output_key, outputs just not present
         if format == "npz":
            # First pass to find keys
            with np.load(path, allow_pickle=False) as probe:
@@ -1083,11 +1101,26 @@
             raise ValueError(
                 f"Input appears to be channels-last format: {tuple(X.shape)}. "
                 "WaveDL expects channels-first (N, C, H, W). "
-                "Convert your data using: X = X.permute(0, 3, 1, 2)"
+                "Convert your data using: X = X.permute(0, 3, 1, 2). "
+                "If this is actually a 3D volume with small depth, "
+                "use --input_channels 1 to add a channel dimension."
             )
         elif X.shape[1] > 16:
             # Heuristic fallback: large dim 1 suggests 3D volume needing channel
             X = X.unsqueeze(1)  # 3D volume: (N, D, H, W) → (N, 1, D, H, W)
+        else:
+            # Ambiguous case: shallow 3D volume (D <= 16) or multi-channel 2D
+            # Default to treating as multi-channel 2D (no modification needed)
+            # Log a warning so users know about the --input_channels option
+            import warnings
+
+            warnings.warn(
+                f"Ambiguous 4D input shape: {tuple(X.shape)}. "
+                f"Assuming {X.shape[1]} channels (multi-channel 2D). "
+                f"For 3D volumes with depth={X.shape[1]}, use --input_channels 1.",
+                UserWarning,
+                stacklevel=2,
+            )
     # X.ndim >= 5: assume channel dimension already exists
 
     return X, y
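The new `_transpose_axes` property simply reverses the stored dimension order, so an HDF5 dataset whose axes were written in reverse can be presented the other way around. A small NumPy sketch of that axis arithmetic (illustrative array only):

import numpy as np

data = np.arange(24).reshape(2, 3, 4)             # stored on disk as (A, B, C)
axes = tuple(range(data.ndim - 1, -1, -1))        # (2, 1, 0)

view = np.transpose(data, axes)                   # logical shape (C, B, A) = (4, 3, 2)
print(view.shape == tuple(reversed(data.shape)))  # True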
|