wavedl-1.6.3-py3-none-any.whl → wavedl-1.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wavedl/__init__.py CHANGED
@@ -18,7 +18,7 @@ For inference:
  # or: python -m wavedl.test --checkpoint best_checkpoint --data_path test.npz
  """

- __version__ = "1.6.3"
+ __version__ = "1.7.0"
  __author__ = "Ductho Le"
  __email__ = "ductho.le@outlook.com"

wavedl/hpo.py CHANGED
@@ -10,12 +10,28 @@ Usage:
  # Quick search (fewer parameters)
  wavedl-hpo --data_path train.npz --n_trials 30 --quick

+ # Medium search (balanced)
+ wavedl-hpo --data_path train.npz --n_trials 50 --medium
+
  # Full search with specific models
  wavedl-hpo --data_path train.npz --n_trials 100 --models cnn resnet18 efficientnet_b0

  # Parallel trials on multiple GPUs
  wavedl-hpo --data_path train.npz --n_trials 100 --n_jobs 4

+ # In-process mode (enables pruning, faster, single-GPU)
+ wavedl-hpo --data_path train.npz --n_trials 50 --inprocess
+
+ Execution Modes:
+     --inprocess: Runs trials in the same Python process. Enables pruning
+         (MedianPruner) for early stopping of unpromising trials.
+         Faster due to no subprocess overhead, but trials share
+         GPU memory (no isolation between trials).
+
+     Default (subprocess): Launches each trial as a separate process.
+         Provides GPU memory isolation but prevents pruning
+         (subprocess can't report intermediate results).
+
  Author: Ductho Le (ductho.le@outlook.com)
  """

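
For orientation, the pruning that --inprocess unlocks depends on the Optuna trial object reaching the training loop so intermediate metrics can be reported each epoch. The sketch below is illustrative only and uses a synthetic loss curve; the real wiring lives in wavedl.train.train_single_trial (whose internals are not part of this diff), which receives the trial via its trial= argument.

    import optuna


    def objective(trial: optuna.trial.Trial) -> float:
        """Illustrative only: report a metric each epoch and honor pruning."""
        lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
        best_val_loss = float("inf")
        for epoch in range(20):
            # Stand-in for one epoch of training + validation.
            val_loss = 1.0 / (epoch + 1) + lr
            best_val_loss = min(best_val_loss, val_loss)
            trial.report(val_loss, step=epoch)  # intermediate value seen by MedianPruner
            if trial.should_prune():            # subprocess trials never reach this point
                raise optuna.TrialPruned()
        return best_val_loss

Paired with a MedianPruner (as in the in-process path below), trials whose reported losses fall behind the running median are stopped early instead of training to completion.
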
@@ -41,10 +57,12 @@ except ImportError:

  DEFAULT_MODELS = ["cnn", "resnet18", "resnet34"]
  QUICK_MODELS = ["cnn"]
+ MEDIUM_MODELS = ["cnn", "resnet18"]

  # All 6 optimizers
  DEFAULT_OPTIMIZERS = ["adamw", "adam", "sgd", "nadam", "radam", "rmsprop"]
  QUICK_OPTIMIZERS = ["adamw"]
+ MEDIUM_OPTIMIZERS = ["adamw", "adam", "sgd"]

  # All 8 schedulers
  DEFAULT_SCHEDULERS = [
@@ -58,10 +76,12 @@ DEFAULT_SCHEDULERS = [
      "linear_warmup",
  ]
  QUICK_SCHEDULERS = ["plateau"]
+ MEDIUM_SCHEDULERS = ["plateau", "cosine", "onecycle"]

  # All 6 losses
  DEFAULT_LOSSES = ["mse", "mae", "huber", "smooth_l1", "log_cosh", "weighted_mse"]
  QUICK_LOSSES = ["mse"]
+ MEDIUM_LOSSES = ["mse", "mae", "huber"]


  # =============================================================================
@@ -70,16 +90,28 @@ QUICK_LOSSES = ["mse"]


  def create_objective(args):
-     """Create Optuna objective function with configurable search space."""
+     """Create Optuna objective function with configurable search space.
+
+     Supports two execution modes:
+     - Subprocess (default): Launches wavedl.train via subprocess. Provides GPU
+       memory isolation but prevents pruning (MedianPruner has no effect).
+     - In-process (--inprocess): Calls train_single_trial() directly. Enables
+       pruning and reduces overhead, but trials share GPU memory.
+     """

      def objective(trial):
-         # Select search space based on mode
+         # Select search space based on mode (quick < medium < full)
          # CLI arguments always take precedence over defaults
          if args.quick:
              models = args.models or QUICK_MODELS
              optimizers = args.optimizers or QUICK_OPTIMIZERS
              schedulers = args.schedulers or QUICK_SCHEDULERS
              losses = args.losses or QUICK_LOSSES
+         elif args.medium:
+             models = args.models or MEDIUM_MODELS
+             optimizers = args.optimizers or MEDIUM_OPTIMIZERS
+             schedulers = args.schedulers or MEDIUM_SCHEDULERS
+             losses = args.losses or MEDIUM_LOSSES
          else:
              models = args.models or DEFAULT_MODELS
              optimizers = args.optimizers or DEFAULT_OPTIMIZERS
@@ -101,13 +133,59 @@ def create_objective(args):
          if loss == "huber":
              huber_delta = trial.suggest_float("huber_delta", 0.1, 2.0)
          else:
-             huber_delta = None
+             huber_delta = 1.0  # default

          if optimizer == "sgd":
              momentum = trial.suggest_float("momentum", 0.8, 0.99)
          else:
-             momentum = None
+             momentum = 0.9  # default
+
+         # ==================================================================
+         # IN-PROCESS MODE: Direct function call with pruning support
+         # ==================================================================
+         if args.inprocess:
+             from wavedl.train import train_single_trial
+
+             try:
+                 result = train_single_trial(
+                     data_path=args.data_path,
+                     model_name=model,
+                     lr=lr,
+                     batch_size=batch_size,
+                     epochs=args.max_epochs,
+                     patience=patience,
+                     optimizer_name=optimizer,
+                     scheduler_name=scheduler,
+                     loss_name=loss,
+                     weight_decay=weight_decay,
+                     seed=args.seed,
+                     huber_delta=huber_delta,
+                     momentum=momentum,
+                     trial=trial,  # Enable pruning via trial.report/should_prune
+                     verbose=False,
+                 )
+
+                 if result["pruned"]:
+                     print(
+                         f"Trial {trial.number}: Pruned at epoch {result['epochs_trained']}"
+                     )
+                     raise optuna.TrialPruned()
+
+                 val_loss = result["best_val_loss"]
+                 print(
+                     f"Trial {trial.number}: val_loss={val_loss:.6f} ({result['epochs_trained']} epochs)"
+                 )
+                 return val_loss

+             except optuna.TrialPruned:
+                 raise  # Re-raise for Optuna to handle
+             except Exception as e:
+                 print(f"Trial {trial.number}: Error - {e}")
+                 return float("inf")
+
+         # ==================================================================
+         # SUBPROCESS MODE (default): GPU memory isolation, no pruning
+         # ==================================================================
          # Build command
          cmd = [
              sys.executable,
@@ -138,9 +216,9 @@ def create_objective(args):
          ]

          # Add conditional args
-         if huber_delta:
+         if loss == "huber":
              cmd.extend(["--huber_delta", str(huber_delta)])
-         if momentum:
+         if optimizer == "sgd":
              cmd.extend(["--momentum", str(momentum)])

          # Use temporary directory for trial output
@@ -285,7 +363,17 @@ Examples:
      parser.add_argument(
          "--quick",
          action="store_true",
-         help="Quick mode: search fewer parameters",
+         help="Quick mode: search fewer parameters (fastest, least thorough)",
+     )
+     parser.add_argument(
+         "--medium",
+         action="store_true",
+         help="Medium mode: balanced parameter search (between --quick and full)",
+     )
+     parser.add_argument(
+         "--inprocess",
+         action="store_true",
+         help="Run trials in-process (enables pruning, faster, but no GPU memory isolation)",
      )
      parser.add_argument(
          "--timeout",
@@ -384,14 +472,32 @@ Examples:
      print("=" * 60)
      print(f"Data: {args.data_path}")
      print(f"Trials: {args.n_trials}")
-     print(f"Mode: {'Quick' if args.quick else 'Full'}")
+     # Determine mode name for display
+     if args.quick:
+         mode_name = "Quick"
+     elif args.medium:
+         mode_name = "Medium"
+     else:
+         mode_name = "Full"
+
+     print(
+         f"Mode: {mode_name}"
+         + (" (in-process, pruning enabled)" if args.inprocess else " (subprocess)")
+     )
      print(f"Parallel jobs: {args.n_jobs}")
      print("=" * 60)

+     # Use MedianPruner only for in-process mode (subprocess trials can't report)
+     if args.inprocess:
+         pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10)
+     else:
+         # NopPruner for subprocess mode - pruning has no effect there
+         pruner = optuna.pruners.NopPruner()
+
      study = optuna.create_study(
          study_name=args.study_name,
          direction="minimize",
-         pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=10),
+         pruner=pruner,
      )

      # Run optimization
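
Whichever pruner is installed, the finished study is inspected through the standard Optuna accessors. A minimal, self-contained sketch (standard Optuna API, not wavedl-specific; the toy objective is invented for illustration):

    import optuna
    from optuna.trial import TrialState

    # Self-contained stand-in study; in wavedl-hpo the study comes from the code above.
    study = optuna.create_study(direction="minimize")
    study.optimize(lambda t: (t.suggest_float("x", -1.0, 1.0) - 0.3) ** 2, n_trials=25)

    pruned = [t for t in study.trials if t.state == TrialState.PRUNED]
    complete = [t for t in study.trials if t.state == TrialState.COMPLETE]
    print(f"Best value:  {study.best_value:.6f}")
    print(f"Best params: {study.best_params}")
    print(f"Trials: {len(complete)} completed, {len(pruned)} pruned")

With the subprocess default, the pruned list will always be empty (NopPruner); only in-process runs can contain pruned trials.
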
@@ -166,6 +166,78 @@ class LayerNormNd(nn.Module):
          return x


+ # =============================================================================
+ # STOCHASTIC DEPTH (DropPath)
+ # =============================================================================
+
+
+ class DropPath(nn.Module):
+     """
+     Stochastic Depth (drop path) regularization for residual networks.
+
+     Randomly drops entire residual branches during training. Used in modern
+     architectures like ConvNeXt, Swin Transformer, UniRepLKNet.
+
+     Args:
+         drop_prob: Probability of dropping the path (default: 0.0)
+
+     Reference:
+         Huang, G., et al. (2016). Deep Networks with Stochastic Depth.
+         https://arxiv.org/abs/1603.09382
+     """
+
+     def __init__(self, drop_prob: float = 0.0):
+         super().__init__()
+         self.drop_prob = drop_prob
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         if self.drop_prob == 0.0 or not self.training:
+             return x
+
+         keep_prob = 1 - self.drop_prob
+         # Shape: (batch_size, 1, 1, ...) for broadcasting
+         shape = (x.shape[0],) + (1,) * (x.ndim - 1)
+         random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
+         random_tensor.floor_()  # Binarize
+         return x.div(keep_prob) * random_tensor
+
+
+ # =============================================================================
+ # BACKBONE FREEZING UTILITIES
+ # =============================================================================
+
+
+ def freeze_backbone(
+     model: nn.Module,
+     exclude_patterns: list[str] | None = None,
+ ) -> int:
+     """
+     Freeze backbone parameters, keeping specified layers trainable.
+
+     Args:
+         model: The model whose parameters to freeze
+         exclude_patterns: List of patterns to exclude from freezing.
+             Parameters with names containing any of these patterns stay trainable.
+             Default: ["classifier", "head", "fc"]
+
+     Returns:
+         Number of parameters frozen
+
+     Example:
+         >>> freeze_backbone(model.backbone, exclude_patterns=["fc", "classifier"])
+     """
+     if exclude_patterns is None:
+         exclude_patterns = ["classifier", "head", "fc"]
+
+     frozen_count = 0
+     for name, param in model.named_parameters():
+         if not any(pattern in name for pattern in exclude_patterns):
+             param.requires_grad = False
+             frozen_count += param.numel()
+
+     return frozen_count
+
+
  # =============================================================================
  # REGRESSION HEAD BUILDERS
  # =============================================================================
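
To make the intended use of these two additions concrete, here is a hedged sketch: a residual block that wraps its branch in DropPath, and a freeze_backbone call that leaves only the head trainable. ToyBlock and ToyNet are invented for illustration; only DropPath and freeze_backbone come from the hunk above (imported from wavedl.models._pretrained_utils, the module the other files in this diff import them from).

    import torch
    import torch.nn as nn

    from wavedl.models._pretrained_utils import DropPath, freeze_backbone


    class ToyBlock(nn.Module):
        """Invented residual block: out = x + drop_path(conv(x))."""

        def __init__(self, channels: int, drop_prob: float = 0.1):
            super().__init__()
            self.conv = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
            self.drop_path = DropPath(drop_prob)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return x + self.drop_path(self.conv(x))


    class ToyNet(nn.Module):
        """Invented model with a backbone/head split matching the default exclude patterns."""

        def __init__(self):
            super().__init__()
            self.backbone = nn.Sequential(ToyBlock(8), ToyBlock(8))
            self.head = nn.Linear(8, 3)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            feats = self.backbone(x)                 # (B, 8, H, W)
            return self.head(feats.mean(dim=(2, 3)))  # global average pool -> (B, 3)


    net = ToyNet()
    n_frozen = freeze_backbone(net, exclude_patterns=["head"])  # backbone frozen, head trainable
    print(f"Frozen parameters: {n_frozen}")
    out = net(torch.randn(2, 8, 16, 16))  # training-mode forward exercises DropPath
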
@@ -31,22 +31,23 @@ from wavedl.models.base import BaseModel
  # @register_model("my_model")
  class TemplateModel(BaseModel):
      """
-     Template Model Architecture.
+     Template Model Architecture (2D only).

      Replace this docstring with your model description.
      The first line will appear in --list_models output.

+     NOTE: This template is hardcoded for 2D inputs using Conv2d/MaxPool2d.
+     For 1D/3D support, use dimension-agnostic layer factories from
+     _pretrained_utils.py (get_conv_layer, get_pool_layer, get_norm_layer).
+
      Args:
-         in_shape: Input spatial dimensions (auto-detected from data)
-             - 1D: (L,) for signals
-             - 2D: (H, W) for images
-             - 3D: (D, H, W) for volumes
+         in_shape: Input spatial dimensions as (H, W) for 2D images
          out_size: Number of regression targets (auto-detected from data)
          hidden_dim: Size of hidden layers (default: 256)
          dropout: Dropout rate (default: 0.1)

      Input Shape:
-         (B, 1, *in_shape) - e.g., (B, 1, 64, 64) for 2D
+         (B, 1, H, W) - 2D grayscale images

      Output Shape:
          (B, out_size) - Regression predictions
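
The NOTE added above points at the dimension-agnostic factories in _pretrained_utils.py. A short sketch of how a template-style stem could use get_conv_layer instead of hardcoding Conv2d; make_stem is invented here, and get_pool_layer/get_norm_layer (whose exact signatures are not shown in this diff) would follow the same pattern:

    import torch
    import torch.nn as nn

    from wavedl.models._pretrained_utils import get_conv_layer


    def make_stem(dim: int, in_ch: int = 1, out_ch: int = 16) -> nn.Module:
        """Illustrative: one code path for 1D/2D/3D by picking the Conv class from dim."""
        Conv = get_conv_layer(dim)  # nn.Conv1d / nn.Conv2d / nn.Conv3d
        return nn.Sequential(Conv(in_ch, out_ch, kernel_size=3, padding=1), nn.GELU())


    stem_2d = make_stem(dim=2)
    y = stem_2d(torch.randn(4, 1, 64, 64))  # (4, 16, 64, 64)
    stem_1d = make_stem(dim=1)
    z = stem_1d(torch.randn(4, 1, 128))     # (4, 16, 128)
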
wavedl/models/cnn.py CHANGED
@@ -159,6 +159,26 @@ class CNN(BaseModel):
              nn.Linear(64, out_size),
          )

+         # Initialize weights
+         self._init_weights()
+
+     def _init_weights(self):
+         """Initialize weights with Kaiming for conv, Xavier for linear."""
+         for m in self.modules():
+             if isinstance(m, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
+                 nn.init.kaiming_normal_(
+                     m.weight, mode="fan_out", nonlinearity="leaky_relu"
+                 )
+                 if m.bias is not None:
+                     nn.init.zeros_(m.bias)
+             elif isinstance(m, nn.Linear):
+                 nn.init.xavier_uniform_(m.weight)
+                 if m.bias is not None:
+                     nn.init.zeros_(m.bias)
+             elif isinstance(m, (nn.GroupNorm, nn.LayerNorm)):
+                 nn.init.ones_(m.weight)
+                 nn.init.zeros_(m.bias)
+
      def _make_conv_block(
          self, in_channels: int, out_channels: int, dropout: float = 0.0
      ) -> nn.Sequential:
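
The same initialization scheme can be applied to any module tree with the standard nn.Module.apply idiom; a standalone sketch mirroring the _init_weights logic above (this helper is not part of wavedl, only an illustration):

    import torch.nn as nn


    def init_weights(m: nn.Module) -> None:
        """Kaiming for conv, Xavier for linear, ones/zeros for norm layers."""
        if isinstance(m, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
            nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="leaky_relu")
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, (nn.GroupNorm, nn.LayerNorm)):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)


    model = nn.Sequential(nn.Conv2d(1, 8, 3, padding=1), nn.GroupNorm(4, 8), nn.Flatten())
    model.apply(init_weights)  # applies the function to every submodule recursively
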
wavedl/models/convnext.py CHANGED
@@ -26,79 +26,12 @@ from typing import Any

  import torch
  import torch.nn as nn
- import torch.nn.functional as F

+ from wavedl.models._pretrained_utils import LayerNormNd, get_conv_layer
  from wavedl.models.base import BaseModel, SpatialShape
  from wavedl.models.registry import register_model


- def _get_conv_layer(dim: int) -> type[nn.Module]:
-     """Get dimension-appropriate Conv class."""
-     if dim == 1:
-         return nn.Conv1d
-     elif dim == 2:
-         return nn.Conv2d
-     elif dim == 3:
-         return nn.Conv3d
-     else:
-         raise ValueError(f"Unsupported dimensionality: {dim}D")
-
-
- class LayerNormNd(nn.Module):
-     """
-     LayerNorm for N-dimensional tensors (channels-first format).
-
-     Implements channels-last LayerNorm as used in the original ConvNeXt paper.
-     Permutes data to channels-last, applies LayerNorm per-channel over spatial
-     dimensions, and permutes back to channels-first format.
-
-     This matches PyTorch's nn.LayerNorm behavior when applied to the channel
-     dimension, providing stable gradients for deep ConvNeXt networks.
-
-     References:
-         Liu, Z., et al. (2022). A ConvNet for the 2020s. CVPR 2022.
-         https://github.com/facebookresearch/ConvNeXt
-     """
-
-     def __init__(self, num_channels: int, dim: int, eps: float = 1e-6):
-         super().__init__()
-         self.dim = dim
-         self.num_channels = num_channels
-         self.weight = nn.Parameter(torch.ones(num_channels))
-         self.bias = nn.Parameter(torch.zeros(num_channels))
-         self.eps = eps
-
-     def forward(self, x: torch.Tensor) -> torch.Tensor:
-         """
-         Apply LayerNorm in channels-last format.
-
-         Args:
-             x: Input tensor in channels-first format
-                 - 1D: (B, C, L)
-                 - 2D: (B, C, H, W)
-                 - 3D: (B, C, D, H, W)
-
-         Returns:
-             Normalized tensor in same format as input
-         """
-         if self.dim == 1:
-             # (B, C, L) -> (B, L, C) -> LayerNorm -> (B, C, L)
-             x = x.permute(0, 2, 1)
-             x = F.layer_norm(x, (self.num_channels,), self.weight, self.bias, self.eps)
-             x = x.permute(0, 2, 1)
-         elif self.dim == 2:
-             # (B, C, H, W) -> (B, H, W, C) -> LayerNorm -> (B, C, H, W)
-             x = x.permute(0, 2, 3, 1)
-             x = F.layer_norm(x, (self.num_channels,), self.weight, self.bias, self.eps)
-             x = x.permute(0, 3, 1, 2)
-         else:
-             # (B, C, D, H, W) -> (B, D, H, W, C) -> LayerNorm -> (B, C, D, H, W)
-             x = x.permute(0, 2, 3, 4, 1)
-             x = F.layer_norm(x, (self.num_channels,), self.weight, self.bias, self.eps)
-             x = x.permute(0, 4, 1, 2, 3)
-         return x
-
-
  class ConvNeXtBlock(nn.Module):
      """
      ConvNeXt block matching the official Facebook implementation.
@@ -129,7 +62,7 @@ class ConvNeXtBlock(nn.Module):
      ):
          super().__init__()
          self.dim = dim
-         Conv = _get_conv_layer(dim)
+         Conv = get_conv_layer(dim)
          hidden_dim = int(channels * expansion_ratio)

          # Depthwise conv (7x7) - operates in channels-first
@@ -223,7 +156,7 @@ class ConvNeXtBase(BaseModel):
          self.dims = dims
          self.dropout_rate = dropout_rate

-         Conv = _get_conv_layer(self.dim)
+         Conv = get_conv_layer(self.dim)

          # Stem: Patchify with stride-4 conv (like ViT patch embedding)
          self.stem = nn.Sequential(
@@ -32,6 +32,7 @@ import torch
  import torch.nn as nn

  from wavedl.models._pretrained_utils import (
+     DropPath,
      LayerNormNd,
      build_regression_head,
      get_conv_layer,
@@ -151,24 +152,6 @@ class ConvNeXtV2Block(nn.Module):
          return x


- class DropPath(nn.Module):
-     """Stochastic Depth (drop path) regularization."""
-
-     def __init__(self, drop_prob: float = 0.0):
-         super().__init__()
-         self.drop_prob = drop_prob
-
-     def forward(self, x: torch.Tensor) -> torch.Tensor:
-         if self.drop_prob == 0.0 or not self.training:
-             return x
-
-         keep_prob = 1 - self.drop_prob
-         shape = (x.shape[0],) + (1,) * (x.ndim - 1)
-         random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
-         random_tensor.floor_()
-         return x.div(keep_prob) * random_tensor
-
-
  # =============================================================================
  # CONVNEXT V2 BASE CLASS
  # =============================================================================