wavedl 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,151 @@
1
+ """
2
+ Utility Functions and Classes
3
+ =============================
4
+
5
+ Centralized exports for all utility modules.
6
+
7
+ Author: Ductho Le (ductho.le@outlook.com)
8
+ Version: 1.0.0
9
+ """
10
+
11
+ from .config import (
12
+ create_default_config,
13
+ load_config,
14
+ merge_config_with_args,
15
+ save_config,
16
+ validate_config,
17
+ )
18
+ from .cross_validation import (
19
+ CVDataset,
20
+ run_cross_validation,
21
+ train_fold,
22
+ )
23
+ from .data import (
24
+ # Multi-format data loading
25
+ DataSource,
26
+ HDF5Source,
27
+ MATSource,
28
+ MemmapDataset,
29
+ NPZSource,
30
+ get_data_source,
31
+ load_outputs_only,
32
+ load_test_data,
33
+ load_training_data,
34
+ memmap_worker_init_fn,
35
+ prepare_data,
36
+ )
37
+ from .distributed import (
38
+ broadcast_early_stop,
39
+ broadcast_value,
40
+ sync_tensor,
41
+ )
42
+ from .losses import (
43
+ LogCoshLoss,
44
+ WeightedMSELoss,
45
+ get_loss,
46
+ list_losses,
47
+ )
48
+ from .metrics import (
49
+ COLORS,
50
+ FIGURE_DPI,
51
+ FIGURE_WIDTH_CM,
52
+ # Style constants
53
+ FIGURE_WIDTH_INCH,
54
+ FONT_SIZE_TEXT,
55
+ FONT_SIZE_TICKS,
56
+ MetricTracker,
57
+ calc_pearson,
58
+ calc_per_target_r2,
59
+ configure_matplotlib_style,
60
+ create_training_curves,
61
+ get_lr,
62
+ plot_bland_altman,
63
+ plot_correlation_heatmap,
64
+ plot_error_boxplot,
65
+ plot_error_cdf,
66
+ plot_error_histogram,
67
+ plot_prediction_vs_index,
68
+ plot_qq,
69
+ plot_relative_error,
70
+ plot_residuals,
71
+ plot_scientific_scatter,
72
+ )
73
+ from .optimizers import (
74
+ get_optimizer,
75
+ get_optimizer_with_param_groups,
76
+ list_optimizers,
77
+ )
78
+ from .schedulers import (
79
+ get_scheduler,
80
+ get_scheduler_with_warmup,
81
+ is_epoch_based,
82
+ list_schedulers,
83
+ )
84
+
85
+
86
# Public API of wavedl.utils: the exact set of names re-exported by
# `from wavedl.utils import *`. Entries are grouped by source module via
# the inline section comments below.
__all__ = [
    "COLORS",
    "FIGURE_DPI",
    "FIGURE_WIDTH_CM",
    # Style constants
    "FIGURE_WIDTH_INCH",
    "FONT_SIZE_TEXT",
    "FONT_SIZE_TICKS",
    "CVDataset",
    "DataSource",
    "HDF5Source",
    "LogCoshLoss",
    "MATSource",
    # Data
    "MemmapDataset",
    # Metrics
    "MetricTracker",
    "NPZSource",
    "WeightedMSELoss",
    # Distributed
    "broadcast_early_stop",
    "broadcast_value",
    "calc_pearson",
    "calc_per_target_r2",
    "configure_matplotlib_style",
    "create_default_config",
    "create_training_curves",
    "get_data_source",
    # Losses
    "get_loss",
    "get_lr",
    # Optimizers
    "get_optimizer",
    "get_optimizer_with_param_groups",
    # Schedulers
    "get_scheduler",
    "get_scheduler_with_warmup",
    "is_epoch_based",
    "list_losses",
    "list_optimizers",
    "list_schedulers",
    # Config
    "load_config",
    "load_outputs_only",
    "load_test_data",
    "load_training_data",
    "memmap_worker_init_fn",
    "merge_config_with_args",
    "plot_bland_altman",
    "plot_correlation_heatmap",
    "plot_error_boxplot",
    "plot_error_cdf",
    "plot_error_histogram",
    "plot_prediction_vs_index",
    "plot_qq",
    "plot_relative_error",
    "plot_residuals",
    "plot_scientific_scatter",
    "prepare_data",
    # Cross-Validation
    "run_cross_validation",
    "save_config",
    "sync_tensor",
    "train_fold",
    "validate_config",
]
wavedl/utils/config.py ADDED
@@ -0,0 +1,269 @@
1
+ """
2
+ WaveDL - Configuration Management
3
+ ==================================
4
+
5
+ YAML configuration file support for reproducible experiments.
6
+
7
+ Features:
8
+ - Load experiment configs from YAML files
9
+ - Merge configs with CLI arguments (CLI takes precedence)
10
+ - Validate config values against known options
11
+ - Save effective config for reproducibility
12
+
13
+ Usage:
14
+ # Load config and merge with CLI args
15
+ config = load_config("experiment.yaml")
16
+ args = merge_config_with_args(config, args)
17
+
18
+ # Save effective config
19
+ save_config(args, "output/config.yaml")
20
+
21
+ Author: Ductho Le (ductho.le@outlook.com)
22
+ Version: 1.0.0
23
+ """
24
+
25
+ import argparse
26
+ import logging
27
+ from datetime import datetime
28
+ from pathlib import Path
29
+ from typing import Any
30
+
31
+ import yaml
32
+
33
+
34
def load_config(config_path: str) -> dict[str, Any]:
    """
    Load configuration from a YAML file.

    Nested sections are flattened with underscores (``optimizer.lr`` ->
    ``optimizer_lr``) so the result can be merged into an argparse Namespace.

    Args:
        config_path: Path to YAML configuration file

    Returns:
        Flat dictionary of configuration values

    Raises:
        FileNotFoundError: If config file doesn't exist
        yaml.YAMLError: If config file is invalid YAML
        TypeError: If the top-level YAML document is not a mapping

    Example:
        >>> config = load_config("configs/experiment.yaml")
        >>> print(config["model"])
        'cnn'
    """
    config_path = Path(config_path)

    if not config_path.exists():
        raise FileNotFoundError(f"Config file not found: {config_path}")

    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # An empty file parses to None; treat it as an empty config.
    if config is None:
        config = {}

    # A top-level YAML list or scalar would previously crash inside
    # _flatten_config with an opaque AttributeError; fail fast with a
    # clear message instead.
    if not isinstance(config, dict):
        raise TypeError(
            f"Top-level YAML in {config_path} must be a mapping, "
            f"got {type(config).__name__}"
        )

    # Handle nested configs (e.g., optimizer.lr -> optimizer_lr)
    return _flatten_config(config)
68
+
69
+
70
+ def _flatten_config(
71
+ config: dict[str, Any], parent_key: str = "", sep: str = "_"
72
+ ) -> dict[str, Any]:
73
+ """
74
+ Flatten nested dictionaries for argparse compatibility.
75
+
76
+ Recursively flattens nested dicts, preserving the full key path.
77
+
78
+ Example:
79
+ {'optimizer': {'lr': 1e-3}} -> {'optimizer_lr': 1e-3}
80
+ {'optimizer': {'params': {'beta1': 0.9}}} -> {'optimizer_params_beta1': 0.9}
81
+ {'lr': 1e-3} -> {'lr': 1e-3}
82
+ """
83
+ items = []
84
+ for key, value in config.items():
85
+ new_key = f"{parent_key}{sep}{key}" if parent_key else key
86
+ if isinstance(value, dict):
87
+ # Recursively flatten, passing full accumulated key path
88
+ items.extend(_flatten_config(value, new_key, sep).items())
89
+ else:
90
+ items.append((new_key, value))
91
+ return dict(items)
92
+
93
+
94
+ def merge_config_with_args(
95
+ config: dict[str, Any],
96
+ args: argparse.Namespace,
97
+ parser: argparse.ArgumentParser | None = None,
98
+ ignore_unknown: bool = True,
99
+ ) -> argparse.Namespace:
100
+ """
101
+ Merge YAML config with CLI arguments. CLI args take precedence.
102
+
103
+ Args:
104
+ config: Dictionary from load_config()
105
+ args: Parsed argparse Namespace
106
+ parser: Optional ArgumentParser to detect defaults (if not provided,
107
+ uses heuristic comparison with common default values)
108
+ ignore_unknown: If True, skip config keys not in args
109
+
110
+ Returns:
111
+ Updated argparse Namespace
112
+
113
+ Note:
114
+ CLI arguments (non-default values) always override config values.
115
+ This allows: `--config base.yaml --lr 5e-4` to use config but override LR.
116
+ """
117
+ # Get parser defaults to detect which args were explicitly set by user
118
+ if parser is not None:
119
+ defaults = vars(parser.parse_args([]))
120
+ else:
121
+ # Fallback: reconstruct defaults from known patterns
122
+ # This works because argparse stores actual values, and we compare
123
+ defaults = {}
124
+
125
+ # Track which args were explicitly set on CLI (differ from defaults)
126
+ cli_overrides = set()
127
+ for key, value in vars(args).items():
128
+ if parser is not None:
129
+ if key in defaults and value != defaults[key]:
130
+ cli_overrides.add(key)
131
+ # Without parser, we can't reliably detect CLI overrides
132
+ # So we apply all config values (legacy behavior)
133
+
134
+ # Apply config values only where CLI didn't override
135
+ for key, value in config.items():
136
+ if hasattr(args, key):
137
+ # Skip if user explicitly set this via CLI
138
+ if key in cli_overrides:
139
+ logging.debug(f"Config key '{key}' skipped: CLI override detected")
140
+ continue
141
+ setattr(args, key, value)
142
+ elif not ignore_unknown:
143
+ logging.warning(f"Unknown config key: {key}")
144
+
145
+ return args
146
+
147
+
148
def save_config(
    args: argparse.Namespace, output_path: str, exclude_keys: list[str] | None = None
) -> str:
    """
    Save effective configuration to YAML for reproducibility.

    Args:
        args: Parsed argparse Namespace
        output_path: Path to save YAML file
        exclude_keys: Keys to exclude from saved config (defaults to
            transient CLI flags: list_models, fresh, resume)

    Returns:
        Path to saved config file

    Example:
        >>> save_config(args, "output/effective_config.yaml")
    """
    excluded = ["list_models", "fresh", "resume"] if exclude_keys is None else exclude_keys

    # Serialize every non-excluded arg; Path objects become plain strings
    # so the YAML stays portable.
    config = {
        key: str(value) if isinstance(value, Path) else value
        for key, value in vars(args).items()
        if key not in excluded
    }

    # Add metadata
    config["_metadata"] = {
        "saved_at": datetime.now().isoformat(),
        "wavedl_version": "1.0.0",
    }

    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)

    with open(target, "w", encoding="utf-8") as f:
        yaml.dump(config, f, default_flow_style=False, sort_keys=False)

    return str(target)
189
+
190
+
191
def validate_config(config: dict[str, Any]) -> list[str]:
    """
    Validate configuration values against known options.

    Args:
        config: Configuration dictionary (flat keys, as from load_config)

    Returns:
        List of warning messages (empty if valid)
    """
    warnings = []

    # Known valid options — imported lazily to avoid a circular import
    # at module load time (wavedl.utils imports this module).
    from wavedl.models import list_models
    from wavedl.utils import list_losses, list_optimizers, list_schedulers

    valid_options = {
        "model": list_models(),
        "loss": list_losses(),
        "optimizer": list_optimizers(),
        "scheduler": list_schedulers(),
    }

    for key, valid_values in valid_options.items():
        if key in config and config[key] not in valid_values:
            warnings.append(
                f"Invalid {key}='{config[key]}'. Valid options: {valid_values}"
            )

    # Validate numeric ranges: key -> (min, max, message)
    numeric_checks = {
        "lr": (0, 1, "Learning rate should be between 0 and 1"),
        "epochs": (1, 100000, "Epochs should be positive"),
        "batch_size": (1, 10000, "Batch size should be positive"),
        "patience": (1, 1000, "Patience should be positive"),
        "cv": (0, 100, "CV folds should be 0-100"),
    }

    for key, (min_val, max_val, msg) in numeric_checks.items():
        if key in config:
            val = config[key]
            # A non-numeric value (e.g. "fast" from a typo'd YAML) would
            # raise TypeError on the range comparison; report it as a
            # warning instead of crashing validation.
            if not isinstance(val, (int, float)):
                warnings.append(f"{msg}: got non-numeric {val!r}")
            elif not (min_val <= val <= max_val):
                warnings.append(f"{msg}: got {val}")

    return warnings
236
+
237
+
238
def create_default_config() -> dict[str, Any]:
    """
    Create a default configuration dictionary.

    Returns:
        Dictionary with default training configuration
    """
    model = {"model": "cnn"}
    hyperparameters = {
        "batch_size": 128,
        "lr": 1e-3,
        "epochs": 1000,
        "patience": 20,
        "weight_decay": 1e-4,
        "grad_clip": 1.0,
    }
    training_components = {
        "loss": "mse",
        "optimizer": "adamw",
        "scheduler": "plateau",
    }
    cross_validation = {"cv": 0, "cv_stratify": False, "cv_bins": 10}
    performance = {"precision": "bf16", "compile": False}
    output = {"seed": 2025, "workers": 8}

    # Merge the grouped sections into one flat config dict.
    return {
        **model,
        **hyperparameters,
        **training_components,
        **cross_validation,
        **performance,
        **output,
    }