torch-l1-snr 0.0.4__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {torch_l1snr → torch_l1_snr}/__init__.py +3 -1
- {torch_l1snr → torch_l1_snr}/l1snr.py +10 -25
- {torch_l1_snr-0.0.4.dist-info → torch_l1_snr-0.1.0.dist-info}/METADATA +85 -39
- torch_l1_snr-0.1.0.dist-info/RECORD +7 -0
- {torch_l1_snr-0.0.4.dist-info → torch_l1_snr-0.1.0.dist-info}/WHEEL +1 -1
- torch_l1_snr-0.1.0.dist-info/top_level.txt +1 -0
- torch_l1_snr-0.0.4.dist-info/RECORD +0 -7
- torch_l1_snr-0.0.4.dist-info/top_level.txt +0 -1
- {torch_l1_snr-0.0.4.dist-info → torch_l1_snr-0.1.0.dist-info}/licenses/LICENSE +0 -0
{torch_l1snr → torch_l1_snr}/l1snr.py

@@ -16,6 +16,8 @@
 # Proceedings of the 25th International Society for Music Information Retrieval Conference, 2024
 # arXiv:2406.18747
 
+import warnings
+
 import torch
 import torch.nn as nn
 from torchaudio.transforms import Spectrogram

@@ -107,20 +109,6 @@ class L1SNRLoss(torch.nn.Module):
         scale_time = c * inv_mean
         l1_term = torch.mean(l1_error) * scale_time
 
-        if getattr(self, "balance_per_sample", False):
-            # per-sample w-independent scaling
-            bal = c / (l1_error.detach() + self.eps)
-            l1_term = torch.mean(l1_error * bal)
-
-        if getattr(self, "debug_balance", False):
-            g_d1 = (1.0 - w) * c * inv_mean
-            if getattr(self, "balance_per_sample", False):
-                g_l1 = w * torch.mean(c / (l1_error.detach() + self.eps))
-            else:
-                g_l1 = w * c * inv_mean
-            ratio = (g_l1 / (g_d1 + 1e-12)).item()
-            setattr(self, "last_balance_ratio", ratio)
-
         loss = (1.0 - w) * l1snr_loss + w * l1_term
         return loss * self.weight
 
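The retained `scale_time` lines above implement the gradient-matched L1 term that the new METADATA's "Note on Gradient Balancing" (later in this diff) describes. A minimal standalone sketch of that blending arithmetic, assuming hypothetical inputs; every name below is a stand-in, and the L1-SNR term is a placeholder rather than the package's actual computation:

```python
import torch

# Hypothetical stand-ins for the quantities named in the hunk above.
estimates = torch.randn(4, 32000, requires_grad=True)
targets = torch.randn(4, 32000)

w = 0.1        # l1_weight: fraction of plain L1 in the blend
c = 8.6858896  # roughly 20 / ln(10), a dB-style constant (assumed)
eps = 1e-8

l1_error = torch.abs(estimates - targets)

# Detaching the mean keeps the scale factor out of the gradient path, so the
# L1 term's gradient magnitude is normalized rather than reweighted per element.
inv_mean = 1.0 / (l1_error.mean().detach() + eps)
scale_time = c * inv_mean
l1_term = torch.mean(l1_error) * scale_time  # mirrors the kept line above

l1snr_loss = l1_error.mean()  # placeholder for the actual L1-SNR term

loss = (1.0 - w) * l1snr_loss + w * l1_term
loss.backward()
```
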
@@ -464,11 +452,6 @@ class STFTL1SNRDBLoss(torch.nn.Module):
             scale_spec = 2.0 * c * inv_mean_comp
             l1_term = 0.5 * (torch.mean(err_re) + torch.mean(err_im)) * scale_spec
 
-            if getattr(self, "balance_per_sample", False):
-                bal_re = c / (err_re.detach() + self.l1snr_eps)
-                bal_im = c / (err_im.detach() + self.l1snr_eps)
-                l1_term = 0.5 * (torch.mean(err_re * bal_re) + torch.mean(err_im * bal_im))
-
             loss = (1.0 - w) * d1_sum + w * l1_term
             return loss
         elif w >= 1.0:

@@ -563,8 +546,10 @@ class STFTL1SNRDBLoss(torch.nn.Module):
                     est_spec = transform(est_source)
                     act_spec = transform(act_source)
                 except RuntimeError as e:
-
-
+                    warnings.warn(
+                        f"Error computing spectrogram for resolution {i}: {e}. "
+                        f"Parameters: n_fft={self.n_ffts[i]}, hop_length={self.hop_lengths[i]}, win_length={self.win_lengths[i]}"
+                    )
                     continue
 
                 # Ensure same (B, C, F, T); crop only (F, T) if needed

@@ -578,7 +563,7 @@ class STFTL1SNRDBLoss(torch.nn.Module):
                 try:
                     spec_loss = self._compute_complex_spec_l1snr_loss(est_spec, act_spec)
                 except RuntimeError as e:
-
+                    warnings.warn(f"Error computing complex spectral loss for resolution {i}: {e}")
                     continue
 
                 # Check for numerical issues

@@ -599,19 +584,19 @@ class STFTL1SNRDBLoss(torch.nn.Module):
                     # Accumulate regularization loss
                     total_spec_reg_loss += spec_reg_loss
                 except RuntimeError as e:
-
+                    warnings.warn(f"Error computing spectral level-matching for resolution {i}: {e}")
 
                 # Accumulate loss
                 total_spec_loss += spec_loss
                 valid_transforms += 1
 
             except RuntimeError as e:
-
+                warnings.warn(f"Runtime error in spectrogram transform {i}: {e}")
                 continue
 
         # If all transforms failed, return zero loss
         if valid_transforms == 0:
-
+            warnings.warn("All spectrogram transforms failed. Returning zero loss.")
             return torch.tensor(0.0, device=device)
 
         # Average losses across valid transforms

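The four hunks above replace silently swallowed `RuntimeError`s with `warnings.warn` messages: failed STFT resolutions are still skipped, but they are now visible. A short sketch of how a training script could promote these new warnings to hard errors while debugging, using only the standard library's warning filters (the regex assumes the message text added in this diff):

```python
import warnings

import torch
from torch_l1_snr import STFTL1SNRDBLoss

# Turn the spectrogram warnings added in 0.1.0 into exceptions so a failing
# STFT resolution stops the run instead of being skipped.
warnings.filterwarnings("error", message="Error computing spectrogram.*")

loss_fn = STFTL1SNRDBLoss(name="stft_l1_snr_db_loss")
estimates, actuals = torch.randn(4, 2, 44100), torch.randn(4, 2, 44100)
loss = loss_fn(estimates, actuals)  # now raises instead of warning on failure
```
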
{torch_l1_snr-0.0.4.dist-info → torch_l1_snr-0.1.0.dist-info}/METADATA

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: torch-l1-snr
-Version: 0.0.4
+Version: 0.1.0
 Summary: L1-SNR loss functions for audio source separation in PyTorch
 Home-page: https://github.com/crlandsc/torch-l1-snr
-Author: Christopher
+Author: Christopher Landschoot
 Author-email: crlandschoot@gmail.com
 License: MIT
 Classifier: Intended Audience :: Developers

@@ -28,43 +28,59 @@ Dynamic: license-file
 
 
 
-
+[](https://github.com/crlandsc/torch-l1-snr/blob/main/LICENSE) [](https://github.com/crlandsc/torch-l1-snr/stargazers)
 
-
+L1 Signal-to-Noise Ratio (SNR) loss functions for audio source separation in PyTorch. This package provides four loss functions that combine implementations from recent academic research with novel extensions, designed to integrate easily into any audio separation training pipeline.
 
-
+The core `L1SNRLoss` is based on the loss function described in [[1]](https://arxiv.org/abs/2309.02539), while `L1SNRDBLoss` and `STFTL1SNRDBLoss` are extensions of the adaptive level-matching regularization technique proposed in [[2]](https://arxiv.org/abs/2501.16171). `MultiL1SNRDBLoss` combines both time-domain and spectrogram-domain losses into a single loss function for convenience and flexibility.
 
-
+## Quick Start
+
+```python
+import torch
+from torch_l1_snr import MultiL1SNRDBLoss
+
+# Create combined time + spectrogram domain loss function with adaptive regularization
+loss_fn = MultiL1SNRDBLoss(name="multi_l1_snr_db_loss")
+
+# Calculate loss between model output and target
+estimates = torch.randn(4, 32000)  # (batch, samples)
+targets = torch.randn(4, 32000)
+loss = loss_fn(estimates, targets)
+loss.backward()
+```
 
 ## Features
 
 - **Time-Domain L1SNR Loss**: A basic, time-domain L1-SNR loss, based on [[1]](https://arxiv.org/abs/2309.02539).
 - **Regularized Time-Domain L1SNRDBLoss**: An extension of the L1SNR loss with adaptive level-matching regularization from [[2]](https://arxiv.org/abs/2501.16171), plus an optional L1 loss component.
 - **Multi-Resolution STFT L1SNRDBLoss**: A spectrogram-domain version of the loss from [[2]](https://arxiv.org/abs/2501.16171), calculated over multiple STFT resolutions.
-- **
-- **
+- **Combined Multi-Domain Loss**: `MultiL1SNRDBLoss` combines time-domain and spectrogram-domain losses into a single, weighted objective function.
+- **L1 Loss Blending**: The `l1_weight` parameter allows mixing between L1SNR and standard L1 loss, softening the ["all-or-nothing" behavior](#all-or-nothing-behavior-and-l1_weight) of pure SNR losses for more nuanced separation.
+- **Numerical Stability**: Robust handling of `NaN` and `inf` values during training.
+- **Short Audio Fallback**: Graceful fallback to time-domain loss when audio is too short for STFT processing.
 
 ## Installation
 
 [](https://pypi.org/project/torch-l1-snr/) [](https://pypi.org/project/torch-l1-snr/) [](https://pypi.org/project/torch-l1-snr/)
 
-
+### Install from PyPI
 
 ```bash
 pip install torch-l1-snr
 ```
 
-
+### Install from GitHub
 
 ```bash
-pip install git+https://github.com/crlandsc/torch-
+pip install git+https://github.com/crlandsc/torch-l1-snr.git
 ```
 
 Or, you can clone the repository and install it in editable mode for development:
 
 ```bash
-git clone https://github.com/crlandsc/torch-
-cd torch-
+git clone https://github.com/crlandsc/torch-l1-snr.git
+cd torch-l1-snr
 pip install -e .
 ```
 

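The new "Short Audio Fallback" feature bullet above is described only in prose. A minimal sketch of the idea under stated assumptions; the guard condition, the helper name, and the plain-L1 fallback are hypothetical illustrations, not the package's internals:

```python
import torch

def stft_loss_with_fallback(estimates: torch.Tensor, targets: torch.Tensor,
                            n_fft: int = 512) -> torch.Tensor:
    """Hypothetical: fall back to a time-domain L1 loss when the signal is
    shorter than the smallest STFT window."""
    if estimates.shape[-1] < n_fft:
        return torch.mean(torch.abs(estimates - targets))
    window = torch.hann_window(n_fft)
    est_spec = torch.stft(estimates, n_fft, window=window, return_complex=True)
    tgt_spec = torch.stft(targets, n_fft, window=window, return_complex=True)
    return torch.mean(torch.abs(est_spec - tgt_spec))

# 100 samples < n_fft=512, so the time-domain branch is taken.
print(stft_loss_with_fallback(torch.randn(4, 100), torch.randn(4, 100)))
```
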
@@ -72,27 +88,52 @@ pip install -e .
 
 - [PyTorch](https://pytorch.org/)
 - [torchaudio](https://pytorch.org/audio/stable/index.html)
+- [NumPy](https://numpy.org/) (>=1.21.0)
 
 ## Supported Tensor Shapes
 
-All loss functions in this package (`L1SNRLoss`, `L1SNRDBLoss`, `STFTL1SNRDBLoss`, and `MultiL1SNRDBLoss`) accept standard audio tensors of shape `(batch, samples)` or `(batch, channels, samples)`. For 3D tensors, the channel and sample dimensions are flattened before the time-domain losses are calculated. For the spectrogram-domain loss, a separate STFT is computed for each channel.
+All loss functions in this package (`L1SNRLoss`, `L1SNRDBLoss`, `STFTL1SNRDBLoss`, and `MultiL1SNRDBLoss`) accept standard audio tensors of shape `(batch, samples)`, `(batch, channels, samples)`, or `(batch, num_sources, channels, samples)`. For 3D & 4D tensors, the channel and sample dimensions are flattened before the time-domain losses are calculated. For the spectrogram-domain loss, a separate STFT is computed for each channel.
 
 ## Usage
 
-The loss functions can be imported directly from the `
+The loss functions can be imported directly from the `torch_l1_snr` package.
+
+### Example: `L1SNRLoss` (Time Domain)
+
+The simplest loss function - pure L1SNR without regularization.
+
+```python
+import torch
+from torch_l1_snr import L1SNRLoss
+
+# Create dummy audio signals
+estimates = torch.randn(4, 2, 44100)  # Batch of 4, stereo, 44100 samples
+actuals = torch.randn(4, 2, 44100)
+
+# Basic L1SNR loss
+loss_fn = L1SNRLoss(name="l1_snr_loss")
 
-
+# Calculate loss
+loss = loss_fn(estimates, actuals)
+loss.backward()
+
+print(f"L1SNRLoss: {loss.item()}")
+```
+
+### Example: `L1SNRDBLoss` (Time Domain with Regularization)
+
+Adds adaptive level-matching regularization to prevent silence collapse.
 
 ```python
 import torch
-from
+from torch_l1_snr import L1SNRDBLoss
 
 # Create dummy audio signals
-estimates = torch.randn(4,
-actuals = torch.randn(4,
+estimates = torch.randn(4, 2, 44100)  # Batch of 4, stereo, 44100 samples
+actuals = torch.randn(4, 2, 44100)
 
 # Initialize the loss function with regularization enabled
-# l1_weight=0.1 blends L1SNR+Regularization with 10% L1 loss
+# l1_weight=0.1 blends 90% L1SNR+Regularization with 10% L1 loss
 loss_fn = L1SNRDBLoss(
     name="l1_snr_db_loss",
     use_regularization=True,  # Enable adaptive level-matching regularization

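The updated "Supported Tensor Shapes" paragraph above adds 4D `(batch, num_sources, channels, samples)` support and states that 3D and 4D inputs are flattened before the time-domain loss. A minimal sketch of such a flatten; the helper is hypothetical, since the package's actual reshaping code is not part of this diff:

```python
import torch

def flatten_for_time_domain(x: torch.Tensor) -> torch.Tensor:
    """Hypothetical helper: collapse all dims after batch, so (B, C, T) or
    (B, S, C, T) becomes (B, C*T) or (B, S*C*T) for a time-domain loss."""
    return x.reshape(x.shape[0], -1)

x3d = torch.randn(4, 2, 44100)     # (batch, channels, samples)
x4d = torch.randn(4, 3, 2, 44100)  # (batch, num_sources, channels, samples)
print(flatten_for_time_domain(x3d).shape)  # torch.Size([4, 88200])
print(flatten_for_time_domain(x4d).shape)  # torch.Size([4, 264600])
```
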
@@ -108,15 +149,17 @@ print(f"L1SNRDBLoss: {loss.item()}")
 
 ### Example: `STFTL1SNRDBLoss` (Spectrogram Domain)
 
+Computes L1SNR loss across multiple STFT resolutions.
+
 ```python
 import torch
-from
+from torch_l1_snr import STFTL1SNRDBLoss
 
 # Create dummy audio signals
-estimates = torch.randn(4,
-actuals = torch.randn(4,
+estimates = torch.randn(4, 2, 44100)  # Batch of 4, stereo, 44100 samples
+actuals = torch.randn(4, 2, 44100)
 
-# Initialize the loss function
+# Initialize the loss function without regularization or traditional L1
 # Uses multiple STFT resolutions by default: [512, 1024, 2048] FFT sizes
 loss_fn = STFTL1SNRDBLoss(
     name="stft_l1_snr_db_loss",

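The example above notes default FFT sizes of `[512, 1024, 2048]`, and the first hunk in this diff shows `l1snr.py` importing `torchaudio.transforms.Spectrogram`. A sketch of a multi-resolution transform bank consistent with that; the `hop_length = n_fft // 4` and `win_length = n_fft` choices are assumptions, not the package's documented defaults:

```python
import torch
from torchaudio.transforms import Spectrogram

n_ffts = [512, 1024, 2048]
transforms = [
    Spectrogram(n_fft=n, hop_length=n // 4, win_length=n, power=None)
    for n in n_ffts
]  # power=None keeps complex output (real + imaginary parts)

audio = torch.randn(4, 2, 44100)
for n, t in zip(n_ffts, transforms):
    spec = t(audio)  # (batch, channels, n // 2 + 1, frames), complex dtype
    print(n, tuple(spec.shape), spec.dtype)
```
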
@@ -130,20 +173,19 @@ loss.backward()
 print(f"STFTL1SNRDBLoss: {loss.item()}")
 ```
 
-### Example: `MultiL1SNRDBLoss`
+### Example: `MultiL1SNRDBLoss` (Combined Time + Spectrogram)
 
-
+Combines time-domain and spectrogram-domain losses into a single weighted objective.
 
 ```python
 import torch
-from
+from torch_l1_snr import MultiL1SNRDBLoss
 
 # Create dummy audio signals
-
-
-actuals = torch.randn(2, 2, 44100)
+estimates = torch.randn(4, 2, 44100)  # Batch of 4, stereo, 44100 samples
+actuals = torch.randn(4, 2, 44100)
 
-#
+# Initialize the multi-domain loss function
 loss_fn = MultiL1SNRDBLoss(
     name="multi_l1_snr_db_loss",
     weight=1.0,  # Overall weight for this loss

@@ -152,6 +194,8 @@ loss_fn = MultiL1SNRDBLoss(
     use_time_regularization=True,  # Enable regularization in time domain
     use_spec_regularization=False  # Disable regularization in spec domain
 )
+
+# Calculate loss
 loss = loss_fn(estimates, actuals)
 print(f"Multi-domain Loss: {loss.item()}")
 ```

@@ -164,27 +208,29 @@ The goal of these loss functions is to provide a perceptually-informed and robust
 - **Perceptual Relevance**: The loss is scaled to decibels (dB), which more closely aligns with human perception of loudness.
 - **Adaptive Regularization**: Prevents the model from collapsing to silent outputs by penalizing mismatches in the overall loudness (dBRMS) between the estimate and the target.
 
-
+### Level-Matching Regularization
 
 A key feature of `L1SNRDBLoss` is the adaptive regularization term, as described in [[2]](https://arxiv.org/abs/2501.16171). This component calculates the difference in decibel-scaled root-mean-square (dBRMS) levels between the estimated and actual signals. An adaptive weight (`lambda`) is applied to this difference, which increases when the model incorrectly silences a non-silent target. This encourages the model to learn the correct output level and specifically avoids the model collapsing to a trivial silent solution when uncertain.
 
-
+### Multi-Resolution Spectrogram Analysis
 
-The `STFTL1SNRDBLoss` module applies the L1SNRDB loss across multiple time-frequency resolutions. By analyzing the signal with different STFT window sizes and hop lengths, the loss function can capture a wider range of artifacts—from short, transient errors to longer, tonal discrepancies. This provides a more comprehensive error signal to the model during training.
+The `STFTL1SNRDBLoss` module applies the L1SNRDB loss across multiple time-frequency resolutions. By analyzing the signal with different STFT window sizes and hop lengths, the loss function can capture a wider range of artifacts—from short, transient errors to longer, tonal discrepancies. This provides a more comprehensive error signal to the model during training. Using multiple resolutions for an STFT loss is common among many recent source separation works.
 
-
+### "All-or-Nothing" Behavior and `l1_weight`
 
-A characteristic of SNR-style losses is that they encourage the model to make definitive, "all-or-nothing" separation decisions. This can be highly effective for well-defined sources, as it pushes the model to be confident in its estimations. However, this can also lead to "confident errors," where the model completely removes a signal component it should have kept.
+A characteristic of SNR-style losses (that I experienced in many training experiments) is that they encourage the model to make definitive, "all-or-nothing" separation decisions. This can be highly effective for well-defined sources, as it pushes the model to be confident in its estimations. However, this can also lead to "confident errors," where the model completely removes a signal component it should have kept.
 
 While the Level-Matching Regularization prevents a *total collapse to silence*, it does not by itself solve this issue of overly confident, hard-boundary separation. To provide a tunable solution, this implementation introduces a novel `l1_weight` hyperparameter. This allows you to create a hybrid loss, blending the decisive L1SNR objective with a standard L1 loss to soften its "all-or-nothing"-style behavior and allow for more nuanced separation.
 
+While this can potentially reduce metrics like SDR, I found that re-introducing some standard L1 loss allows for slightly more "smearing" of sound between sources to mask large errors and be more perceptually acceptable. I have no hard numbers on this, just my experience, so I recommend starting with no standard L1 mixed in (`l1_weight=0.0`), and then slowly increasing from there based on your needs.
+
 - `l1_weight=0.0` (Default): Pure L1SNR (+ regularization).
 - `l1_weight=1.0`: Pure L1 loss.
 - `0.0 < l1_weight < 1.0`: A weighted combination of the two.
 
 The implementation is optimized for efficiency: if `l1_weight` is `0.0` or `1.0`, the unused loss component is not computed, saving computational resources.
 
-**Note on Gradient Balancing:** When blending losses (`0.0 < l1_weight < 1.0`),
+**Note on Gradient Balancing:** When blending losses (`0.0 < l1_weight < 1.0`), the implementation automatically scales the L1 component to approximately match the gradient magnitudes of the L1SNR component. This helps maintain stable training without manual tuning.
 
 ## Limitations
 

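The "Level-Matching Regularization" section above describes a dBRMS difference with an adaptive weight (`lambda`) but gives no formula. One illustrative reading of that prose follows; the epsilon, the lambda schedule, and all names are assumptions on my part, and [2] contains the actual formulation:

```python
import torch

def dbrms(x: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
    """Decibel-scaled RMS level per batch item (illustrative)."""
    return 20.0 * torch.log10(torch.sqrt(torch.mean(x ** 2, dim=-1)) + eps)

def level_matching_penalty(est: torch.Tensor, act: torch.Tensor) -> torch.Tensor:
    """Hypothetical sketch: the weight grows when the estimate is much quieter
    than a non-silent target, i.e. when the model is collapsing toward silence."""
    level_diff = dbrms(est) - dbrms(act)  # negative when estimate is too quiet
    lam = torch.where(level_diff < 0,
                      1.0 + level_diff.abs() / 20.0,  # assumed schedule
                      torch.ones_like(level_diff))
    return torch.mean(lam.detach() * level_diff.abs())

est = 0.01 * torch.randn(4, 44100)  # nearly silent estimate
act = torch.randn(4, 44100)         # non-silent target
print(level_matching_penalty(est, act))  # large penalty for the level mismatch
```
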
@@ -201,7 +247,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 
 ## Acknowledgments
 
-The loss functions implemented here are based on the work of the authors of the referenced papers.
+The loss functions implemented here are largely based on the work of the authors of the referenced papers. Thank you for your research!
 
 ## References
 

@@ -209,4 +255,4 @@ The loss functions implemented here are based on the work of the authors of the
 
 [2] K. N. Watcharasupat and A. Lerch, "Separate This, and All of these Things Around It: Music Source Separation via Hyperellipsoidal Queries," [arXiv:2501.16171](https://arxiv.org/abs/2501.16171).
 
-[3] K. N. Watcharasupat and A. Lerch, "A Stem-Agnostic Single-Decoder System for Music Source Separation Beyond Four Stems," Proceedings of the 25th International Society for Music Information Retrieval Conference, 2024. [arXiv:2406.18747](https://arxiv.org/abs/2406.18747)
+[3] K. N. Watcharasupat and A. Lerch, "A Stem-Agnostic Single-Decoder System for Music Source Separation Beyond Four Stems," Proceedings of the 25th International Society for Music Information Retrieval Conference, 2024. [arXiv:2406.18747](https://arxiv.org/abs/2406.18747)

torch_l1_snr-0.1.0.dist-info/RECORD

@@ -0,0 +1,7 @@
+torch_l1_snr/__init__.py,sha256=mT6WxYYlshOwabs79jbUvmoNTn2pG19UKHdSrGVdbYc,244
+torch_l1_snr/l1snr.py,sha256=F1NF3VGodaLWFtHs9xco9MbxfEJ01ip_JSHFS2GgBkU,34520
+torch_l1_snr-0.1.0.dist-info/licenses/LICENSE,sha256=JdS2Pv6DDs3jvXHACGdcHYdiFMe9EO1XGeHkEHLTr8Y,1079
+torch_l1_snr-0.1.0.dist-info/METADATA,sha256=JQjYJCQgzf5Ogj3GJ8OAioGdBtI8ddCo6Tnjf1JfMxs,13112
+torch_l1_snr-0.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+torch_l1_snr-0.1.0.dist-info/top_level.txt,sha256=VUo0QlGvu7tOF8BKWWDoIiLlhcAcetYwR6c8Ldhhpco,13
+torch_l1_snr-0.1.0.dist-info/RECORD,,

torch_l1_snr-0.1.0.dist-info/top_level.txt

@@ -0,0 +1 @@
+torch_l1_snr

torch_l1_snr-0.0.4.dist-info/RECORD

@@ -1,7 +0,0 @@
-torch_l1_snr-0.0.4.dist-info/licenses/LICENSE,sha256=JdS2Pv6DDs3jvXHACGdcHYdiFMe9EO1XGeHkEHLTr8Y,1079
-torch_l1snr/__init__.py,sha256=pR9jg3fjTKt_suZoVDC67tqB7EWRkbfaXaPP7pYQrlQ,220
-torch_l1snr/l1snr.py,sha256=aqmtNfT_8A0IRI9jiVGwNse3igBvelQGKnjfe23Xh7w,35304
-torch_l1_snr-0.0.4.dist-info/METADATA,sha256=pB7DvZ6BdvCshcDqOTkJNqekh97qXNaPc7tnNzBqJVk,11143
-torch_l1_snr-0.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-torch_l1_snr-0.0.4.dist-info/top_level.txt,sha256=NfaRND6pcjZ7-035d4XAg8xJuz31EEU210Y9xWeFOxc,12
-torch_l1_snr-0.0.4.dist-info/RECORD,,

torch_l1_snr-0.0.4.dist-info/top_level.txt

@@ -1 +0,0 @@
-torch_l1snr

{torch_l1_snr-0.0.4.dist-info → torch_l1_snr-0.1.0.dist-info}/licenses/LICENSE

File without changes