sonusai 0.18.6__py3-none-any.whl → 0.18.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sonusai/__init__.py CHANGED
@@ -2,6 +2,12 @@ import logging
  from importlib import metadata
  from os.path import dirname
 
+ from pyaaware import TorchForwardTransform
+ from pyaaware import TorchInverseTransform
+
+ ForwardTransform = TorchForwardTransform
+ InverseTransform = TorchInverseTransform
+
  __version__ = metadata.version(__package__)
  BASEDIR = dirname(__file__)
 
@@ -18,7 +24,6 @@ commands_doc = """
  mkwav                    Make WAV files from a mixture database
  onnx_predict             Run ONNX predict on a trained model
  plot                     Plot mixture data
- post_spenh_targetf       Run post-processing for speech enhancement targetf data
  summarize_metric_spenh   Summarize speech enhancement and analysis results
  tplot                    Plot truth data
  vars                     List custom SonusAI variables
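The new module-level names alias pyaaware's Torch transform classes, so everything downstream of `sonusai/__init__.py` can construct transforms without referencing pyaaware directly. A minimal sketch of the aliased API, assuming the constructor keywords and tensor-based `execute_all` shown in the `sonusai/mixture/helpers.py` hunks below (the parameter values, including `ttype`, are illustrative):

```python
import numpy as np
import torch

from sonusai import ForwardTransform  # alias for pyaaware.TorchForwardTransform

fft = ForwardTransform(N=512, R=256, bin_start=0, bin_end=257, ttype='stft-olsa-hanns')
audio = torch.from_numpy(np.zeros(16000, dtype=np.float32))
frequency_data, energy = fft.execute_all(audio)  # torch tensors in, torch tensors out
```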
sonusai/genmetrics.py CHANGED
@@ -115,11 +115,11 @@ def main() -> None:
      mixdb = MixtureDatabase(location)
      supported = mixdb.supported_metrics
      if show_supported:
-         logger.info(f'\nSupported metrics: {", ".join(sorted(supported))}')
+         logger.info(f'\nSupported metrics:\n\n{supported.pretty}')
          sys.exit(0)
 
      if includes is None or 'all' in includes:
-         metrics = supported
+         metrics = supported.names
      else:
          metrics = set(includes)
          if 'mxwer' in metrics:
@@ -127,7 +127,7 @@ def main() -> None:
              for name in mixdb.asr_configs:
                  metrics.add(f'mxwer.{name}')
 
-     diff = metrics.difference(supported)
+     diff = metrics.difference(supported.names)
      if diff:
          logger.error(f'Unrecognized metric: {", ".join(diff)}')
          sys.exit(1)
@@ -141,7 +141,7 @@ def main() -> None:
              for name in mixdb.asr_configs:
                  _excludes.add(f'mxwer.{name}')
 
-     diff = _excludes.difference(supported)
+     diff = _excludes.difference(supported.names)
      if diff:
          logger.error(f'Unrecognized metric: {", ".join(diff)}')
          sys.exit(1)
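`MixtureDatabase.supported_metrics` now returns a `MetricDocs` collection instead of a plain set, so membership tests go through `.names` and display goes through `.pretty`. A sketch of the updated call pattern (the database location and metric names are hypothetical):

```python
from sonusai.mixture import MixtureDatabase

mixdb = MixtureDatabase('./mixdb')      # hypothetical location
supported = mixdb.supported_metrics     # MetricDocs, not a set, as of 0.18.8

print(supported.pretty)                 # human-readable listing grouped by category
requested = {'mxsnr', 'mxwer.whisper'}  # hypothetical metric names
unknown = requested.difference(supported.names)
```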
sonusai/metrics/__init__.py CHANGED
@@ -8,7 +8,8 @@ from .calc_pesq import calc_pesq
  from .calc_phase_distance import calc_phase_distance
  from .calc_sa_sdr import calc_sa_sdr
  from .calc_sample_weights import calc_sample_weights
- from .calc_snr_f import calc_snr_f
+ from .calc_segsnr_f import calc_segsnr_f
+ from .calc_segsnr_f import calc_segsnr_f_bin
  from .calc_speech import calc_speech
  from .calc_wer import calc_wer
  from .calc_wsdr import calc_wsdr
sonusai/metrics/calc_audio_stats.py CHANGED
@@ -2,6 +2,14 @@ from sonusai.mixture.datatypes import AudioStatsMetrics
  from sonusai.mixture.datatypes import AudioT
 
 
+ def _convert_str_with_factors_to_int(x: str) -> int:
+     if 'k' in x:
+         return int(1000 * float(x.replace('k', '')))
+     if 'M' in x:
+         return int(1000000 * float(x.replace('M', '')))
+     return int(x)
+
+
  def calc_audio_stats(audio: AudioT, win_len: float = None) -> AudioStatsMetrics:
      from sonusai.mixture import SAMPLE_RATE
      from sonusai.mixture import Transformer
@@ -38,5 +46,5 @@ def calc_audio_stats(audio: AudioT, win_len: float = None) -> AudioStatsMetrics:
          tr=float(stats['RMS Tr dB']),
          cr=float(stats['Crest factor']),
          fl=float(stats['Flat factor']),
-         pkc=int(stats['Pk count']),
+         pkc=_convert_str_with_factors_to_int(stats['Pk count']),
      )
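sox-style stats report `Pk count` with metric suffixes once the value grows large (e.g. `24.4k`), which is what broke the old `int()` cast. The mapping the new helper implements, illustrated with the function copied from the hunk above:

```python
def _convert_str_with_factors_to_int(x: str) -> int:  # copied from the hunk above
    if 'k' in x:
        return int(1000 * float(x.replace('k', '')))
    if 'M' in x:
        return int(1000000 * float(x.replace('M', '')))
    return int(x)

assert _convert_str_with_factors_to_int('2') == 2
assert _convert_str_with_factors_to_int('24.4k') == 24400
assert _convert_str_with_factors_to_int('1.2M') == 1200000
```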
sonusai/metrics/calc_segsnr_f.py ADDED
@@ -0,0 +1,84 @@
+ import numpy as np
+
+ from sonusai.mixture.datatypes import AudioF
+ from sonusai.mixture.datatypes import Segsnr
+ from sonusai.mixture.datatypes import SnrFBinMetrics
+ from sonusai.mixture.datatypes import SnrFMetrics
+
+
+ def calc_segsnr_f(segsnr_f: Segsnr) -> SnrFMetrics:
+     """Calculate metrics of snr_f truth data.
+
+     Includes mean and standard deviation of the linear values (usually energy)
+     and mean and standard deviation of the dB values (10 * log10).
+     """
+     if np.count_nonzero(segsnr_f) == 0:
+         # If all entries are zeros
+         return SnrFMetrics(0, 0, -np.inf, 0)
+
+     tmp = np.ma.array(segsnr_f, mask=np.logical_not(np.isfinite(segsnr_f)))
+     if np.ma.count_masked(tmp) == np.ma.size(tmp, axis=0):
+         # If all entries are infinite
+         return SnrFMetrics(np.inf, 0, np.inf, 0)
+
+     snr_mean = np.mean(tmp, axis=0)
+     snr_std = np.std(tmp, axis=0)
+
+     tmp = 10 * np.ma.log10(tmp)
+     if np.ma.count_masked(tmp) == np.ma.size(tmp, axis=0):
+         # If all entries are masked, special case where all inputs are either 0 or infinite
+         snr_db_mean = -np.inf
+         snr_db_std = np.inf
+     else:
+         snr_db_mean = np.mean(tmp, axis=0)
+         snr_db_std = np.std(tmp, axis=0)
+
+     return SnrFMetrics(snr_mean,
+                        snr_std,
+                        snr_db_mean,
+                        snr_db_std)
+
+
+ def calc_segsnr_f_bin(target_f: AudioF, noise_f: AudioF) -> SnrFBinMetrics:
+     """Calculate per-bin segmental SNR metrics.
+
+     Includes per-bin mean and standard deviation of the linear values
+     and mean and standard deviation of the dB values.
+     """
+     if target_f.ndim != 2 or noise_f.ndim != 2:
+         raise ValueError('target_f and noise_f must have 2 dimensions')
+
+     segsnr_f = (np.abs(target_f) ** 2) / (np.abs(noise_f) ** 2)
+
+     frames, bins = segsnr_f.shape
+     if np.count_nonzero(segsnr_f) == 0:
+         # If all entries are zeros
+         return SnrFBinMetrics(np.zeros(bins),
+                               np.zeros(bins),
+                               -np.inf * np.ones(bins),
+                               np.zeros(bins))
+
+     tmp = np.ma.array(segsnr_f, mask=np.logical_not(np.isfinite(segsnr_f)))
+     if np.ma.count_masked(tmp) == np.ma.size(tmp, axis=0):
+         # If all entries are infinite
+         return SnrFBinMetrics(np.inf * np.ones(bins),
+                               np.zeros(bins),
+                               np.inf * np.ones(bins),
+                               np.zeros(bins))
+
+     snr_mean = np.mean(tmp, axis=0)
+     snr_std = np.std(tmp, axis=0)
+
+     tmp = 10 * np.ma.log10(tmp)
+     if np.ma.count_masked(tmp) == np.ma.size(tmp, axis=0):
+         # If all entries are masked, special case where all inputs are either 0 or infinite
+         snr_db_mean = -np.inf * np.ones(bins)
+         snr_db_std = np.inf * np.ones(bins)
+     else:
+         snr_db_mean = np.mean(tmp, axis=0)
+         snr_db_std = np.std(tmp, axis=0)
+
+     return SnrFBinMetrics(np.ma.getdata(snr_mean),
+                           np.ma.getdata(snr_std),
+                           np.ma.getdata(snr_db_mean),
+                           np.ma.getdata(snr_db_std))
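Both functions expect frame-major data. A usage sketch for the per-bin variant with synthetic spectra (shapes are illustrative; `AudioF` is [frames, bins] complex data per the datatypes module):

```python
import numpy as np

from sonusai.metrics import calc_segsnr_f_bin

rng = np.random.default_rng(0)
frames, bins = 100, 257
target_f = rng.normal(size=(frames, bins)) + 1j * rng.normal(size=(frames, bins))
noise_f = rng.normal(size=(frames, bins)) + 1j * rng.normal(size=(frames, bins))

m = calc_segsnr_f_bin(target_f, noise_f)
print(m.avg.shape, m.db_avg.shape)  # (257,) (257,): one value per bin
```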
sonusai/metrics/calc_speech.py CHANGED
@@ -6,7 +6,7 @@ from .calc_pesq import calc_pesq
 
 
  def calc_speech(hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int = SAMPLE_RATE) -> SpeechMetrics:
-     """Calculate speech metrics pesq, c_sig, c_bak, c_ovl, seg_snr.
+     """Calculate speech metrics pesq, c_sig, c_bak, and c_ovl.
 
      These are all related and thus included in one function. Reference: matlab script "compute_metrics.m".
 
@@ -38,11 +38,11 @@ def calc_speech(hypothesis: np.ndarray, reference: np.ndarray, sample_rate: int
      _pesq = calc_pesq(hypothesis=hypothesis, reference=reference, sample_rate=sample_rate)
 
      # Now compute the composite measures
-     c_sig = np.clip(3.093 - 1.029 * llr_mean + 0.603 * _pesq - 0.009 * wss_dist, 1, 5)
-     c_bak = np.clip(1.634 + 0.478 * _pesq - 0.007 * wss_dist + 0.063 * seg_snr, 1, 5)
-     c_ovl = np.clip(1.594 + 0.805 * _pesq - 0.512 * llr_mean - 0.007 * wss_dist, 1, 5)
+     csig = np.clip(3.093 - 1.029 * llr_mean + 0.603 * _pesq - 0.009 * wss_dist, 1, 5)
+     cbak = np.clip(1.634 + 0.478 * _pesq - 0.007 * wss_dist + 0.063 * seg_snr, 1, 5)
+     covl = np.clip(1.594 + 0.805 * _pesq - 0.512 * llr_mean - 0.007 * wss_dist, 1, 5)
 
-     return SpeechMetrics(_pesq, c_sig, c_bak, c_ovl)
+     return SpeechMetrics(_pesq, csig, cbak, covl)
 
 
  def _calc_weighted_spectral_slope_measure(hypothesis: np.ndarray,
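The `c_sig`/`c_bak`/`c_ovl` renames carry through to the `SpeechMetrics` NamedTuple (see the `sonusai/mixture/datatypes.py` hunks below), so 0.18.6 callers that read the old field names must be updated. A minimal sketch with illustrative values:

```python
from sonusai.mixture.datatypes import SpeechMetrics

m = SpeechMetrics(pesq=3.2, csig=3.8, cbak=2.9, covl=3.4)  # values are illustrative
print(m.csig, m.cbak, m.covl)  # previously m.c_sig, m.c_bak, m.c_ovl
```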
sonusai/mixture/__init__.py CHANGED
@@ -66,6 +66,8 @@ from .datatypes import GeneralizedIDs
  from .datatypes import ImpulseResponseData
  from .datatypes import ImpulseResponseFiles
  from .datatypes import ListAudiosT
+ from .datatypes import MetricDoc
+ from .datatypes import MetricDocs
  from .datatypes import Mixture
  from .datatypes import MixtureDatabaseConfig
  from .datatypes import Mixtures
@@ -105,6 +107,7 @@ from .helpers import augmented_noise_samples
  from .helpers import augmented_target_samples
  from .helpers import check_audio_files_exist
  from .helpers import forward_transform
+ from .helpers import frames_from_samples
  from .helpers import get_audio_from_transform
  from .helpers import get_ft
  from .helpers import get_segsnr
sonusai/mixture/datatypes.py CHANGED
@@ -1,7 +1,9 @@
  from dataclasses import dataclass
  from typing import Any
+ from typing import Iterable
  from typing import NamedTuple
  from typing import Optional
+ from typing import SupportsIndex
  from typing import TypeAlias
 
  import numpy as np
@@ -336,17 +338,24 @@ SpeechMetadata: TypeAlias = str | list[Interval] | None
 
 
  class SnrFMetrics(NamedTuple):
-     mean: Optional[float] = None
-     var: Optional[float] = None
-     db_mean: Optional[float] = None
+     avg: Optional[float] = None
+     std: Optional[float] = None
+     db_avg: Optional[float] = None
      db_std: Optional[float] = None
 
 
+ class SnrFBinMetrics(NamedTuple):
+     avg: Optional[np.ndarray] = None
+     std: Optional[np.ndarray] = None
+     db_avg: Optional[np.ndarray] = None
+     db_std: Optional[np.ndarray] = None
+
+
  class SpeechMetrics(NamedTuple):
      pesq: Optional[float] = None
-     c_sig: Optional[float] = None
-     c_bak: Optional[float] = None
-     c_ovl: Optional[float] = None
+     csig: Optional[float] = None
+     cbak: Optional[float] = None
+     covl: Optional[float] = None
 
 
  class AudioStatsMetrics(NamedTuple):
@@ -360,3 +369,53 @@ class AudioStatsMetrics(NamedTuple):
      cr: Optional[float] = None
      fl: Optional[float] = None
      pkc: Optional[float] = None
+
+
+ @dataclass
+ class MetricDoc:
+     category: str
+     name: str
+     description: str
+
+
+ class MetricDocs(list[MetricDoc]):
+     def __init__(self, __iterable: Iterable[MetricDoc]) -> None:
+         super().__init__(item for item in __iterable)
+
+     def __setitem__(self, __key: SupportsIndex, __value: MetricDoc) -> None:  # type: ignore
+         super().__setitem__(__key, __value)
+
+     def insert(self, __index: SupportsIndex, __object: MetricDoc) -> None:
+         super().insert(__index, __object)
+
+     def append(self, __object: MetricDoc) -> None:
+         super().append(__object)
+
+     def extend(self, __iterable: Iterable[MetricDoc]) -> None:
+         if isinstance(__iterable, type(self)):
+             super().extend(__iterable)
+         else:
+             super().extend(item for item in __iterable)
+
+     @property
+     def pretty(self) -> str:
+         max_category_len = ((max([len(item.category) for item in self]) + 9) // 10) * 10
+         max_name_len = 2 + ((max([len(item.name) for item in self]) + 1) // 2) * 2
+         categories: list[str] = []
+         for item in self:
+             if item.category not in categories:
+                 categories.append(item.category)
+
+         result = ''
+         for category in categories:
+             result += f'{category}\n'
+             result += '-' * max_category_len + '\n'
+             for item in [sub for sub in self if sub.category == category]:
+                 result += f'  {item.name:<{max_name_len}}{item.description}\n'
+             result += '\n'
+
+         return result
+
+     @property
+     def names(self) -> set[str]:
+         return set(item.name for item in self)
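`MetricDocs` is what `genmetrics` now consumes: `.names` feeds the set arithmetic for include/exclude validation and `.pretty` renders the grouped listing. A sketch with hypothetical entries (the real catalog comes from `MixtureDatabase.supported_metrics`):

```python
from sonusai.mixture.datatypes import MetricDoc
from sonusai.mixture.datatypes import MetricDocs

docs = MetricDocs([
    MetricDoc('Mixture Metrics', 'mxsnr', 'SNR specification in dB'),
    MetricDoc('Mixture Metrics', 'mxssnr_avg', 'Segmental SNR average over frames'),
    MetricDoc('Speech Metrics', 'mxpesq', 'PESQ of mixture versus true target'),
])

print(docs.pretty)  # category headers with dashed rules, then indented name/description rows
print(docs.names)   # {'mxsnr', 'mxssnr_avg', 'mxpesq'} (unordered)
```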
sonusai/mixture/feature.py CHANGED
@@ -1,6 +1,5 @@
  from typing import Optional
 
- from sonusai.mixture.datatypes import AudioF
  from sonusai.mixture.datatypes import AudioT
  from sonusai.mixture.datatypes import Feature
 
@@ -58,15 +57,13 @@ def get_feature_from_audio(audio: AudioT,
  def get_audio_from_feature(feature: Feature,
                             feature_mode: str,
                             num_classes: Optional[int] = 1,
-                            truth_mutex: Optional[bool] = False,
-                            trim: Optional[bool] = True) -> AudioT:
+                            truth_mutex: Optional[bool] = False) -> AudioT:
      """Apply inverse transform to feature data to generate audio data
 
      :param feature: Feature data [frames, strides, feature_parameters]
      :param feature_mode: Feature mode
      :param num_classes: Number of classes
      :param truth_mutex: Whether to calculate 'other' label
-     :param trim: Whether to trim the audio data
      :return: Audio data [samples]
      """
      import numpy as np
@@ -76,6 +73,7 @@ def get_audio_from_feature(feature: Feature,
      from .datatypes import TransformConfig
      from .helpers import inverse_transform
      from sonusai.utils.stacked_complex import unstack_complex
+     from sonusai.utils.compress import power_uncompress
 
      fg = FeatureGenerator(feature_mode=feature_mode,
                            num_classes=num_classes,
@@ -83,23 +81,10 @@ def get_audio_from_feature(feature: Feature,
 
      feature_complex = unstack_complex(feature)
      if feature_mode[0:1] == 'h':
-         feature_complex = _power_uncompress(feature_complex)
+         feature_complex = power_uncompress(feature_complex)
      return np.squeeze(inverse_transform(transform=feature_complex,
                                          config=TransformConfig(N=fg.itransform_N,
                                                                 R=fg.itransform_R,
                                                                 bin_start=fg.bin_start,
                                                                 bin_end=fg.bin_end,
-                                                                ttype=fg.itransform_ttype),
-                                         trim=trim))
-
-
- def _power_uncompress(feature: AudioF) -> AudioF:
-     import numpy as np
-
-     mag = np.abs(feature)
-     phase = np.angle(feature)
-     mag = mag ** (1. / 0.3)
-     real_uncompress = mag * np.cos(phase)
-     imag_uncompress = mag * np.sin(phase)
-
-     return real_uncompress + 1j * imag_uncompress
+                                                                ttype=fg.itransform_ttype)))
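The deleted local helper documents what the shared `sonusai.utils.compress.power_uncompress` must do for the compressed (`'h'`) feature modes: raise the magnitude to 1/0.3 while preserving phase. A self-contained sketch of that math (the 0.3 exponent comes from the removed code; the shared utility's exact implementation may differ):

```python
import numpy as np


def power_uncompress_sketch(feature: np.ndarray) -> np.ndarray:
    # Invert mag ** 0.3 compression: raise magnitude to 1 / 0.3, keep phase.
    mag = np.abs(feature) ** (1.0 / 0.3)
    phase = np.angle(feature)
    return (mag * np.cos(phase) + 1j * mag * np.sin(phase)).astype(feature.dtype)


x = np.array([0.5 + 0.5j], dtype=np.complex64)
compressed = ((np.abs(x) ** 0.3) * np.exp(1j * np.angle(x))).astype(np.complex64)
assert np.allclose(power_uncompress_sketch(compressed), x, atol=1e-5)
```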
sonusai/mixture/helpers.py CHANGED
@@ -2,9 +2,9 @@ from typing import Any
  from typing import Optional
 
  from praatio.utilities.constants import Interval
- from pyaaware import ForwardTransform
- from pyaaware import InverseTransform
 
+ from sonusai import ForwardTransform
+ from sonusai import InverseTransform
  from sonusai.mixture import EnergyT
  from sonusai.mixture.datatypes import AudioF
  from sonusai.mixture.datatypes import AudioT
@@ -276,7 +276,6 @@ def read_mixture_data(name: str, items: list[str] | str) -> Any:
      :return: Data (or tuple of data)
      """
      from os.path import exists
-     from typing import Any
 
      import h5py
      import numpy as np
@@ -285,7 +284,13 @@ def read_mixture_data(name: str, items: list[str] | str) -> Any:
      def _get_dataset(file: h5py.File, d_name: str) -> Any:
          if d_name in file:
-             return np.array(file[d_name])
+             data = np.array(file[d_name])
+             if data.size == 1:
+                 item = data.item()
+                 if isinstance(item, bytes):
+                     return item.decode('utf-8')
+                 return item
+             return data
          return None
 
      if not isinstance(items, list):
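The extra branch matters because h5py returns scalar datasets as 0-d arrays and stores strings as bytes; `.item()` unwraps the scalar and the decode restores `str`. A small round-trip illustration (the file name is a scratch path):

```python
import h5py
import numpy as np

with h5py.File('scratch.h5', 'w') as f:  # scratch path, illustrative
    f.create_dataset('name', data='target1')

with h5py.File('scratch.h5', 'r') as f:
    data = np.array(f['name'])
    item = data.item()            # 0-d array -> Python scalar
    print(type(item))             # <class 'bytes'>: what 0.18.6 returned, wrapped in an array
    print(item.decode('utf-8'))   # 'target1': what _get_dataset now returns
```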
@@ -371,8 +376,8 @@ def get_ft(mixdb: MixtureDatabase, mixture: Mixture, mixture_audio: AudioT, trut
 
      mixture_f = get_mixture_f(mixdb=mixdb, mixture=mixture, mixture_audio=mixture_audio)
 
-     transform_frames = mixdb.mixture_transform_frames(mixture.samples)
-     feature_frames = mixdb.mixture_feature_frames(mixture.samples)
+     transform_frames = frames_from_samples(mixture.samples, mixdb.ft_config.R)
+     feature_frames = frames_from_samples(mixture.samples, mixdb.feature_step_samples)
 
      feature = np.empty((feature_frames, mixdb.fg_stride, mixdb.feature_parameters), dtype=np.float32)
      truth_f = np.empty((feature_frames, mixdb.num_classes), dtype=np.complex64)
@@ -418,20 +423,21 @@ def get_segsnr_t(mixdb: MixtureDatabase, mixture: Mixture, target_audio: AudioT,
      :return: segsnr_t data
      """
      import numpy as np
-     from pyaaware import AawareForwardTransform
+     import torch
+     from sonusai import ForwardTransform
 
      from sonusai import SonusAIError
 
-     fft = AawareForwardTransform(N=mixdb.ft_config.N,
-                                  R=mixdb.ft_config.R,
-                                  bin_start=mixdb.ft_config.bin_start,
-                                  bin_end=mixdb.ft_config.bin_end,
-                                  ttype=mixdb.ft_config.ttype)
+     fft = ForwardTransform(N=mixdb.ft_config.N,
+                            R=mixdb.ft_config.R,
+                            bin_start=mixdb.ft_config.bin_start,
+                            bin_end=mixdb.ft_config.bin_end,
+                            ttype=mixdb.ft_config.ttype)
 
      segsnr_t = np.empty(mixture.samples, dtype=np.float32)
 
-     _, target_energy = fft.execute_all(target_audio)
-     _, noise_energy = fft.execute_all(noise_audio)
+     target_energy = fft.execute_all(torch.from_numpy(target_audio))[1].numpy()
+     noise_energy = fft.execute_all(torch.from_numpy(noise_audio))[1].numpy()
 
      offsets = range(0, mixture.samples, mixdb.ft_config.R)
      if len(target_energy) != len(offsets):
@@ -505,8 +511,11 @@ def get_transform_from_audio(audio: AudioT, transform: ForwardTransform) -> tupl
      :param transform: ForwardTransform object
      :return: Frequency domain data [frames, bins], Energy [frames]
      """
-     f, e = transform.execute_all(audio)
-     return f.transpose(), e
+     import torch
+
+     f, e = transform.execute_all(torch.from_numpy(audio))
+
+     return f.numpy(), e.numpy()
 
 
  def forward_transform(audio: AudioT, config: TransformConfig) -> AudioF:
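`get_transform_from_audio` and `get_audio_from_transform` are the boundary where SonusAI's numpy-based pipeline meets the torch-based pyaaware transforms. `torch.from_numpy` wraps an ndarray without copying, and `.numpy()` on a CPU tensor converts back the same way, so the wrap/unwrap adds no per-call copy cost. A quick check of that shared-buffer behavior:

```python
import numpy as np
import torch

audio = np.zeros(16000, dtype=np.float32)
tensor = torch.from_numpy(audio)  # zero-copy view of the ndarray
audio[0] = 1.0
assert tensor[0].item() == 1.0    # the tensor sees the change: shared buffer
restored = tensor.numpy()         # zero-copy back to numpy (CPU tensors only)
assert restored[0] == 1.0
```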
@@ -518,54 +527,50 @@ def forward_transform(audio: AudioT, config: TransformConfig) -> AudioF:
      :param config: Transform configuration
      :return: Frequency domain data [frames, bins]
      """
-     from pyaaware import AawareForwardTransform
+     from sonusai import ForwardTransform
 
      audio_f, _ = get_transform_from_audio(audio=audio,
-                                           transform=AawareForwardTransform(N=config.N,
-                                                                            R=config.R,
-                                                                            bin_start=config.bin_start,
-                                                                            bin_end=config.bin_end,
-                                                                            ttype=config.ttype))
+                                           transform=ForwardTransform(N=config.N,
+                                                                      R=config.R,
+                                                                      bin_start=config.bin_start,
+                                                                      bin_end=config.bin_end,
+                                                                      ttype=config.ttype))
      return audio_f
 
 
- def get_audio_from_transform(data: AudioF, transform: InverseTransform, trim: bool = True) -> tuple[AudioT, EnergyT]:
+ def get_audio_from_transform(data: AudioF, transform: InverseTransform) -> tuple[AudioT, EnergyT]:
      """Apply inverse transform to input transform data to generate audio data
 
      :param data: Frequency domain data [frames, bins]
      :param transform: InverseTransform object
-     :param trim: Removes starting samples so output waveform will be time-aligned with input waveform to the transform
      :return: Time domain data [samples], Energy [frames]
      """
-     t, e = transform.execute_all(data.transpose())
-     if trim:
-         t = t[transform.N - transform.R:]
+     import torch
 
-     return t, e
+     t, e = transform.execute_all(torch.from_numpy(data))
 
+     return t.numpy(), e.numpy()
 
- def inverse_transform(transform: AudioF, config: TransformConfig, trim: bool = True) -> AudioT:
+
+ def inverse_transform(transform: AudioF, config: TransformConfig) -> AudioT:
      """Transform frequency domain data into time domain using the inverse transform config from the feature
 
      A new transform is used for each call; i.e., state is not maintained between calls to inverse_transform().
 
      :param transform: Frequency domain data [frames, bins]
      :param config: Transform configuration
-     :param trim: Removes starting samples so output waveform will be time-aligned with input waveform to the
-                  transform
      :return: Time domain data [samples]
      """
      import numpy as np
-     from pyaaware import AawareInverseTransform
+     from sonusai import InverseTransform
 
      audio, _ = get_audio_from_transform(data=transform,
-                                         transform=AawareInverseTransform(N=config.N,
-                                                                          R=config.R,
-                                                                          bin_start=config.bin_start,
-                                                                          bin_end=config.bin_end,
-                                                                          ttype=config.ttype,
-                                                                          gain=np.float32(1)),
-                                         trim=trim)
+                                         transform=InverseTransform(N=config.N,
+                                                                    R=config.R,
+                                                                    bin_start=config.bin_start,
+                                                                    bin_end=config.bin_end,
+                                                                    ttype=config.ttype,
+                                                                    gain=np.float32(1)))
      return audio
 
 
@@ -641,3 +646,9 @@ def get_textgrid_tier_from_target_file(target_file: str, tier: str) -> Optional[
          return list(entries)
      else:
          return entries[0].label
+
+
+ def frames_from_samples(samples: int, step_samples: int) -> int:
+     import numpy as np
+
+     return int(np.ceil(samples / step_samples))
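`frames_from_samples` is ceiling division, and it now drives both frame counts in `get_ft` above (transform frames from `ft_config.R`, feature frames from `feature_step_samples`). A quick check of the rounding:

```python
import numpy as np


def frames_from_samples(samples: int, step_samples: int) -> int:  # as added above
    return int(np.ceil(samples / step_samples))


assert frames_from_samples(16384, 256) == 64  # exact multiple of the step
assert frames_from_samples(16385, 256) == 65  # any partial frame rounds up
```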