speaker_detector-0.1.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
speaker_detector/ECAPA_TDNN.py
@@ -0,0 +1,633 @@
+ """A popular speaker recognition and diarization model.
+
+ Authors
+  * Hwidong Na 2020
+ """
+
+ import torch  # noqa: F401
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ from speechbrain.dataio.dataio import length_to_mask
+ from speechbrain.nnet.CNN import Conv1d as _Conv1d
+ from speechbrain.nnet.linear import Linear
+ from speechbrain.nnet.normalization import BatchNorm1d as _BatchNorm1d
+
+
+ # Skip transpose as much as possible for efficiency
+ class Conv1d(_Conv1d):
+     """1D convolution. Skip transpose is used to improve efficiency."""
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(skip_transpose=True, *args, **kwargs)
+
+
+ class BatchNorm1d(_BatchNorm1d):
+     """1D batch normalization. Skip transpose is used to improve efficiency."""
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(skip_transpose=True, *args, **kwargs)
+
+
+ class TDNNBlock(nn.Module):
+     """An implementation of TDNN.
+
+     Arguments
+     ---------
+     in_channels : int
+         Number of input channels.
+     out_channels : int
+         The number of output channels.
+     kernel_size : int
+         The kernel size of the TDNN blocks.
+     dilation : int
+         The dilation of the TDNN block.
+     activation : torch class
+         A class for constructing the activation layers.
+     groups : int
+         The groups size of the TDNN blocks.
+     dropout : float
+         Rate of channel dropout during training.
+
+     Example
+     -------
+     >>> inp_tensor = torch.rand([8, 120, 64]).transpose(1, 2)
+     >>> layer = TDNNBlock(64, 64, kernel_size=3, dilation=1)
+     >>> out_tensor = layer(inp_tensor).transpose(1, 2)
+     >>> out_tensor.shape
+     torch.Size([8, 120, 64])
+     """
+
+     def __init__(
+         self,
+         in_channels,
+         out_channels,
+         kernel_size,
+         dilation,
+         activation=nn.ReLU,
+         groups=1,
+         dropout=0.0,
+     ):
+         super().__init__()
+         self.conv = Conv1d(
+             in_channels=in_channels,
+             out_channels=out_channels,
+             kernel_size=kernel_size,
+             dilation=dilation,
+             groups=groups,
+         )
+         self.activation = activation()
+         self.norm = BatchNorm1d(input_size=out_channels)
+         self.dropout = nn.Dropout1d(p=dropout)
+
+     def forward(self, x):
+         """Processes the input tensor x and returns an output tensor."""
+         return self.dropout(self.norm(self.activation(self.conv(x))))
+
+
+ class Res2NetBlock(torch.nn.Module):
+     """An implementation of Res2NetBlock w/ dilation.
+
+     Arguments
+     ---------
+     in_channels : int
+         The number of channels expected in the input.
+     out_channels : int
+         The number of output channels.
+     scale : int
+         The scale of the Res2Net block.
+     kernel_size: int
+         The kernel size of the Res2Net block.
+     dilation : int
+         The dilation of the Res2Net block.
+     dropout : float
+         Rate of channel dropout during training.
+
+     Example
+     -------
+     >>> inp_tensor = torch.rand([8, 120, 64]).transpose(1, 2)
+     >>> layer = Res2NetBlock(64, 64, scale=4, dilation=3)
+     >>> out_tensor = layer(inp_tensor).transpose(1, 2)
+     >>> out_tensor.shape
+     torch.Size([8, 120, 64])
+     """
+
+     def __init__(
+         self,
+         in_channels,
+         out_channels,
+         scale=8,
+         kernel_size=3,
+         dilation=1,
+         dropout=0.0,
+     ):
+         super().__init__()
+         assert in_channels % scale == 0
+         assert out_channels % scale == 0
+
+         in_channel = in_channels // scale
+         hidden_channel = out_channels // scale
+
+         self.blocks = nn.ModuleList(
+             [
+                 TDNNBlock(
+                     in_channel,
+                     hidden_channel,
+                     kernel_size=kernel_size,
+                     dilation=dilation,
+                     dropout=dropout,
+                 )
+                 for i in range(scale - 1)
+             ]
+         )
+         self.scale = scale
+
+     def forward(self, x):
+         """Processes the input tensor x and returns an output tensor."""
+         y = []
+         for i, x_i in enumerate(torch.chunk(x, self.scale, dim=1)):
+             if i == 0:
+                 y_i = x_i
+             elif i == 1:
+                 y_i = self.blocks[i - 1](x_i)
+             else:
+                 y_i = self.blocks[i - 1](x_i + y_i)
+             y.append(y_i)
+         y = torch.cat(y, dim=1)
+         return y
+
+
+ class SEBlock(nn.Module):
+     """An implementation of squeeze-and-excitation block.
+
+     Arguments
+     ---------
+     in_channels : int
+         The number of input channels.
+     se_channels : int
+         The number of output channels after squeeze.
+     out_channels : int
+         The number of output channels.
+
+     Example
+     -------
+     >>> inp_tensor = torch.rand([8, 120, 64]).transpose(1, 2)
+     >>> se_layer = SEBlock(64, 16, 64)
+     >>> lengths = torch.rand((8,))
+     >>> out_tensor = se_layer(inp_tensor, lengths).transpose(1, 2)
+     >>> out_tensor.shape
+     torch.Size([8, 120, 64])
+     """
+
+     def __init__(self, in_channels, se_channels, out_channels):
+         super().__init__()
+
+         self.conv1 = Conv1d(
+             in_channels=in_channels, out_channels=se_channels, kernel_size=1
+         )
+         self.relu = torch.nn.ReLU(inplace=True)
+         self.conv2 = Conv1d(
+             in_channels=se_channels, out_channels=out_channels, kernel_size=1
+         )
+         self.sigmoid = torch.nn.Sigmoid()
+
+     def forward(self, x, lengths=None):
+         """Processes the input tensor x and returns an output tensor."""
+         L = x.shape[-1]
+         if lengths is not None:
+             mask = length_to_mask(lengths * L, max_len=L, device=x.device)
+             mask = mask.unsqueeze(1)
+             total = mask.sum(dim=2, keepdim=True)
+             s = (x * mask).sum(dim=2, keepdim=True) / total
+         else:
+             s = x.mean(dim=2, keepdim=True)
+
+         s = self.relu(self.conv1(s))
+         s = self.sigmoid(self.conv2(s))
+
+         return s * x
+
+
+ class AttentiveStatisticsPooling(nn.Module):
+     """This class implements an attentive statistic pooling layer for each channel.
+     It returns the concatenated mean and std of the input tensor.
+
+     Arguments
+     ---------
+     channels: int
+         The number of input channels.
+     attention_channels: int
+         The number of attention channels.
+     global_context: bool
+         Whether to use global context.
+
+     Example
+     -------
+     >>> inp_tensor = torch.rand([8, 120, 64]).transpose(1, 2)
+     >>> asp_layer = AttentiveStatisticsPooling(64)
+     >>> lengths = torch.rand((8,))
+     >>> out_tensor = asp_layer(inp_tensor, lengths).transpose(1, 2)
+     >>> out_tensor.shape
+     torch.Size([8, 1, 128])
+     """
+
+     def __init__(self, channels, attention_channels=128, global_context=True):
+         super().__init__()
+
+         self.eps = 1e-12
+         self.global_context = global_context
+         if global_context:
+             self.tdnn = TDNNBlock(channels * 3, attention_channels, 1, 1)
+         else:
+             self.tdnn = TDNNBlock(channels, attention_channels, 1, 1)
+         self.tanh = nn.Tanh()
+         self.conv = Conv1d(
+             in_channels=attention_channels, out_channels=channels, kernel_size=1
+         )
+
+     def forward(self, x, lengths=None):
+         """Calculates mean and std for a batch (input tensor).
+
+         Arguments
+         ---------
+         x : torch.Tensor
+             Tensor of shape [N, C, L].
+         lengths : torch.Tensor
+             The corresponding relative lengths of the inputs.
+
+         Returns
+         -------
+         pooled_stats : torch.Tensor
+             mean and std of batch
+         """
+         L = x.shape[-1]
+
+         def _compute_statistics(x, m, dim=2, eps=self.eps):
+             mean = (m * x).sum(dim)
+             std = torch.sqrt(
+                 (m * (x - mean.unsqueeze(dim)).pow(2)).sum(dim).clamp(eps)
+             )
+             return mean, std
+
+         if lengths is None:
+             lengths = torch.ones(x.shape[0], device=x.device)
+
+         # Make binary mask of shape [N, 1, L]
+         mask = length_to_mask(lengths * L, max_len=L, device=x.device)
+         mask = mask.unsqueeze(1)
+
+         # Expand the temporal context of the pooling layer by allowing the
+         # self-attention to look at global properties of the utterance.
+         if self.global_context:
+             # torch.std is unstable for backward computation
+             # https://github.com/pytorch/pytorch/issues/4320
+             total = mask.sum(dim=2, keepdim=True).float()
+             mean, std = _compute_statistics(x, mask / total)
+             mean = mean.unsqueeze(2).repeat(1, 1, L)
+             std = std.unsqueeze(2).repeat(1, 1, L)
+             attn = torch.cat([x, mean, std], dim=1)
+         else:
+             attn = x
+
+         # Apply layers
+         attn = self.conv(self.tanh(self.tdnn(attn)))
+
+         # Filter out zero-paddings
+         attn = attn.masked_fill(mask == 0, float("-inf"))
+
+         attn = F.softmax(attn, dim=2)
+         mean, std = _compute_statistics(x, attn)
+         # Append mean and std of the batch
+         pooled_stats = torch.cat((mean, std), dim=1)
+         pooled_stats = pooled_stats.unsqueeze(2)
+
+         return pooled_stats
+
+
+ class SERes2NetBlock(nn.Module):
+     """An implementation of building block in ECAPA-TDNN, i.e.,
+     TDNN-Res2Net-TDNN-SEBlock.
+
+     Arguments
+     ---------
+     in_channels: int
+         Expected size of input channels.
+     out_channels: int
+         The number of output channels.
+     res2net_scale: int
+         The scale of the Res2Net block.
+     se_channels : int
+         The number of output channels after squeeze.
+     kernel_size: int
+         The kernel size of the TDNN blocks.
+     dilation: int
+         The dilation of the Res2Net block.
+     activation : torch class
+         A class for constructing the activation layers.
+     groups: int
+         Number of blocked connections from input channels to output channels.
+     dropout: float
+         Rate of channel dropout during training.
+
+     Example
+     -------
+     >>> x = torch.rand(8, 120, 64).transpose(1, 2)
+     >>> conv = SERes2NetBlock(64, 64, res2net_scale=4)
+     >>> out = conv(x).transpose(1, 2)
+     >>> out.shape
+     torch.Size([8, 120, 64])
+     """
+
+     def __init__(
+         self,
+         in_channels,
+         out_channels,
+         res2net_scale=8,
+         se_channels=128,
+         kernel_size=1,
+         dilation=1,
+         activation=torch.nn.ReLU,
+         groups=1,
+         dropout=0.0,
+     ):
+         super().__init__()
+         self.out_channels = out_channels
+         self.tdnn1 = TDNNBlock(
+             in_channels,
+             out_channels,
+             kernel_size=1,
+             dilation=1,
+             activation=activation,
+             groups=groups,
+             dropout=dropout,
+         )
+         self.res2net_block = Res2NetBlock(
+             out_channels, out_channels, res2net_scale, kernel_size, dilation
+         )
+         self.tdnn2 = TDNNBlock(
+             out_channels,
+             out_channels,
+             kernel_size=1,
+             dilation=1,
+             activation=activation,
+             groups=groups,
+             dropout=dropout,
+         )
+         self.se_block = SEBlock(out_channels, se_channels, out_channels)
+
+         self.shortcut = None
+         if in_channels != out_channels:
+             self.shortcut = Conv1d(
+                 in_channels=in_channels,
+                 out_channels=out_channels,
+                 kernel_size=1,
+             )
+
+     def forward(self, x, lengths=None):
+         """Processes the input tensor x and returns an output tensor."""
+         residual = x
+         if self.shortcut:
+             residual = self.shortcut(x)
+
+         x = self.tdnn1(x)
+         x = self.res2net_block(x)
+         x = self.tdnn2(x)
+         x = self.se_block(x, lengths)
+
+         return x + residual
+
+
+ class ECAPA_TDNN(torch.nn.Module):
+     """An implementation of the speaker embedding model in a paper.
+     "ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in
+     TDNN Based Speaker Verification" (https://arxiv.org/abs/2005.07143).
+
+     Arguments
+     ---------
+     input_size : int
+         Expected size of the input dimension.
+     device : str
+         Device used, e.g., "cpu" or "cuda".
+     lin_neurons : int
+         Number of neurons in linear layers.
+     activation : torch class
+         A class for constructing the activation layers.
+     channels : list of ints
+         Output channels for TDNN/SERes2Net layer.
+     kernel_sizes : list of ints
+         List of kernel sizes for each layer.
+     dilations : list of ints
+         List of dilations for kernels in each layer.
+     attention_channels: int
+         The number of attention channels.
+     res2net_scale : int
+         The scale of the Res2Net block.
+     se_channels : int
+         The number of output channels after squeeze.
+     global_context: bool
+         Whether to use global context.
+     groups : list of ints
+         List of groups for kernels in each layer.
+     dropout : float
+         Rate of channel dropout during training.
+
+     Example
+     -------
+     >>> input_feats = torch.rand([5, 120, 80])
+     >>> compute_embedding = ECAPA_TDNN(80, lin_neurons=192)
+     >>> outputs = compute_embedding(input_feats)
+     >>> outputs.shape
+     torch.Size([5, 1, 192])
+     """
+
+     def __init__(
+         self,
+         input_size,
+         device="cpu",
+         lin_neurons=192,
+         activation=torch.nn.ReLU,
+         channels=[512, 512, 512, 512, 1536],
+         kernel_sizes=[5, 3, 3, 3, 1],
+         dilations=[1, 2, 3, 4, 1],
+         attention_channels=128,
+         res2net_scale=8,
+         se_channels=128,
+         global_context=True,
+         groups=[1, 1, 1, 1, 1],
+         dropout=0.0,
+     ):
+         super().__init__()
+         assert len(channels) == len(kernel_sizes)
+         assert len(channels) == len(dilations)
+         self.channels = channels
+         self.blocks = nn.ModuleList()
+
+         # The initial TDNN layer
+         self.blocks.append(
+             TDNNBlock(
+                 input_size,
+                 channels[0],
+                 kernel_sizes[0],
+                 dilations[0],
+                 activation,
+                 groups[0],
+                 dropout,
+             )
+         )
+
+         # SE-Res2Net layers
+         for i in range(1, len(channels) - 1):
+             self.blocks.append(
+                 SERes2NetBlock(
+                     channels[i - 1],
+                     channels[i],
+                     res2net_scale=res2net_scale,
+                     se_channels=se_channels,
+                     kernel_size=kernel_sizes[i],
+                     dilation=dilations[i],
+                     activation=activation,
+                     groups=groups[i],
+                     dropout=dropout,
+                 )
+             )
+
+         # Multi-layer feature aggregation
+         self.mfa = TDNNBlock(
+             channels[-2] * (len(channels) - 2),
+             channels[-1],
+             kernel_sizes[-1],
+             dilations[-1],
+             activation,
+             groups=groups[-1],
+             dropout=dropout,
+         )
+
+         # Attentive Statistical Pooling
+         self.asp = AttentiveStatisticsPooling(
+             channels[-1],
+             attention_channels=attention_channels,
+             global_context=global_context,
+         )
+         self.asp_bn = BatchNorm1d(input_size=channels[-1] * 2)
+
+         # Final linear transformation
+         self.fc = Conv1d(
+             in_channels=channels[-1] * 2,
+             out_channels=lin_neurons,
+             kernel_size=1,
+         )
+
+     def forward(self, x, lengths=None):
+         """Returns the embedding vector.
+
+         Arguments
+         ---------
+         x : torch.Tensor
+             Tensor of shape (batch, time, channel).
+         lengths : torch.Tensor
+             Corresponding relative lengths of inputs.
+
+         Returns
+         -------
+         x : torch.Tensor
+             Embedding vector.
+         """
+         # Minimize transpose for efficiency
+         x = x.transpose(1, 2)
+
+         xl = []
+         for layer in self.blocks:
+             try:
+                 x = layer(x, lengths=lengths)
+             except TypeError:
+                 x = layer(x)
+             xl.append(x)
+
+         # Multi-layer feature aggregation
+         x = torch.cat(xl[1:], dim=1)
+         x = self.mfa(x)
+
+         # Attentive Statistical Pooling
+         x = self.asp(x, lengths=lengths)
+         x = self.asp_bn(x)
+
+         # Final linear transformation
+         x = self.fc(x)
+
+         x = x.transpose(1, 2)
+         return x
+
+
+ class Classifier(torch.nn.Module):
+     """This class implements the cosine similarity on the top of features.
+
+     Arguments
+     ---------
+     input_size : int
+         Expected size of input dimension.
+     device : str
+         Device used, e.g., "cpu" or "cuda".
+     lin_blocks : int
+         Number of linear layers.
+     lin_neurons : int
+         Number of neurons in linear layers.
+     out_neurons : int
+         Number of classes.
+
+     Example
+     -------
+     >>> classify = Classifier(input_size=2, lin_neurons=2, out_neurons=2)
+     >>> outputs = torch.tensor([ [1., -1.], [-9., 1.], [0.9, 0.1], [0.1, 0.9] ])
+     >>> outputs = outputs.unsqueeze(1)
+     >>> cos = classify(outputs)
+     >>> (cos < -1.0).long().sum()
+     tensor(0)
+     >>> (cos > 1.0).long().sum()
+     tensor(0)
+     """
+
+     def __init__(
+         self,
+         input_size,
+         device="cpu",
+         lin_blocks=0,
+         lin_neurons=192,
+         out_neurons=1211,
+     ):
+         super().__init__()
+         self.blocks = nn.ModuleList()
+
+         for block_index in range(lin_blocks):
+             self.blocks.extend(
+                 [
+                     _BatchNorm1d(input_size=input_size),
+                     Linear(input_size=input_size, n_neurons=lin_neurons),
+                 ]
+             )
+             input_size = lin_neurons
+
+         # Final Layer
+         self.weight = nn.Parameter(
+             torch.FloatTensor(out_neurons, input_size, device=device)
+         )
+         nn.init.xavier_uniform_(self.weight)
+
+     def forward(self, x):
+         """Returns the output probabilities over speakers.
+
+         Arguments
+         ---------
+         x : torch.Tensor
+             Torch tensor.
+
+         Returns
+         -------
+         out : torch.Tensor
+             Output probabilities over speakers.
+         """
+         for layer in self.blocks:
+             x = layer(x)
+
+         # Need to be normalized
+         x = F.linear(F.normalize(x.squeeze(1)), F.normalize(self.weight))
+         return x.unsqueeze(1)
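Taken together, `ECAPA_TDNN` maps a (batch, time, features) tensor to one 192-dimensional embedding per utterance, and `Classifier` scores embeddings against its per-class weights by cosine similarity. A minimal sketch of how the two modules chain, assuming random 80-dimensional features in place of a real filterbank front-end (the `out_neurons=10` classifier is purely illustrative):

```python
import torch

from speaker_detector.ECAPA_TDNN import ECAPA_TDNN, Classifier

# Two hypothetical utterances, 200 frames of 80-dim features each.
feats = torch.rand(2, 200, 80)

embedder = ECAPA_TDNN(input_size=80, lin_neurons=192)
classifier = Classifier(input_size=192, out_neurons=10)  # 10 classes, illustrative only

embeddings = embedder(feats)       # (2, 1, 192) speaker embeddings
scores = classifier(embeddings)    # (2, 1, 10) cosine scores in [-1, 1]
print(embeddings.shape, scores.shape)
```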
speaker_detector/__init__.py
File without changes
speaker_detector/__main__.py
@@ -0,0 +1,4 @@
+ from .cli import main
+
+ if __name__ == "__main__":
+     main()
speaker_detector/analyze.py
@@ -0,0 +1,59 @@
+ from pathlib import Path
+ import torchaudio
+ import torch
+ from speaker_detector.core import get_embedding, SPEAKER_AUDIO_DIR
+
+ CHUNK_DURATION = 2.5  # seconds
+
+ def match_speaker(embedding, speaker_embeddings):
+     scores = {}
+     for name, emb in speaker_embeddings.items():
+         score = torch.nn.functional.cosine_similarity(emb, embedding, dim=0).item()
+         scores[name] = score
+     if not scores:
+         return "unknown", 0.0
+     best = max(scores.items(), key=lambda kv: kv[1])
+     return best[0], round(best[1], 3)
+
+ def analyze_meeting(wav_path):
+     waveform, sample_rate = torchaudio.load(wav_path)
+     duration_sec = waveform.shape[1] / sample_rate
+
+     chunk_samples = int(CHUNK_DURATION * sample_rate)
+     num_chunks = int(waveform.shape[1] / chunk_samples)
+
+     # Load enrolled speaker embeddings (one folder of .wav recordings per speaker)
+     speaker_embeddings = {}
+     for spk_dir in SPEAKER_AUDIO_DIR.iterdir():
+         if not spk_dir.is_dir():
+             continue
+         wavs = list(spk_dir.glob("*.wav"))
+         if not wavs:
+             continue
+         # Average multiple embeddings
+         embs = [get_embedding(str(wav)) for wav in wavs]
+         speaker_embeddings[spk_dir.name] = torch.stack(embs).mean(dim=0)
+
+     results = []
+
+     for i in range(num_chunks):
+         start_sample = i * chunk_samples
+         end_sample = start_sample + chunk_samples
+         chunk = waveform[:, start_sample:end_sample]
+
+         tmp_path = Path(wav_path).parent / f"tmp_chunk_{i}.wav"
+         torchaudio.save(str(tmp_path), chunk, sample_rate)
+
+         embedding = get_embedding(str(tmp_path))
+         speaker, score = match_speaker(embedding, speaker_embeddings)
+
+         results.append({
+             "start": round(i * CHUNK_DURATION, 2),
+             "end": round((i + 1) * CHUNK_DURATION, 2),
+             "speaker": speaker,
+             "score": score
+         })
+
+         tmp_path.unlink()  # clean up
+
+     return results
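`analyze_meeting` slices a recording into fixed 2.5 s chunks, embeds each chunk, and labels it with the closest enrolled speaker. A usage sketch, assuming speakers have already been enrolled and that `meeting.wav` is a hypothetical input file:

```python
from speaker_detector.analyze import analyze_meeting

segments = analyze_meeting("meeting.wav")  # hypothetical recording
for seg in segments:
    # One entry per 2.5 s chunk: start/end time, best-matching speaker, cosine score.
    print(seg["start"], seg["end"], seg["speaker"], seg["score"])
```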
speaker_detector/cli.py
@@ -0,0 +1,82 @@
+ import warnings
+ import argparse
+ import os
+
+ def main():
+     parser = argparse.ArgumentParser(prog="speaker-detector", description="Speaker Detector CLI")
+     subparsers = parser.add_subparsers(dest="command")
+
+     # ---- Global options ----
+     parser.add_argument("--verbose", action="store_true", help="Show detailed logs and warnings")
+
+     # ---- enroll ----
+     enroll_cmd = subparsers.add_parser("enroll", help="Enroll a speaker from a .wav file")
+     enroll_cmd.add_argument("speaker_id", help="Name/ID of the speaker")
+     enroll_cmd.add_argument("audio_path", help="Path to .wav file")
+
+     # ---- identify ----
+     identify_cmd = subparsers.add_parser("identify", help="Identify speaker from a .wav file")
+     identify_cmd.add_argument("audio_path", help="Path to .wav file")
+
+     # ---- list-speakers ----
+     subparsers.add_parser("list-speakers", help="List enrolled speakers")
+
+     # ---- export-model ----
+     model_parser = subparsers.add_parser("export-model", help="Export ECAPA model to ONNX")
+     model_parser.add_argument("--pt", required=True, help="Path to embedding_model.ckpt")
+     model_parser.add_argument("--out", default="speaker_embedding.onnx", help="Output ONNX file")
+
+     # ---- export-speaker-json ----
+     emb_parser = subparsers.add_parser("export-speaker-json", help="Convert enrolled .pt file to browser-friendly .json")
+     emb_parser.add_argument("--pt", required=True, help="Path to enrolled_speakers.pt")
+     emb_parser.add_argument("--out", default="speakers.json", help="Output .json file for browser")
+
+     # ---- combine ----
+     comb_parser = subparsers.add_parser("combine", help="Combine individual .pt files into enrolled_speakers.pt")
+     comb_parser.add_argument("--folder", required=True, help="Folder with individual .pt files")
+     comb_parser.add_argument("--out", required=True, help="Output .pt file path")
+
+     # ---- Parse arguments ----
+     args = parser.parse_args()
+
+     # ---- Suppress warnings unless --verbose ----
+     if not args.verbose:
+         warnings.simplefilter("ignore", category=DeprecationWarning)
+         warnings.simplefilter("ignore", category=UserWarning)
+         os.environ["PYTHONWARNINGS"] = "ignore"
+
+     # ---- Import modules after filtering warnings ----
+     from .core import enroll_speaker, identify_speaker, list_speakers
+     from .export_model import export_model_to_onnx
+     from .export_embeddings import export_embeddings_to_json
+     from .combine import combine_embeddings_from_folder
+
+     # ---- Command Dispatch ----
+     if args.command == "enroll":
+         enroll_speaker(args.audio_path, args.speaker_id)
+         print(f"✅ Enrolled: {args.speaker_id}")
+
+     elif args.command == "identify":
+         result = identify_speaker(args.audio_path)
+         print(f"🕵️ Identified: {result['speaker']} (score: {result['score']})")
+
+     elif args.command == "list-speakers":
+         speakers = list_speakers()
+         if speakers:
+             print("📋 Enrolled Speakers:")
+             for s in speakers:
+                 print(f" • {s}")
+         else:
+             print("⚠️ No speakers enrolled yet.")
+
+     elif args.command == "export-model":
+         export_model_to_onnx(args.pt, args.out)
+
+     elif args.command == "export-speaker-json":
+         export_embeddings_to_json(args.pt, args.out)
+
+     elif args.command == "combine":
+         combine_embeddings_from_folder(args.folder, args.out)
+
+     else:
+         parser.print_help()
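Because the heavier imports happen inside `main()` after the warning filters are set, the CLI can also be driven programmatically (for example from a test) by faking `sys.argv`. A small sketch:

```python
import sys

from speaker_detector.cli import main

# Equivalent to running `speaker-detector list-speakers` from a shell.
sys.argv = ["speaker-detector", "list-speakers"]
main()
```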
speaker_detector/combine.py
@@ -0,0 +1,22 @@
+ import torch
+ import os
+
+ def combine_embeddings_from_folder(folder_path, output_path):
+     speaker_data = {}
+
+     for fname in os.listdir(folder_path):
+         if fname.endswith(".pt"):
+             label = os.path.splitext(fname)[0]
+             fpath = os.path.join(folder_path, fname)
+             tensor = torch.load(fpath, map_location="cpu")
+             if not isinstance(tensor, torch.Tensor):
+                 print(f"❌ Skipping {fname}: not a valid tensor")
+                 continue
+             speaker_data[label] = tensor
+
+     if not speaker_data:
+         print("⚠️ No valid .pt files found.")
+         return
+
+     torch.save(speaker_data, output_path)
+     print(f"✅ Combined {len(speaker_data)} speakers into {output_path}")
speaker_detector/core.py
@@ -0,0 +1,103 @@
+ from speechbrain.pretrained import SpeakerRecognition
+ from pathlib import Path
+ import torchaudio
+ import torch
+
+ # Storage directories
+ BASE_DIR = Path(__file__).resolve().parent.parent / "storage"
+ SPEAKER_AUDIO_DIR = BASE_DIR / "speakers"
+ EMBEDDINGS_DIR = BASE_DIR / "embeddings"
+
+ # Ensure they exist
+ SPEAKER_AUDIO_DIR.mkdir(parents=True, exist_ok=True)
+ EMBEDDINGS_DIR.mkdir(parents=True, exist_ok=True)
+
+ # Load model once
+ MODEL = SpeakerRecognition.from_hparams(
+     source="speechbrain/spkrec-ecapa-voxceleb", savedir="model"
+ )
+
+ def get_embedding(audio_path):
+     try:
+         signal, fs = torchaudio.load(audio_path)
+         if signal.numel() == 0:
+             raise ValueError(f"{audio_path} is empty.")
+         return MODEL.encode_batch(signal).squeeze().detach().cpu()
+     except Exception as e:
+         raise RuntimeError(f"Failed to embed {audio_path}: {e}")
+
+ def enroll_speaker(audio_path, speaker_id):
+     speaker_dir = SPEAKER_AUDIO_DIR / speaker_id
+     speaker_dir.mkdir(parents=True, exist_ok=True)
+
+     # Save audio sample
+     existing = list(speaker_dir.glob("*.wav"))
+     new_index = len(existing) + 1
+     dest_path = speaker_dir / f"{new_index}.wav"
+
+     waveform, sample_rate = torchaudio.load(audio_path)
+     if waveform.numel() == 0:
+         raise ValueError("Cannot enroll empty audio file.")
+
+     torchaudio.save(str(dest_path), waveform, sample_rate)
+     print(f"🎙 Saved {speaker_id}'s recording #{new_index} → {dest_path}")
+
+     # Save embedding
+     emb = get_embedding(audio_path)
+     emb_path = EMBEDDINGS_DIR / f"{speaker_id}.pt"
+     torch.save(emb, emb_path)
+     print(f"🧠 Saved embedding for {speaker_id} → {emb_path}")
+
+ def identify_speaker(audio_path, threshold=0.25):
+     try:
+         test_emb = get_embedding(audio_path)
+     except Exception as e:
+         return {"speaker": "error", "score": 0, "error": str(e)}
+
+     scores = {}
+     for emb_path in EMBEDDINGS_DIR.glob("*.pt"):
+         speaker_name = emb_path.stem
+         try:
+             enrolled_emb = torch.load(emb_path)
+             score = torch.nn.functional.cosine_similarity(enrolled_emb, test_emb, dim=0).item()
+             scores[speaker_name] = score
+         except Exception:
+             continue
+
+     if not scores:
+         return {"speaker": "unknown", "score": 0}
+
+     sorted_scores = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
+     best, second = sorted_scores[0], (sorted_scores[1] if len(sorted_scores) > 1 else None)
+     auto_thresh = best[1] - (second[1] if second else 0) > 0.1
+     is_match = auto_thresh or best[1] >= threshold
+
+     result = {
+         "speaker": best[0] if is_match else "unknown",
+         "score": round(best[1], 3),
+         "all_scores": {k: round(v, 3) for k, v in sorted_scores}
+     }
+     return result
+
+ def list_speakers():
+     speakers = []
+     for spk_dir in SPEAKER_AUDIO_DIR.iterdir():
+         if spk_dir.is_dir():
+             count = len(list(spk_dir.glob("*.wav")))
+             speakers.append(f"{spk_dir.name} ({count} recording{'s' if count != 1 else ''})")
+     print(f"📋 Found {len(speakers)} enrolled speaker(s): {speakers}")
+     return [s.split()[0] for s in speakers]
+
+ def rebuild_embedding(speaker_id):
+     speaker_dir = SPEAKER_AUDIO_DIR / speaker_id
+     wavs = list(speaker_dir.glob("*.wav"))
+
+     if not wavs:
+         raise RuntimeError(f"No recordings found for {speaker_id}.")
+
+     embeddings = [get_embedding(w) for w in wavs]
+     avg_emb = torch.stack(embeddings).mean(dim=0)
+
+     emb_path = EMBEDDINGS_DIR / f"{speaker_id}.pt"
+     torch.save(avg_emb, emb_path)
+     print(f"🔁 Rebuilt embedding for {speaker_id}")
speaker_detector/export_embeddings.py
@@ -0,0 +1,41 @@
+ import torch
+ import json
+
+ def export_embeddings_to_json(pt_path, json_path):
+     """
+     Converts a .pt file containing speaker embeddings into a
+     JSON file for use in the browser frontend.
+
+     Expected input format:
+         {
+             "lara": tensor([...]),
+             "guest": tensor([...]),
+             ...
+         }
+
+     Output format:
+         [
+             { "label": "lara", "vector": [...] },
+             { "label": "guest", "vector": [...] },
+             ...
+         ]
+     """
+     data = torch.load(pt_path, map_location="cpu")
+
+     if not isinstance(data, dict):
+         raise ValueError("Expected a dict of {label: tensor} in the .pt file")
+
+     converted = []
+     for label, tensor in data.items():
+         if not isinstance(tensor, torch.Tensor):
+             print(f"⚠️ Skipping {label}: not a tensor")
+             continue
+         converted.append({
+             "label": label,
+             "vector": tensor.tolist()
+         })
+
+     with open(json_path, "w") as f:
+         json.dump(converted, f, indent=2)
+
+     print(f"✅ Exported {len(converted)} speaker embeddings to {json_path}")
speaker_detector/export_model.py
@@ -0,0 +1,40 @@
+ import torch
+ from speechbrain.lobes.models.ECAPA_TDNN import ECAPA_TDNN
+ from collections import OrderedDict
+
+ def export_model_to_onnx(ckpt_path, out_path):
+     model = ECAPA_TDNN(
+         input_size=80,
+         channels=[1024, 1024, 1024, 1024, 3072],
+         kernel_sizes=[5, 3, 3, 3, 1],
+         dilations=[1, 2, 3, 4, 1],
+         attention_channels=128,
+         lin_neurons=192,
+     )
+
+     state_dict = torch.load(ckpt_path, map_location="cpu")
+
+     if "model" in state_dict:
+         state_dict = state_dict["model"]
+
+     new_state_dict = OrderedDict()
+     for k, v in state_dict.items():
+         if k.startswith("embedding_model."):
+             k = k[len("embedding_model."):]
+         new_state_dict[k] = v
+
+     model.load_state_dict(new_state_dict)
+     model.eval()
+
+     dummy_input = torch.randn(1, 200, 80)
+     torch.onnx.export(
+         model,
+         dummy_input,
+         out_path,
+         input_names=["features"],
+         output_names=["embedding"],
+         dynamic_axes={"features": {0: "batch", 1: "time"}},
+         opset_version=12,
+     )
+
+     print(f"✅ Exported ECAPA-TDNN to {out_path}")
speaker_detector/generate_summary.py
@@ -0,0 +1,110 @@
+ import os
+ import torch
+ import torchaudio
+ import requests
+ from pathlib import Path
+ from pydub import AudioSegment
+ from dotenv import load_dotenv
+ from speaker_detector.core import get_embedding, SPEAKER_AUDIO_DIR
+
+ load_dotenv()
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+ CHUNK_DURATION = 8  # seconds
+ SCORE_THRESHOLD = 0.6
+ MIN_VALID_DURATION = 1.0  # seconds
+ WHISPER_API_URL = "https://api.openai.com/v1/audio/transcriptions"
+
+ def match_speaker(embedding, speaker_embeddings):
+     scores = {
+         name: torch.nn.functional.cosine_similarity(emb, embedding, dim=0).item()
+         for name, emb in speaker_embeddings.items()
+     }
+     if not scores:
+         return "unknown", 0.0
+     best = max(scores.items(), key=lambda kv: kv[1])
+     return best[0], round(best[1], 3)
+
+ def transcribe_full_audio(wav_path: Path) -> str:
+     try:
+         with open(wav_path, "rb") as f:
+             response = requests.post(
+                 WHISPER_API_URL,
+                 headers={"Authorization": f"Bearer {OPENAI_API_KEY}"},
+                 files={"file": (wav_path.name, f, "audio/wav")},
+                 data={
+                     "model": "whisper-1",
+                     "response_format": "json",
+                     "temperature": 0.2,
+                     "language": "en",
+                     "prompt": "This is a meeting transcription.",
+                 },
+                 timeout=120
+             )
+         response.raise_for_status()
+         return response.json()["text"].strip()
+     except Exception as e:
+         print(f"❌ Whisper failed: {e}")
+         return ""
+
+ def is_valid_audio(path):
+     try:
+         waveform, sample_rate = torchaudio.load(str(path))
+         duration_sec = waveform.shape[1] / sample_rate
+         return duration_sec >= MIN_VALID_DURATION
+     except Exception:
+         return False
+
+ def generate_summary(meeting_dir: Path):
+     meeting_dir = meeting_dir.resolve()
+     chunk_files = sorted([
+         f for f in meeting_dir.iterdir()
+         if f.name.startswith("chunk_") and f.suffix == ".wav" and is_valid_audio(f)
+     ])
+
+     if not chunk_files:
+         return {"warning": "No valid .wav chunks found in meeting folder.", "segments": []}
+
+     # Merge all chunks into one file
+     combined = AudioSegment.empty()
+     for f in chunk_files:
+         combined += AudioSegment.from_wav(f)
+     merged_path = meeting_dir / "combined.wav"
+     combined.export(merged_path, format="wav")
+
+     # Get full transcript
+     full_text = transcribe_full_audio(merged_path)
+     print("🧠 Full transcript:", full_text)
+
+     # Load speaker embeddings
+     speaker_embeddings = {}
+     for spk_dir in SPEAKER_AUDIO_DIR.iterdir():
+         if spk_dir.is_dir():
+             wavs = [w for w in spk_dir.glob("*.wav") if is_valid_audio(w)]
+             if wavs:
+                 embs = [get_embedding(str(w)) for w in wavs]
+                 speaker_embeddings[spk_dir.name] = torch.stack(embs).mean(dim=0)
+
+     segments = []
+     total = len(chunk_files)
+
+     for idx, chunk in enumerate(chunk_files):
+         try:
+             emb = get_embedding(chunk)
+             speaker, score = match_speaker(emb, speaker_embeddings)
+             segment_text = f"[chunk {idx+1}]"
+             segments.append({
+                 "timestamp": idx * CHUNK_DURATION,
+                 "speaker": speaker if score >= SCORE_THRESHOLD else "unknown",
+                 "score": round(score, 3),
+                 "text": segment_text,
+                 "progress": round((idx + 1) / total * 100)
+             })
+         except Exception as e:
+             print(f"❌ Failed on {chunk.name}: {e}")
+
+     return {
+         "transcript": full_text,
+         "segments": segments if segments else [],
+         "warning": None if segments else "No speaker segments found."
+     }
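`generate_summary` stitches the `chunk_*.wav` files in a meeting folder together, sends the merged audio to the Whisper API for a transcript, and tags each chunk with the closest enrolled speaker. A usage sketch, assuming `OPENAI_API_KEY` is set (in the environment or a `.env` file) and that the folder path is hypothetical:

```python
from pathlib import Path

from speaker_detector.generate_summary import generate_summary

summary = generate_summary(Path("meetings/2024-01-10"))  # folder with chunk_000.wav, chunk_001.wav, ...
print(summary["transcript"])
for seg in summary["segments"]:
    print(seg["timestamp"], seg["speaker"], seg["score"], seg["text"])
```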
speaker_detector-0.1.3.dist-info/METADATA
@@ -0,0 +1,101 @@
+ Metadata-Version: 2.4
+ Name: speaker-detector
+ Version: 0.1.3
+ Summary: A CLI tool for speaker enrollment and identification using SpeechBrain.
+ Author-email: Lara Whybrow <lara.whybrow@gmail.com>
+ License: MIT
+ Project-URL: Homepage, https://github.com/P0llen/speaker-detector
+ Project-URL: Repository, https://github.com/P0llen/speaker-detector
+ Project-URL: Issues, https://github.com/P0llen/speaker-detector/issues
+ Project-URL: Documentation, https://github.com/P0llen/speaker-detector#readme
+ Keywords: speaker-recognition,speechbrain,voice,cli,ai
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ Requires-Dist: torch
+ Requires-Dist: torchaudio
+ Requires-Dist: speechbrain
+ Requires-Dist: onnx
+
+ # speaker-detector 🎙️
+
+ A lightweight CLI tool for speaker enrollment and voice identification, powered by [SpeechBrain](https://speechbrain.readthedocs.io/).
+
+ ## 🔧 Features
+
+ - ✅ Enroll speakers from .wav audio
+ - 🕵️ Identify speakers from audio samples
+ - 🧠 ECAPA-TDNN embedding-based matching
+ - 🎛️ Simple, fast command-line interface
+ - 📁 Clean file storage in a local `storage/` directory (`storage/speakers/`, `storage/embeddings/`)
+ - 🔊 Optional `--verbose` mode for debugging
+
+ ## 📦 Installation
+
+ Install from [TestPyPI](https://test.pypi.org/):
+
+ ```bash
+ pip install --index-url https://test.pypi.org/simple/ speaker-detector
+ ```
+
+ ## 🚀 Usage
+
+ ### 🎙️ Enroll a speaker:
+
+ ```bash
+ speaker-detector enroll Lara samples/lara1.wav
+ ```
+
+ ### 🕵️ Identify a speaker:
+
+ ```bash
+ speaker-detector identify samples/test_sample.wav
+ ```
+
+ ### 📋 List enrolled speakers:
+
+ ```bash
+ speaker-detector list-speakers
+ ```
+
+ ## 🗂️ Project Structure
+
+ storage/speakers/<speaker_id>/*.wav    Saved speaker recordings
+ storage/embeddings/<speaker_id>.pt     Saved voice embeddings
+
+ ## 🧹 Clean vs Verbose Mode
+
+ By default, warnings from speechbrain, torch, etc. are hidden for a clean CLI experience.
+ To enable full logs & deprecation warnings:
+
+ ```bash
+ speaker-detector --verbose identify samples/test_sample.wav
+ ```
+
+ ## 🛠 Requirements
+
+ - Python 3.8+
+ - torch
+ - torchaudio
+ - speechbrain
+ - onnx
+ - onnxruntime (optional, only needed to run the exported ONNX model)
+
+ | Step | Command | When / Purpose | Output |
+ | --- | --- | --- | --- |
+ | **1. Export ECAPA Model to ONNX** | `speaker-detector export-model --pt models/embedding_model.ckpt --out ecapa_model.onnx` | Run once unless model changes | `ecapa_model.onnx` |
+ | **2. Enroll Speaker** | `speaker-detector enroll <speaker_id> <audio_path>`<br>Example:<br>`speaker-detector enroll Lara samples/lara1.wav` | Run per new speaker | Individual `.pt` files (e.g., `Lara.pt`) |
+ | **3. Combine Embeddings** | `speaker-detector combine --folder data/embeddings/ --out data/enrolled_speakers.pt` | After enrolling speakers | `enrolled_speakers.pt` |
+ | **4. Export Speakers to JSON** | `speaker-detector export-speaker-json --pt data/enrolled_speakers.pt --out public/speakers.json` | For frontend use | `speakers.json` |
+ | **5. Identify Speaker** | `speaker-detector identify samples/test_sample.wav` | Identify speaker from audio | Console output: name + score |
+ | **6. List Enrolled Speakers** | `speaker-detector list-speakers` | Show all enrolled speakers | Console output: list of IDs |
+ | **Verbose Mode (optional)** | Add `--verbose` to any command:<br>`speaker-detector --verbose identify samples/test_sample.wav` | Show warnings, detailed logs | Developer debug info |
+
+ NB: When pushing to GitHub, do not include any .identifier files.
speaker_detector-0.1.3.dist-info/RECORD
@@ -0,0 +1,15 @@
+ speaker_detector/ECAPA_TDNN.py,sha256=KB5T-ye4c9ZWgTgn_SMH-T_-qYSEHQJJtf3xHjsfNPk,19024
+ speaker_detector/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ speaker_detector/__main__.py,sha256=EClCwCzb6h6YBpt0hrnG4h0mlNhNePyg_xBNNSVm1os,65
+ speaker_detector/analyze.py,sha256=sA8qyzczdHUbJw2_1JIbXn1WpiKC5dHLPRtPPoppJzY,1943
+ speaker_detector/cli.py,sha256=TKci4o4Fru-3NqUkPDRQRvtis2niNEAh9sQWwE5t6Us,3521
+ speaker_detector/combine.py,sha256=yCiqG6VMojz0CxSTPqjx0RrUban8oFIcKlA1zFMzaU4,761
+ speaker_detector/core.py,sha256=lQNOcmZs2IJOqrNKlk1BeVQX6tzc7BSpeP5Gordff-E,3586
+ speaker_detector/export_embeddings.py,sha256=OxNXadzEiMEJgpmCG6HHFncUX7DumFvTOys1R6UMUnw,1151
+ speaker_detector/export_model.py,sha256=qVVT2wSCnsPA8pSAEEyIMkY7Kc8uAgepc03MxBMT3xU,1146
+ speaker_detector/generate_summary.py,sha256=oTWEf2bxTCRIUl8L17-J64FyhRbCPnDjihFluEnBWc8,3726
+ speaker_detector-0.1.3.dist-info/METADATA,sha256=ilNHpXunmGzuyDUxadMHolzhHX2aNjsOBWZe8mHDgQc,4564
+ speaker_detector-0.1.3.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+ speaker_detector-0.1.3.dist-info/entry_points.txt,sha256=2B30ee2cTyeeA49x_TBURl53bDRiLWGK3NWhb9rlK3s,63
+ speaker_detector-0.1.3.dist-info/top_level.txt,sha256=PJ5rfvd3GAbzMbc7-Fwhtufjf6HxzzTiiHociOy7RiM,17
+ speaker_detector-0.1.3.dist-info/RECORD,,
speaker_detector-0.1.3.dist-info/WHEEL
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.7.1)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
speaker_detector-0.1.3.dist-info/entry_points.txt
@@ -0,0 +1,2 @@
+ [console_scripts]
+ speaker-detector = speaker_detector.cli:main
speaker_detector-0.1.3.dist-info/top_level.txt
@@ -0,0 +1 @@
+ speaker_detector