speaker-detector 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- speaker_detector-0.1.3/PKG-INFO +101 -0
- speaker_detector-0.1.3/README.md +75 -0
- speaker_detector-0.1.3/pyproject.toml +50 -0
- speaker_detector-0.1.3/setup.cfg +4 -0
- speaker_detector-0.1.3/speaker_detector/ECAPA_TDNN.py +633 -0
- speaker_detector-0.1.3/speaker_detector/__init__.py +0 -0
- speaker_detector-0.1.3/speaker_detector/__main__.py +4 -0
- speaker_detector-0.1.3/speaker_detector/analyze.py +59 -0
- speaker_detector-0.1.3/speaker_detector/cli.py +82 -0
- speaker_detector-0.1.3/speaker_detector/combine.py +22 -0
- speaker_detector-0.1.3/speaker_detector/core.py +103 -0
- speaker_detector-0.1.3/speaker_detector/export_embeddings.py +41 -0
- speaker_detector-0.1.3/speaker_detector/export_model.py +40 -0
- speaker_detector-0.1.3/speaker_detector/generate_summary.py +110 -0
- speaker_detector-0.1.3/speaker_detector.egg-info/PKG-INFO +101 -0
- speaker_detector-0.1.3/speaker_detector.egg-info/SOURCES.txt +18 -0
- speaker_detector-0.1.3/speaker_detector.egg-info/dependency_links.txt +1 -0
- speaker_detector-0.1.3/speaker_detector.egg-info/entry_points.txt +2 -0
- speaker_detector-0.1.3/speaker_detector.egg-info/requires.txt +4 -0
- speaker_detector-0.1.3/speaker_detector.egg-info/top_level.txt +1 -0
@@ -0,0 +1,101 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: speaker-detector
|
3
|
+
Version: 0.1.3
|
4
|
+
Summary: A CLI tool for speaker enrollment and identification using SpeechBrain.
|
5
|
+
Author-email: Lara Whybrow <lara.whybrow@gmail.com>
|
6
|
+
License: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/P0llen/speaker-detector
|
8
|
+
Project-URL: Repository, https://github.com/P0llen/speaker-detector
|
9
|
+
Project-URL: Issues, https://github.com/P0llen/speaker-detector/issues
|
10
|
+
Project-URL: Documentation, https://github.com/P0llen/speaker-detector#readme
|
11
|
+
Keywords: speaker-recognition,speechbrain,voice,cli,ai
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
13
|
+
Classifier: Intended Audience :: Developers
|
14
|
+
Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
17
|
+
Classifier: Programming Language :: Python :: 3.8
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
20
|
+
Requires-Python: >=3.8
|
21
|
+
Description-Content-Type: text/markdown
|
22
|
+
Requires-Dist: torch
|
23
|
+
Requires-Dist: torchaudio
|
24
|
+
Requires-Dist: speechbrain
|
25
|
+
Requires-Dist: onnx
|
26
|
+
|
27
|
+
# speaker-detector 🎙️
|
28
|
+
|
29
|
+
A lightweight CLI tool for speaker enrollment and voice identification, powered by [SpeechBrain](https://speechbrain.readthedocs.io/).
|
30
|
+
|
31
|
+
## ๐ง Features
|
32
|
+
|
33
|
+
|
34
|
+
- ✅ Enroll speakers from .wav audio
|
35
|
+
- 🕵️ Identify speakers from audio samples
|
36
|
+
- ๐ง ECAPA-TDNN embedding-based matching
|
37
|
+
- ๐๏ธ Simple, fast command-line interface
|
38
|
+
- ๐ Clean file storage in `~/.speaker-detector/`
|
39
|
+
- ๐ Optional `--verbose` mode for debugging
|
40
|
+
|
41
|
+
|
42
|
+
## 📦 Installation
|
43
|
+
|
44
|
+
Install from [TestPyPI](https://test.pypi.org/):
|
45
|
+
|
46
|
+
```bash
|
47
|
+
pip install --index-url https://test.pypi.org/simple/ speaker-detector
|
48
|
+
```
|
49
|
+
|
50
|
+
## ๐ Usage
|
51
|
+
|
52
|
+
## ๐๏ธ Enroll a speaker:
|
53
|
+
|
54
|
+
```bash
|
55
|
+
speaker-detector record --enroll Lara
|
56
|
+
```
|
57
|
+
|
58
|
+
## 🕵️ Identify a speaker:
|
59
|
+
|
60
|
+
```bash
|
61
|
+
speaker-detector record --test
|
62
|
+
```
|
63
|
+
## ๐ List enrolled speakers:
|
64
|
+
|
65
|
+
```bash
|
66
|
+
speaker-detector list
|
67
|
+
```
|
68
|
+
|
69
|
+
## ๐๏ธ Project Structure
|
70
|
+
|
71
|
+
~/.speaker-detector/enrollments/ Saved .pt voice embeddings
|
72
|
+
~/.speaker-detector/recordings/ CLI-recorded .wav audio files
|
73
|
+
|
74
|
+
## 🧹 Clean vs Verbose Mode
|
75
|
+
By default, warnings from speechbrain, torch, etc. are hidden for a clean CLI experience.
|
76
|
+
To enable full logs & deprecation warnings:
|
77
|
+
|
78
|
+
speaker-detector --verbose identify samples/test_sample.wav
|
79
|
+
|
80
|
+
๐ Requirements
|
81
|
+
Python 3.8+
|
82
|
+
torch
|
83
|
+
speechbrain
|
84
|
+
numpy
|
85
|
+
soundfile
|
86
|
+
onnxruntime
|
87
|
+
|
88
|
+
| Step | Command | When / Purpose | Output |
|
89
|
+
| --------------------------------- | ------------------------------------------------------------------------------------------------------------------- | ----------------------------- | ---------------------------------------- |
|
90
|
+
| **1. Export ECAPA Model to ONNX** | `speaker-detector export-model --pt models/embedding_model.ckpt --out ecapa_model.onnx` | Run once unless model changes | `ecapa_model.onnx` |
|
91
|
+
| **2. Enroll Speaker** | `speaker-detector enroll <speaker_id> <audio_path>`<br>Example:<br>`speaker-detector enroll Lara samples/lara1.wav` | Run per new speaker | Individual `.pt` files (e.g., `Lara.pt`) |
|
92
|
+
| **3. Combine Embeddings** | `speaker-detector combine --folder data/embeddings/ --out data/enrolled_speakers.pt` | After enrolling speakers | `enrolled_speakers.pt` |
|
93
|
+
| **4. Export Speakers to JSON** | `speaker-detector export-speaker-json --pt data/enrolled_speakers.pt --out public/speakers.json` | For frontend use | `speakers.json` |
|
94
|
+
| **5. Identify Speaker** | `speaker-detector identify samples/test_sample.wav` | Identify speaker from audio | Console output: name + score |
|
95
|
+
| **6. List Enrolled Speakers** | `speaker-detector list-speakers` | Show all enrolled speakers | Console output: list of IDs |
|
96
|
+
| **Verbose Mode (optional)** | Add `--verbose` to any command:<br>`speaker-detector --verbose identify samples/test_sample.wav` | Show warnings, detailed logs | Developer debug info |
|
97
|
+
|
98
|
+
|
99
|
+
|
100
|
+
|
101
|
+
NB: When pushing to GitHub, do not include any .identifier files.
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# speaker-detector 🎙️
|
2
|
+
|
3
|
+
A lightweight CLI tool for speaker enrollment and voice identification, powered by [SpeechBrain](https://speechbrain.readthedocs.io/).
|
4
|
+
|
5
|
+
## ๐ง Features
|
6
|
+
|
7
|
+
|
8
|
+
- ✅ Enroll speakers from .wav audio
|
9
|
+
- 🕵️ Identify speakers from audio samples
|
10
|
+
- ๐ง ECAPA-TDNN embedding-based matching
|
11
|
+
- ๐๏ธ Simple, fast command-line interface
|
12
|
+
- ๐ Clean file storage in `~/.speaker-detector/`
|
13
|
+
- ๐ Optional `--verbose` mode for debugging
|
14
|
+
|
15
|
+
|
16
|
+
## 📦 Installation
|
17
|
+
|
18
|
+
Install from [TestPyPI](https://test.pypi.org/):
|
19
|
+
|
20
|
+
```bash
|
21
|
+
pip install --index-url https://test.pypi.org/simple/ speaker-detector
|
22
|
+
```
|
23
|
+
|
24
|
+
## ๐ Usage
|
25
|
+
|
26
|
+
## ๐๏ธ Enroll a speaker:
|
27
|
+
|
28
|
+
```bash
|
29
|
+
speaker-detector record --enroll Lara
|
30
|
+
```
|
31
|
+
|
32
|
+
## 🕵️ Identify a speaker:
|
33
|
+
|
34
|
+
```bash
|
35
|
+
speaker-detector record --test
|
36
|
+
```
|
37
|
+
## ๐ List enrolled speakers:
|
38
|
+
|
39
|
+
```bash
|
40
|
+
speaker-detector list
|
41
|
+
```
|
42
|
+
|
43
|
+
## ๐๏ธ Project Structure
|
44
|
+
|
45
|
+
~/.speaker-detector/enrollments/ Saved .pt voice embeddings
|
46
|
+
~/.speaker-detector/recordings/ CLI-recorded .wav audio files
|
47
|
+
|
48
|
+
## 🧹 Clean vs Verbose Mode
|
49
|
+
By default, warnings from speechbrain, torch, etc. are hidden for a clean CLI experience.
|
50
|
+
To enable full logs & deprecation warnings:
|
51
|
+
|
52
|
+
speaker-detector --verbose identify samples/test_sample.wav
|
53
|
+
|
54
|
+
๐ Requirements
|
55
|
+
Python 3.8+
|
56
|
+
torch
|
57
|
+
speechbrain
|
58
|
+
numpy
|
59
|
+
soundfile
|
60
|
+
onnxruntime
|
61
|
+
|
62
|
+
| Step | Command | When / Purpose | Output |
|
63
|
+
| --------------------------------- | ------------------------------------------------------------------------------------------------------------------- | ----------------------------- | ---------------------------------------- |
|
64
|
+
| **1. Export ECAPA Model to ONNX** | `speaker-detector export-model --pt models/embedding_model.ckpt --out ecapa_model.onnx` | Run once unless model changes | `ecapa_model.onnx` |
|
65
|
+
| **2. Enroll Speaker** | `speaker-detector enroll <speaker_id> <audio_path>`<br>Example:<br>`speaker-detector enroll Lara samples/lara1.wav` | Run per new speaker | Individual `.pt` files (e.g., `Lara.pt`) |
|
66
|
+
| **3. Combine Embeddings** | `speaker-detector combine --folder data/embeddings/ --out data/enrolled_speakers.pt` | After enrolling speakers | `enrolled_speakers.pt` |
|
67
|
+
| **4. Export Speakers to JSON** | `speaker-detector export-speaker-json --pt data/enrolled_speakers.pt --out public/speakers.json` | For frontend use | `speakers.json` |
|
68
|
+
| **5. Identify Speaker** | `speaker-detector identify samples/test_sample.wav` | Identify speaker from audio | Console output: name + score |
|
69
|
+
| **6. List Enrolled Speakers** | `speaker-detector list-speakers` | Show all enrolled speakers | Console output: list of IDs |
|
70
|
+
| **Verbose Mode (optional)** | Add `--verbose` to any command:<br>`speaker-detector --verbose identify samples/test_sample.wav` | Show warnings, detailed logs | Developer debug info |
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
NB: When pushing to GitHub, do not include any .identifier files.
|
@@ -0,0 +1,50 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
3
|
+
build-backend = "setuptools.build_meta"
|
4
|
+
|
5
|
+
[project]
|
6
|
+
name = "speaker-detector"
|
7
|
+
version = "0.1.3"
|
8
|
+
description = "A CLI tool for speaker enrollment and identification using SpeechBrain."
|
9
|
+
readme = "README.md"
|
10
|
+
requires-python = ">=3.8"
|
11
|
+
license = { text = "MIT" }
|
12
|
+
|
13
|
+
authors = [
|
14
|
+
{ name = "Lara Whybrow", email = "lara.whybrow@gmail.com" }
|
15
|
+
]
|
16
|
+
|
17
|
+
classifiers = [
|
18
|
+
"Development Status :: 3 - Alpha",
|
19
|
+
"Intended Audience :: Developers",
|
20
|
+
"Topic :: Multimedia :: Sound/Audio :: Speech",
|
21
|
+
"License :: OSI Approved :: MIT License",
|
22
|
+
"Programming Language :: Python :: 3",
|
23
|
+
"Programming Language :: Python :: 3.8",
|
24
|
+
"Programming Language :: Python :: 3.9",
|
25
|
+
"Programming Language :: Python :: 3.10"
|
26
|
+
]
|
27
|
+
|
28
|
+
keywords = ["speaker-recognition", "speechbrain", "voice", "cli", "ai"]
|
29
|
+
|
30
|
+
dependencies = [
|
31
|
+
"torch",
|
32
|
+
"torchaudio",
|
33
|
+
"speechbrain",
|
34
|
+
"onnx"
|
35
|
+
]
|
36
|
+
|
37
|
+
[project.scripts]
|
38
|
+
speaker-detector = "speaker_detector.cli:main"
|
39
|
+
|
40
|
+
[project.urls]
|
41
|
+
Homepage = "https://github.com/P0llen/speaker-detector"
|
42
|
+
Repository = "https://github.com/P0llen/speaker-detector"
|
43
|
+
Issues = "https://github.com/P0llen/speaker-detector/issues"
|
44
|
+
Documentation = "https://github.com/P0llen/speaker-detector#readme"
|
45
|
+
|
46
|
+
[tool.setuptools]
|
47
|
+
packages = ["speaker_detector"]
|
48
|
+
|
49
|
+
[tool.setuptools.package-data]
|
50
|
+
speaker_detector = ["*.onnx", "*.json", "*.yaml", "models/*"]
|