speaker-detector 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. speaker_detector/cli.py +12 -26
  2. speaker_detector/core.py +78 -65
  3. speaker_detector/model/classifier.ckpt +0 -0
  4. speaker_detector/model/embedding_model.ckpt +0 -0
  5. speaker_detector/model/hyperparams.yaml +58 -0
  6. speaker_detector/model/label_encoder.ckpt +7207 -0
  7. speaker_detector/model/mean_var_norm_emb.ckpt +0 -0
  8. speaker_detector/server copy.py +296 -0
  9. speaker_detector/server.py +82 -0
  10. speaker_detector/state.py +69 -0
  11. speaker_detector/web/static/favicon.ico +0 -0
  12. speaker_detector/web/static/index.html +29 -0
  13. speaker_detector/web/static/scripts/loader copy.js +10 -0
  14. speaker_detector/web/static/scripts/loader.js +14 -0
  15. speaker_detector/web/static/scripts/script copy.js +954 -0
  16. speaker_detector/web/static/scripts/script.js +22 -0
  17. speaker_detector/web/static/style.css +133 -0
  18. {speaker_detector-0.1.5.dist-info → speaker_detector-0.1.6.dist-info}/METADATA +28 -3
  19. speaker_detector-0.1.6.dist-info/RECORD +25 -0
  20. {speaker_detector-0.1.5.dist-info → speaker_detector-0.1.6.dist-info}/WHEEL +1 -1
  21. speaker_detector/analyze.py +0 -59
  22. speaker_detector/combine.py +0 -22
  23. speaker_detector/export_embeddings.py +0 -62
  24. speaker_detector/export_model.py +0 -40
  25. speaker_detector/generate_summary.py +0 -110
  26. speaker_detector-0.1.5.dist-info/RECORD +0 -15
  27. /speaker_detector/{ECAPA_TDNN.py → model/ECAPA_TDNN.py} +0 -0
  28. /speaker_detector/{__init__.py → web/static/__init__.py} +0 -0
  29. {speaker_detector-0.1.5.dist-info → speaker_detector-0.1.6.dist-info}/entry_points.txt +0 -0
  30. {speaker_detector-0.1.5.dist-info → speaker_detector-0.1.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,296 @@
1
+ # server.py
2
+
3
+ import os
4
+ import tempfile
5
+ import threading
6
+ import time
7
+ import signal
8
+ import json
9
+ import sounddevice as sd
10
+ import soundfile as sf
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+ from flask import Flask, request, jsonify, send_from_directory
14
+ from flask_cors import CORS
15
+ from pydub import AudioSegment
16
+ import numpy as np
17
+
18
+ from speaker_detector.utils.generate_index import regenerate_component_index
19
+ regenerate_component_index(verbose=False)
20
+
21
+
22
+ from speaker_detector.core import (
23
+ identify_speaker,
24
+ rebuild_embedding,
25
+ compute_background_embedding,
26
+ get_speakers_needing_rebuild,
27
+ )
28
+ from speaker_detector.utils.paths import (
29
+ STATIC_DIR,
30
+ STORAGE_DIR,
31
+ SPEAKERS_DIR,
32
+ NOISE_DIR,
33
+ EXPORTS_DIR,
34
+ MEETINGS_DIR,
35
+ COMPONENTS_DIR,
36
+ INDEX_JSON,
37
+ )
38
+
39
+ # ── Flask Setup ─────────────────────────────────────────────────────
40
# Flask application; its static folder is the packaged STATIC_DIR.
app = Flask(__name__, static_folder=str(STATIC_DIR))

# ── State ───────────────────────────────────────────────────────────
# Most recent result published by the background detection loop.
current_speaker = dict(speaker=None, confidence=None)
# Cleared when the loop hits a PortAudioError, set again after a good capture.
MIC_AVAILABLE = True
# Set by the SIGINT handler to ask the background loop to finish.
stop_event = threading.Event()
LISTENING_MODE = dict(mode="single")  # off, single, multi
DETECTION_INTERVAL_MS = 3000
# Passed to identify_speaker() as its threshold argument.
DETECTION_THRESHOLD = 0.75

# ── Setup ───────────────────────────────────────────────────────────
# Create the storage directories up front so the routes can assume they exist.
for storage_dir in (SPEAKERS_DIR, NOISE_DIR, EXPORTS_DIR, MEETINGS_DIR):
    storage_dir.mkdir(parents=True, exist_ok=True)
56
+
57
def get_speaker_folder(name: str) -> Path:
    """Return the directory under SPEAKERS_DIR that holds a speaker's files.

    ``name`` is taken from URL path segments by the routes in this module, so
    it is only accepted when it is a single plain path component. Without the
    check a value such as ``..`` would resolve to a directory outside
    SPEAKERS_DIR.

    Args:
        name: Speaker name as received from the request URL.

    Returns:
        ``SPEAKERS_DIR / name``.

    Raises:
        ValueError: If ``name`` is empty, is ``.`` or ``..``, or contains a
            forward or backward slash.
    """
    if name in ("", ".", "..") or "/" in name or "\\" in name:
        raise ValueError(f"Invalid speaker name: {name!r}")
    return SPEAKERS_DIR / name
59
+
60
+
61
+
62
+ # ── Routes ──────────────────────────────────────────────────────────
63
+
64
@app.route("/")
def index():
    """Serve index.html out of STATIC_DIR for the site root."""
    page_name = "index.html"
    return send_from_directory(STATIC_DIR, page_name)
67
+
68
+
69
@app.route("/api/settings", methods=["GET", "POST"])
def update_settings():
    """Read or update the detection settings.

    GET returns the current settings. POST accepts a JSON object whose
    optional keys ``interval_ms``, ``threshold`` and ``mode`` replace the
    corresponding values; omitted keys keep their current value.

    Returns:
        JSON with ``interval_ms``, ``threshold`` and ``mode``, or a JSON
        ``error`` with status 400 when the payload is not usable.
    """
    # LISTENING_MODE is mutated in place, so only the rebound names are global.
    global DETECTION_INTERVAL_MS, DETECTION_THRESHOLD
    if request.method == "POST":
        data = request.get_json() or {}
        if not isinstance(data, dict):
            return jsonify({"error": "Settings payload must be a JSON object"}), 400
        # Parse everything before assigning so a bad field cannot leave the
        # settings half-updated.
        try:
            interval_ms = int(data.get("interval_ms", DETECTION_INTERVAL_MS))
            threshold = float(data.get("threshold", DETECTION_THRESHOLD))
        except (TypeError, ValueError):
            return jsonify({"error": "interval_ms and threshold must be numeric"}), 400
        if interval_ms < 0:
            return jsonify({"error": "interval_ms must not be negative"}), 400
        mode = data.get("mode", LISTENING_MODE["mode"])
        if mode not in ("off", "single", "multi"):
            return jsonify({"error": "mode must be one of: off, single, multi"}), 400
        DETECTION_INTERVAL_MS = interval_ms
        DETECTION_THRESHOLD = threshold
        LISTENING_MODE["mode"] = mode
    return jsonify({
        "interval_ms": DETECTION_INTERVAL_MS,
        "threshold": DETECTION_THRESHOLD,
        "mode": LISTENING_MODE["mode"],
    })
82
+
83
@app.route("/api/active-speaker")
def get_active_speaker():
    """Report the latest detection result together with a status string.

    The status is "disabled" when the listening mode is "off", "mic unavailable"
    (HTTP 503) when MIC_AVAILABLE is false, and "listening" otherwise.
    """
    nobody = {"speaker": None, "confidence": None}
    if LISTENING_MODE["mode"] == "off":
        return jsonify({**nobody, "status": "disabled"})
    if MIC_AVAILABLE:
        return jsonify({**current_speaker, "status": "listening"})
    return jsonify({**nobody, "status": "mic unavailable"}), 503
90
+
91
@app.route("/api/identify", methods=["POST"])
def api_identify():
    """Identify the speaker in an uploaded audio file.

    Expects a multipart upload under the ``file`` key. Uploads ending in
    .webm, .ogg or .mp3 are converted to wav with pydub before being passed
    to identify_speaker().

    Returns:
        JSON ``{"speaker", "score"}`` on success, a 400 JSON error when the
        upload is missing, or a 500 JSON error when processing fails.
    """
    if "file" not in request.files:
        return jsonify({"error": "Missing file"}), 400
    audio = request.files["file"]
    # filename can be None/empty for some clients; treat that as "no suffix".
    suffix = Path(audio.filename or "").suffix.lower()
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp_path = tmp.name
    wav_path = tmp_path
    try:
        audio.save(tmp_path)
        if suffix in (".webm", ".ogg", ".mp3"):
            # Swap only the final extension; replacing the suffix text anywhere
            # in the path could also rewrite a matching directory name.
            wav_path = str(Path(tmp_path).with_suffix(".wav"))
            AudioSegment.from_file(tmp_path).export(wav_path, format="wav")
        speaker, score = identify_speaker(wav_path, threshold=DETECTION_THRESHOLD)
        return jsonify({"speaker": speaker, "score": round(score or 0, 3)})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Remove both the upload and the converted copy on every exit path.
        for leftover in {tmp_path, wav_path}:
            if os.path.exists(leftover):
                os.remove(leftover)
114
+
115
@app.route("/api/speakers")
def api_speakers():
    """List every speaker directory with the number of .wav files it contains."""
    listing = [
        {"name": entry.name, "recordings": len(list(entry.glob("*.wav")))}
        for entry in SPEAKERS_DIR.iterdir()
        if entry.is_dir()
    ]
    return jsonify(listing)
126
+
127
@app.route("/api/enroll/<name>", methods=["POST"])
def api_enroll(name):
    """Store an uploaded recording as an enrollment sample for ``name``.

    Expects a multipart upload under the ``file`` key. Non-wav uploads are
    converted to wav with pydub. The result is stored in the speaker's folder
    as ``<name>_<unix time>.wav``.

    Returns:
        JSON ``{"status": "enrolled", "file": <stored file name>}``, a 400 JSON
        error when the upload is missing, or a 500 JSON error on failure.
    """
    import shutil  # local import: only needed for the cross-filesystem move

    if "file" not in request.files:
        return jsonify({"error": "Missing audio file"}), 400
    audio = request.files["file"]
    folder = get_speaker_folder(name)
    folder.mkdir(parents=True, exist_ok=True)
    suffix = Path(audio.filename or "").suffix
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        upload_path = Path(tmp.name)
    wav_path = upload_path
    try:
        audio.save(str(upload_path))
        # Compare case-insensitively: treating ".WAV" as non-wav would make the
        # converted file collide with the upload on case-insensitive filesystems.
        if suffix.lower() != ".wav":
            wav_path = upload_path.with_suffix(".wav")
            AudioSegment.from_file(upload_path).export(wav_path, format="wav")
        dest_path = folder / f"{name}_{int(time.time())}.wav"
        # shutil.move falls back to copy+delete when the temp directory and the
        # speaker storage are on different filesystems, where rename() fails.
        shutil.move(str(wav_path), str(dest_path))
        return jsonify({"status": "enrolled", "file": dest_path.name})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Drop whatever temporary files are still around (upload and/or wav).
        for leftover in {upload_path, wav_path}:
            if leftover.exists():
                leftover.unlink()
150
+
151
@app.route("/api/speakers/<name>", methods=["DELETE"])
def api_delete_speaker(name):
    """Delete a speaker's recordings folder and stored embedding.

    Returns:
        JSON ``{"deleted": True}`` on success (also when nothing existed), a
        400 JSON error for a name that is not a plain folder name, or a 500
        JSON error when removal fails.
    """
    import shutil  # local import: only needed for recursive removal

    # This route is destructive, so refuse anything that is not a single path
    # component; "." or ".." would otherwise point at shared storage.
    if name in ("", ".", "..") or "/" in name or "\\" in name:
        return jsonify({"error": "Invalid speaker name"}), 400
    try:
        folder = get_speaker_folder(name)
        emb_path = STORAGE_DIR / "embeddings" / f"{name}.pt"
        if folder.exists():
            # rmtree also copes with nested directories, which a per-file
            # unlink() followed by rmdir() cannot remove.
            shutil.rmtree(folder)
        if emb_path.exists():
            emb_path.unlink()
        return jsonify({"deleted": True})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
165
+
166
@app.route("/api/speakers/<name>/improve", methods=["POST"])
def api_improve(name):
    """Add an extra recording to an existing speaker.

    Expects a multipart upload under the ``file`` key. Non-wav uploads are
    converted to wav with pydub. The result is stored in the speaker's folder
    as ``<name>_imp_<unix time>.wav``.

    Returns:
        JSON ``{"status": "improved", "file": <stored file name>}``, a 400 JSON
        error when the upload is missing, a 404 JSON error when the speaker
        folder does not exist, or a 500 JSON error on failure.
    """
    import shutil  # local import: only needed for the cross-filesystem move

    if "file" not in request.files:
        return jsonify({"error": "Missing audio file"}), 400
    folder = get_speaker_folder(name)
    if not folder.exists():
        return jsonify({"error": f"Speaker '{name}' not found"}), 404
    audio = request.files["file"]
    suffix = Path(audio.filename or "").suffix
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        upload_path = Path(tmp.name)
    wav_path = upload_path
    try:
        audio.save(str(upload_path))
        # Compare case-insensitively: treating ".WAV" as non-wav would make the
        # converted file collide with the upload on case-insensitive filesystems.
        if suffix.lower() != ".wav":
            wav_path = upload_path.with_suffix(".wav")
            AudioSegment.from_file(upload_path).export(wav_path, format="wav")
        dest_path = folder / f"{name}_imp_{int(time.time())}.wav"
        # shutil.move falls back to copy+delete when the temp directory and the
        # speaker storage are on different filesystems, where rename() fails.
        shutil.move(str(wav_path), str(dest_path))
        return jsonify({"status": "improved", "file": dest_path.name})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Drop whatever temporary files are still around (upload and/or wav).
        for leftover in {upload_path, wav_path}:
            if leftover.exists():
                leftover.unlink()
190
+
191
@app.route("/api/background_noise", methods=["POST"])
def api_background_noise():
    """Store an uploaded background-noise sample in NOISE_DIR.

    Expects a multipart upload under the ``audio`` key. Non-wav uploads are
    converted to wav with pydub. The result is stored as
    ``noise_<unix time>.wav``.

    Returns:
        JSON ``{"success": True}``, a 400 JSON error when the upload is
        missing, or a 500 JSON error on failure.
    """
    import shutil  # local import: only needed for the cross-filesystem move

    if "audio" not in request.files:
        return jsonify({"error": "Missing audio file"}), 400
    audio = request.files["audio"]
    suffix = Path(audio.filename or "").suffix
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        upload_path = Path(tmp.name)
    wav_path = upload_path
    try:
        audio.save(str(upload_path))
        # Compare case-insensitively: treating ".WAV" as non-wav would make the
        # converted file collide with the upload on case-insensitive filesystems.
        if suffix.lower() != ".wav":
            wav_path = upload_path.with_suffix(".wav")
            AudioSegment.from_file(upload_path).export(wav_path, format="wav")
        final_path = NOISE_DIR / f"noise_{int(time.time())}.wav"
        # shutil.move falls back to copy+delete when the temp directory and the
        # noise storage are on different filesystems, where rename() fails.
        shutil.move(str(wav_path), str(final_path))
        return jsonify({"success": True})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Drop whatever temporary files are still around (upload and/or wav).
        for leftover in {upload_path, wav_path}:
            if leftover.exists():
                leftover.unlink()
212
+
213
@app.route("/api/rebuild-all", methods=["POST"])
def api_rebuild_all():
    """Rebuild the embedding of every speaker directory under SPEAKERS_DIR.

    A failure for one speaker is recorded and does not stop the others. The
    response has status 207 and "partial" when at least one rebuild failed.
    """
    rebuilt, errors = [], {}
    for entry in SPEAKERS_DIR.iterdir():
        if not entry.is_dir():
            continue
        name = entry.name
        try:
            rebuild_embedding(name)
        except Exception as err:
            errors[name] = str(err)
        else:
            rebuilt.append(name)
    if not errors:
        return jsonify({"status": "rebuilt", "rebuilt": rebuilt})
    return jsonify({"status": "partial", "rebuilt": rebuilt, "errors": errors}), 207
228
+
229
@app.route("/api/rebuild/<name>", methods=["POST"])
def api_rebuild_one(name):
    """Rebuild the embedding for one speaker; failures become a 500 JSON error."""
    try:
        rebuild_embedding(name)
        outcome = {"status": "rebuilt", "name": name}
        return jsonify(outcome)
    except Exception as err:
        failure = {"error": str(err)}
        return jsonify(failure), 500
236
+
237
@app.route("/api/rebuild-background", methods=["POST"])
def api_rebuild_background():
    """Recompute the background embedding; failures become a 500 JSON error."""
    try:
        compute_background_embedding()
        outcome = {"status": "success"}
        return jsonify(outcome)
    except Exception as err:
        failure = {"error": str(err)}
        return jsonify(failure), 500
244
+
245
@app.route("/api/speakers/needs-rebuild")
def api_needs_rebuild():
    """Return whatever get_speakers_needing_rebuild() reports, under "toRebuild"."""
    try:
        pending = get_speakers_needing_rebuild()
        return jsonify({"toRebuild": pending})
    except Exception as err:
        failure = {"error": str(err)}
        return jsonify(failure), 500
252
+
253
@app.after_request
def remove_favicon_warnings(response):
    """Force status 204 on responses whose request path ends in favicon.ico."""
    is_favicon_request = request.path.endswith("favicon.ico")
    if not is_favicon_request:
        return response
    response.status_code = 204
    return response
258
+
259
+ # ── Background Detection Loop ──────────────────────────────────────
260
def background_speaker_loop():
    """Continuously record from the microphone and publish who is speaking.

    Each iteration records two seconds of 16 kHz mono audio, writes it to a
    temporary wav file, runs identify_speaker() on it and stores the result in
    ``current_speaker``. Any error clears ``current_speaker``; a PortAudioError
    additionally marks the microphone as unavailable. The loop ends once
    ``stop_event`` is set.
    """
    global MIC_AVAILABLE
    samplerate = 16000
    duration = 2
    while not stop_event.is_set():
        wav_path = None
        try:
            audio = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1, dtype="int16")
            sd.wait()
            # Close the handle straight away; only the reserved name is needed,
            # and an unclosed handle per iteration would leak descriptors.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                wav_path = tmp.name
            sf.write(wav_path, audio, samplerate)
            MIC_AVAILABLE = True
            speaker, conf = identify_speaker(wav_path, threshold=DETECTION_THRESHOLD)
            current_speaker.update(speaker=speaker, confidence=conf)
            # A missing confidence must not blow up the format spec and get
            # reported as a loop error after a successful detection.
            conf_text = "n/a" if conf is None else f"{conf:.2f}"
            print(f"{datetime.now().strftime('%H:%M:%S')} 🧠 Detected: {speaker} ({conf_text})")
        except Exception as e:
            print(f"❌ Loop error: {e}")
            current_speaker.update(speaker=None, confidence=None)
            if isinstance(e, sd.PortAudioError):
                MIC_AVAILABLE = False
        finally:
            # Remove the recording on every path so failures do not pile up files.
            if wav_path is not None and os.path.exists(wav_path):
                os.remove(wav_path)
        # Short pause between iterations; returns early when a stop is requested.
        stop_event.wait(0.5)
281
+
282
def handle_interrupt(sig, frame):
    """SIGINT handler: signal the background loop to stop, then exit.

    Args:
        sig: Signal number supplied by the signal module (unused).
        frame: Current stack frame supplied by the signal module (unused).

    Raises:
        SystemExit: Always, with exit status 0.
    """
    print("🛑 Shutting down cleanly...")
    stop_event.set()
    # Give the background loop a moment to notice the stop request.
    time.sleep(1)
    # The bare exit() helper is injected by the site module for interactive
    # use and is not guaranteed to exist; raise SystemExit directly instead.
    raise SystemExit(0)


signal.signal(signal.SIGINT, handle_interrupt)
289
+
290
if __name__ == "__main__":
    # Detection runs on a daemon thread so it never blocks interpreter exit.
    detector = threading.Thread(target=background_speaker_loop, daemon=True)
    detector.start()
    print("🎤 Speaker detection loop started.")
    # Brief pause before the web server starts.
    time.sleep(2)
    print("🌐 Server running on http://0.0.0.0:9000")
    app.run(host="0.0.0.0", port=9000)
@@ -0,0 +1,82 @@
1
+ # ── Core Imports ─────────────────────────────────────────────
2
+ import os, signal, time
3
+ from flask import Flask, request, send_from_directory, send_file
4
+ from flask_cors import CORS
5
+ from pathlib import Path
6
+
7
+ # ── Internal Modules ─────────────────────────────────────────
8
+ from speaker_detector.state import stop_event
9
+ from speaker_detector.utils.paths import STATIC_DIR, INDEX_HTML, COMPONENTS_DIR
10
+
11
+ # ── App Setup ────────────────────────────────────────────────
12
# Flask application serving the packaged static assets, with CORS enabled.
app = Flask(
    __name__,
    static_folder=str(STATIC_DIR),
)
CORS(app)
14
+
15
+
16
+ # ── Routes ──────────────────────────────────────────────────
17
@app.after_request
def apply_csp(response):
    """Attach the Content-Security-Policy header to every outgoing response."""
    policy = "default-src 'self'; script-src 'self'; style-src 'self'; object-src 'none';"
    response.headers["Content-Security-Policy"] = policy
    return response
23
+
24
@app.route("/")
def serve_index():
    """Serve the file at INDEX_HTML for the site root."""
    index_page = INDEX_HTML
    return send_file(index_page)
27
+
28
@app.route("/index.html")
def serve_index_html():
    """Serve the file at INDEX_HTML for the explicit /index.html path."""
    index_page = INDEX_HTML
    return send_file(index_page)
31
+
32
@app.route("/static/<path:filename>")
def serve_static_file(filename):
    """Serve ``filename`` from STATIC_DIR."""
    base_dir = STATIC_DIR
    return send_from_directory(base_dir, filename)
35
+
36
@app.route("/static/components/<path:filename>")
def serve_component_file(filename):
    """Serve ``filename`` from COMPONENTS_DIR."""
    base_dir = COMPONENTS_DIR
    return send_from_directory(base_dir, filename)
39
+
40
@app.route("/favicon.ico")
def serve_favicon():
    """Serve favicon.ico from STATIC_DIR."""
    icon_name = "favicon.ico"
    return send_from_directory(STATIC_DIR, icon_name)
43
+
44
@app.errorhandler(404)
def not_found(e):
    """Answer 404 errors with a JSON error body instead of the default page."""
    body = {"error": "Resource not found"}
    return body, 404
47
+
48
+ # ── Route Registrations ─────────────────────────────────────
49
+ from speaker_detector.routes.index_routes import index_bp
50
+ from speaker_detector.routes.settings_routes import settings_bp
51
+ from speaker_detector.routes.speaker_routes import speakers_bp
52
+ from speaker_detector.routes.background_routes import background_bp
53
+ from speaker_detector.routes.rebuild_routes import rebuild_bp
54
+ from speaker_detector.routes.identify_routes import identify_bp
55
+ from speaker_detector.routes.recordings_routes import recordings_bp
56
+ from speaker_detector.routes.meetings_routes import meetings_bp
57
+
58
# Register every route blueprint on the app, in the original order.
for blueprint in (
    index_bp,
    settings_bp,
    speakers_bp,
    background_bp,
    rebuild_bp,
    identify_bp,
    recordings_bp,
    meetings_bp,
):
    app.register_blueprint(blueprint)
66
+
67
+ # ── Interrupt Handler ───────────────────────────────────────
68
def handle_interrupt(sig, frame):
    """SIGINT handler: set the shared stop event, then exit.

    Args:
        sig: Signal number supplied by the signal module (unused).
        frame: Current stack frame supplied by the signal module (unused).

    Raises:
        SystemExit: Always, with exit status 0.
    """
    print("🛑 Shutting down cleanly...")
    stop_event.set()
    # Give any loop watching stop_event a moment to notice the request.
    time.sleep(1)
    # The bare exit() helper is injected by the site module for interactive
    # use and is not guaranteed to exist; raise SystemExit directly instead.
    raise SystemExit(0)


signal.signal(signal.SIGINT, handle_interrupt)
75
+
76
+ # ── Entrypoint ───────────────────────────────────────────────
77
if __name__ == "__main__":
    # The server listens on every interface, so the interactive debugger must
    # not be on by default: it allows code execution from the browser. Debug
    # mode is opt-in through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    print("🌐 Server running on http://0.0.0.0:9000")
    print(f"🚀 Static folder: {STATIC_DIR}")
    print(f"📁 Component folder: {COMPONENTS_DIR}")
    print(f"📄 Index HTML: {INDEX_HTML}")
    app.run(host="0.0.0.0", port=9000, debug=debug_enabled)
@@ -0,0 +1,69 @@
1
+ # speaker_detector/state.py
2
+
3
+ import threading
4
+ import tempfile
5
+ import time
6
+ import sounddevice as sd
7
+ import soundfile as sf
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+
11
+ from speaker_detector.core import identify_speaker # ✅ safe import — no circular loop
12
+
13
+ # ── Global State ─────────────────────────────────────────────
14
# Latest result published by detection_loop(); reset to None values on error.
current_speaker = dict(speaker=None, confidence=None)
LISTENING_MODE = dict(mode="off")  # Values: "off", "single", "multi"
# Pause between detection passes, in milliseconds.
DETECTION_INTERVAL_MS = 3000
# Passed to identify_speaker() as its threshold argument.
DETECTION_THRESHOLD = 0.75

# Cleared when the loop hits a PortAudioError, set again after a good capture.
MIC_AVAILABLE = True
# Set to ask the detection loop to finish; cleared when a new loop starts.
stop_event = threading.Event()
# Thread running detection_loop(), or None before the first start.
detection_thread = None
22
+
23
+ # ── Background Detection Loop ────────────────────────────────
24
def detection_loop():
    """Record from the microphone in a loop and publish who is speaking.

    Each pass records two seconds of 16 kHz mono audio, writes it to a
    temporary wav file, runs identify_speaker() on it and stores the result in
    ``current_speaker``. Any error clears ``current_speaker``; a PortAudioError
    additionally marks the microphone as unavailable. Passes are separated by
    DETECTION_INTERVAL_MS and the loop ends once ``stop_event`` is set.
    """
    global MIC_AVAILABLE

    samplerate = 16000
    duration = 2

    while not stop_event.is_set():
        wav_path = None
        try:
            audio = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1, dtype="int16")
            sd.wait()

            # Only the reserved file name is needed; close the handle first.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                wav_path = Path(tmp.name)
            sf.write(str(wav_path), audio, samplerate)
            MIC_AVAILABLE = True
            speaker, conf = identify_speaker(str(wav_path), threshold=DETECTION_THRESHOLD)
            current_speaker.update(speaker=speaker, confidence=conf)
            # A missing confidence must not blow up the format spec and get
            # reported as a loop error after a successful detection.
            conf_text = "n/a" if conf is None else f"{conf:.2f}"
            print(f"{datetime.now().strftime('%H:%M:%S')} 🧠 Detected: {speaker} ({conf_text})")
        except Exception as e:
            print(f"❌ Detection loop error: {e}")
            current_speaker.update(speaker=None, confidence=None)
            if isinstance(e, sd.PortAudioError):
                MIC_AVAILABLE = False
        finally:
            # The file is created with delete=False, so it must be removed here;
            # otherwise every pass leaves a wav file behind in the temp dir.
            if wav_path is not None and wav_path.exists():
                wav_path.unlink()

        # Wait on the event instead of sleeping so a stop request ends the
        # pause immediately rather than after the full interval.
        stop_event.wait(DETECTION_INTERVAL_MS / 1000.0)
48
+
49
+ # ── Control Functions ────────────────────────────────────────
50
def start_detection_loop():
    """Start detection_loop() on a daemon thread unless one is already alive."""
    global detection_thread
    already_running = detection_thread is not None and detection_thread.is_alive()
    if already_running:
        return
    print("🔁 Starting detection loop...")
    # Clear any earlier stop request so the new loop does not exit at once.
    stop_event.clear()
    worker = threading.Thread(target=detection_loop, daemon=True)
    detection_thread = worker
    worker.start()
58
+
59
def stop_detection_loop():
    """Ask a running detection thread to stop by setting ``stop_event``."""
    worker = detection_thread
    if worker is None or not worker.is_alive():
        return
    print("⏹️ Stopping detection loop...")
    stop_event.set()
63
+
64
def get_active_speaker():
    """Return the latest detection result plus a status string as a dict.

    The status is "disabled" when the listening mode is "off", "mic unavailable"
    when MIC_AVAILABLE is false, and "listening" otherwise.
    """
    nobody = {"speaker": None, "confidence": None}
    if LISTENING_MODE["mode"] == "off":
        return {**nobody, "status": "disabled"}
    if MIC_AVAILABLE:
        return {**current_speaker, "status": "listening"}
    return {**nobody, "status": "mic unavailable"}
Binary file
@@ -0,0 +1,29 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <title>Speaker Detector</title>
6
+ <link rel="icon" type="image/x-icon" href="/favicon.ico" />
7
+ <link rel="stylesheet" href="/static/style.css" />
8
+ </head>
9
+ <body>
10
+ <div id="loading-overlay">
11
+ <div class="loader">🔄 Loading Speaker Detector...</div>
12
+ </div>
13
+
14
+ <h1>🎙️ Speaker Detector</h1>
15
+
16
+ <!-- Include all component templates -->
17
+ <div include-html="/static/components/accordion-nav/accordion-nav.html"></div>
18
+ <div include-html="/static/components/mic-test/mic-test.html"></div>
19
+ <div include-html="/static/components/enroll-speaker/enroll-speaker.html"></div>
20
+ <div include-html="/static/components/identify-speaker/identify-speaker.html"></div>
21
+ <div include-html="/static/components/meeting-mode/meeting-mode.html"></div>
22
+ <div include-html="/static/components/recordings-tab/recordings-tab.html"></div>
23
+ <div include-html="/static/components/mic-popup/mic-popup.html"></div>
24
+ <div include-html="/static/components/correction/correction.html"></div>
25
+
26
+ <!-- ✅ External module to handle includes + script setup -->
27
+ <script type="module" src="/static/scripts/loader.js"></script>
28
+ </body>
29
+ </html>
@@ -0,0 +1,10 @@
1
+ import { includeHTML } from "/static/scripts/utils/include-html.js";
2
+
3
// Once every include-html placeholder has been filled in, load the main
// script module and run its setup.
includeHTML(() => {
  const onLoaded = (mod) => {
    console.log("✅ script.js loaded");
    mod.runSetup(); // only run after includes finish
  };
  const onFailed = (err) => {
    console.error("❌ Failed to load script.js:", err);
  };

  import("/static/scripts/script.js").then(onLoaded).catch(onFailed);
});
@@ -0,0 +1,14 @@
1
+ import { includeHTML } from "/static/scripts/utils/include-html.js";
2
+
3
// Once every include-html placeholder has been filled in, load the main
// script module, run its setup and then take the loading overlay away.
includeHTML(() => {
  const overlay = document.getElementById("loading-overlay");

  import("/static/scripts/script.js")
    .then(mod => {
      console.log("✅ script.js loaded");
      mod.runSetup(); // only run after includes finish

      // Hide loading screen now that setup is complete
      if (overlay) overlay.remove();
    })
    .catch(err => {
      // This also catches errors thrown by runSetup(), not just load failures.
      console.error("❌ Failed to load or set up script.js:", err);

      // Without feedback the overlay would keep showing "Loading..." forever;
      // swap its message so the user can tell that start-up failed.
      const message = overlay ? overlay.querySelector(".loader") : null;
      if (message) message.textContent = "❌ Failed to load Speaker Detector";
    });
});