swap-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
swap_cli/gui.py ADDED
@@ -0,0 +1,1695 @@
1
+ """customtkinter GUI for swap-cli.
2
+
3
+ Layout mirrors Deep-Live-Cam: face thumbnail, camera dropdown, options,
4
+ Start / Destroy / Preview / Live buttons. The Live button opens the
5
+ realtime stream in a separate window via the existing display.py.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import sys
12
+ import threading
13
+ import tkinter as tk
14
+ from pathlib import Path
15
+ from tkinter import filedialog
16
+ from typing import TYPE_CHECKING, Callable
17
+
18
+ import customtkinter as ctk
19
+ from PIL import Image
20
+
21
+ from . import config, license
22
+ from .devices import CameraDevice, enumerate_cameras
23
+ from .runtime import DEFAULT_PROMPT, RunOptions, run_session
24
+ from .version import __version__
25
+
26
# Placeholder guard: no typing-only imports are needed at the moment.
if TYPE_CHECKING:
    pass

if sys.platform == "win32":
    # Make Tk render correctly on high-DPI Alienware/Surface/4K displays.
    # Without this, customtkinter's internal scaling fights the OS and the
    # window can render off-screen or at the wrong size.
    try:
        from ctypes import windll

        windll.shcore.SetProcessDpiAwareness(1)  # PROCESS_SYSTEM_DPI_AWARE
    except Exception:  # noqa: BLE001 — best effort on older Windows
        pass

# Global customtkinter look; applied once, when this module is imported.
ctk.set_appearance_mode("dark")
ctk.set_default_color_theme("blue")

# (width, height) of the reference-face thumbnail shown at the top of the GUI.
THUMB_SIZE = (140, 140)
44
+
45
+
46
+ # Sprint 14l: settings panel helpers.
47
+ # Pure functions so they're trivially unit-testable without spinning a tk root.
48
+
49
+
50
+ def _redact_key(value: str | None) -> str:
51
+ """Show last 4 chars only: 'dct_a…AB12' or '—' if absent."""
52
+ if not value:
53
+ return "—"
54
+ if len(value) <= 4:
55
+ return value
56
+ return f"{value[:4]}…{value[-4:]}"
57
+
58
+
59
class DecartKeyValidationError(ValueError):
    """Signals that a candidate Decart API key did not pass the basic format checks.

    Subclasses ``ValueError`` so callers that already handle bad values
    keep working.
    """
61
+
62
+
63
def apply_decart_key_update(new_key: str) -> None:
    """Check a candidate Decart API key and, if plausible, save it.

    The check is deliberately loose: after trimming whitespace the key must
    begin with ``dct_`` and contain at least 20 characters. That is enough
    to catch empty input, truncated pastes and obvious gibberish; the SDK
    performs the real validation when a session is opened.

    On success the key is written through ``config.update`` together with
    ``license_cached_at=None`` and ``license_cached_valid_until=None``, so
    the cached license state is cleared alongside the key change.

    Raises:
        DecartKeyValidationError: the key has the wrong prefix or is too short.
    """
    candidate = new_key if new_key else ""
    candidate = candidate.strip()

    has_prefix = candidate.startswith("dct_")
    if not has_prefix:
        raise DecartKeyValidationError("Decart key must start with 'dct_'.")

    if len(candidate) < 20:
        raise DecartKeyValidationError(
            f"Decart key looks too short ({len(candidate)} chars; expect ≥ 20)."
        )

    # Persist the key and clear the cached license timestamps in one write.
    changes = {
        "decart_api_key": candidate,
        "license_cached_at": None,
        "license_cached_valid_until": None,
    }
    config.update(**changes)
89
+
90
+
91
+ class SwapGUI(ctk.CTk):
92
def __init__(self) -> None:
    """Create the main window, initialise session state and build the UI."""
    super().__init__()
    self.title(f"swap-cli {__version__} · live deepfake")

    width, height = 520, 720
    self.minsize(480, 660)

    # Place the window in the middle of the primary monitor so it cannot
    # open off-screen on multi-monitor setups.
    self.update_idletasks()
    screen_w = self.winfo_screenwidth()
    screen_h = self.winfo_screenheight()
    left = max(0, (screen_w - width) // 2)
    top = max(0, (screen_h - height) // 2)
    self.geometry(f"{width}x{height}+{left}+{top}")

    # Bring the window to the foreground as soon as the event loop runs,
    # in case another application took focus during start-up.
    self.after(0, self._raise_to_front)

    # Reference face + camera state.
    self._reference_path: Path | None = None
    self._thumb_image: ctk.CTkImage | None = None
    self._cameras: list[CameraDevice] = []

    # Live-session state (worker thread and its asyncio loop).
    self._session_thread: threading.Thread | None = None
    self._session_loop: asyncio.AbstractEventLoop | None = None
    self._stop_event: asyncio.Event | None = None
    # Filled in through the on_runtime_ready callback; the tk main thread
    # calls it to wind the session down.
    self._stop_session: Callable[[], None] | None = None

    # Voice-only test state (no Decart); independent of the live session.
    self._voice_test_thread: threading.Thread | None = None
    self._voice_test_loop: asyncio.AbstractEventLoop | None = None
    self._voice_test_stop: Callable[[], None] | None = None

    self._status_var = tk.StringVar(value="Idle.")

    self._build_ui()
    self._refresh_cameras()
    self._refresh_status()
    self._refresh_voice_section()
129
+
130
+ # ── UI build ──────────────────────────────────────────────────────
131
+
132
def _build_ui(self) -> None:
    """Create and lay out every widget of the main window.

    Sections are packed top to bottom inside one transparent outer frame:
    settings button, face thumbnail + picker, option switches, model
    selector, collapsible Advanced panel (prompt), camera picker, voice
    section, Live/Stop buttons, status bar and footer. Widgets that other
    methods need later are stored on ``self``.
    """
    outer = ctk.CTkFrame(self, fg_color="transparent")
    outer.pack(fill="both", expand=True, padx=20, pady=18)

    # Settings (gear) button — top-right, opens the settings modal.
    title_bar = ctk.CTkFrame(outer, fg_color="transparent")
    title_bar.pack(fill="x", pady=(0, 4))
    ctk.CTkButton(
        title_bar,
        text="⚙",
        width=36,
        height=36,
        corner_radius=18,
        command=self._on_settings_clicked,
        fg_color="#374151",
        hover_color="#4b5563",
        font=ctk.CTkFont(size=16),
    ).pack(side="right")

    # Top: face thumbnail + reference picker
    top = ctk.CTkFrame(outer, fg_color="transparent")
    top.pack(fill="x")
    self._face_label = ctk.CTkLabel(
        top,
        text="No face\nselected",
        width=THUMB_SIZE[0],
        height=THUMB_SIZE[1],
        corner_radius=12,
        fg_color="#1f2937",
        text_color="#9ca3af",
        font=ctk.CTkFont(size=11),
    )
    self._face_label.pack(pady=(0, 10))

    self._select_face_btn = ctk.CTkButton(
        top,
        text="① Select a face",
        command=self._on_select_face,
        height=42,
        corner_radius=8,
    )
    self._select_face_btn.pack(fill="x")

    # Options row 1
    opts = ctk.CTkFrame(outer, fg_color="transparent")
    opts.pack(fill="x", pady=(18, 0))
    opts.columnconfigure((0, 1), weight=1, uniform="opt")

    self._mirror_var = tk.BooleanVar(value=True)
    ctk.CTkSwitch(
        opts, text="Mirror camera", variable=self._mirror_var
    ).grid(row=0, column=0, sticky="w", pady=4)

    self._record_var = tk.BooleanVar(value=False)
    ctk.CTkSwitch(
        opts, text="Record to MP4", variable=self._record_var
    ).grid(row=0, column=1, sticky="w", pady=4)

    # Sprint 14k: virtual camera output. When on, Zoom/Meet/Discord
    # see the deepfake as a camera device ("OBS Virtual Camera") —
    # no OBS app to open, no manual switching. Default on; user can
    # disable for screen-recording / preview-only sessions.
    self._vcam_var = tk.BooleanVar(value=True)
    ctk.CTkSwitch(
        opts,
        text="Output to virtual camera",
        variable=self._vcam_var,
    ).grid(row=1, column=0, columnspan=2, sticky="w", pady=4)

    # Model selector. Decart fixes width/height/fps per model — we display
    # the native dimensions next to each option so the user knows what
    # they're getting. There is no orientation knob: the SDK only accepts
    # a model's native dimensions; portrait / 1:1 / 4:5 / 3:4 framing
    # would have to be done post-hoc on the saved file (not exposed yet).
    tier_row = ctk.CTkFrame(outer, fg_color="transparent")
    tier_row.pack(fill="x", pady=(14, 0))
    ctk.CTkLabel(
        tier_row, text="Model", anchor="w", font=ctk.CTkFont(size=11)
    ).pack(anchor="w")

    from decart import models as _decart_models

    # Maps model name -> (width, height, fps) for every model that resolved.
    self._model_specs: dict[str, tuple[int, int, int]] = {}
    labels: list[str] = []
    for name in ("lucy-2", "lucy-2.1"):
        try:
            m = _decart_models.realtime(name)
            self._model_specs[name] = (int(m.width), int(m.height), int(m.fps))
            labels.append(f"{name} ({m.width}×{m.height}, {m.fps} fps)")
        except Exception:
            # Model lookup failed; leave it out of the dropdown.
            continue
    if not labels:
        # Nothing resolved: offer a single hard-coded fallback entry.
        labels = ["lucy-2 (default)"]
        self._model_specs["lucy-2"] = (1280, 720, 20)

    self._tier_var = tk.StringVar(value=labels[0])
    self._tier_dropdown = ctk.CTkComboBox(
        tier_row,
        values=labels,
        variable=self._tier_var,
        state="readonly",
        height=34,
    )
    self._tier_dropdown.pack(fill="x")
    ctk.CTkLabel(
        tier_row,
        text="Decart fixes the resolution per model. Pricing tier "
        "(Fast / Pro) is set on your Decart account.",
        anchor="w",
        text_color="#6b7280",
        font=ctk.CTkFont(size=10),
        wraplength=460,
        justify="left",
    ).pack(anchor="w", pady=(2, 0))

    # Prompt — hidden behind Advanced. Default deepfake template covers
    # the common case ("become this face"). Power users expand to tweak.
    adv_row = ctk.CTkFrame(outer, fg_color="transparent")
    adv_row.pack(fill="x", pady=(14, 0))
    self._advanced_open = tk.BooleanVar(value=False)
    self._advanced_toggle = ctk.CTkButton(
        adv_row,
        text="⚙ Advanced",
        command=self._toggle_advanced,
        height=28,
        corner_radius=6,
        fg_color="transparent",
        hover_color="#1f2937",
        anchor="w",
        text_color="#9ca3af",
    )
    self._advanced_toggle.pack(fill="x")

    self._advanced_panel = ctk.CTkFrame(outer, fg_color="transparent")
    ctk.CTkLabel(
        self._advanced_panel,
        text="Prompt (optional — guides the swap)",
        anchor="w",
        font=ctk.CTkFont(size=11),
    ).pack(anchor="w", pady=(8, 0))
    self._prompt_box = ctk.CTkTextbox(self._advanced_panel, height=72)
    self._prompt_box.pack(fill="x")
    self._prompt_box.insert("1.0", DEFAULT_PROMPT)
    # Note: panel is NOT packed by default (collapsed).

    # Camera dropdown + refresh
    cam_row = ctk.CTkFrame(outer, fg_color="transparent")
    cam_row.pack(fill="x", pady=(14, 0))
    cam_row.columnconfigure(0, weight=1)
    ctk.CTkLabel(
        cam_row, text="② Select camera", anchor="w", font=ctk.CTkFont(size=11)
    ).grid(row=0, column=0, sticky="w", columnspan=2)

    self._camera_var = tk.StringVar(value="No cameras detected")
    self._camera_dropdown = ctk.CTkComboBox(
        cam_row,
        values=[],
        variable=self._camera_var,
        state="readonly",
        height=34,
    )
    self._camera_dropdown.grid(row=1, column=0, sticky="ew", padx=(0, 8))

    ctk.CTkButton(
        cam_row,
        text="↻",
        width=42,
        height=34,
        command=self._refresh_cameras,
    ).grid(row=1, column=1)

    # Voice section (Sprint 13b). Two states:
    # - collapsed: single line with Off label + Enable… button → opens modal
    # - expanded: library dropdown + mic dropdown when toggle ON
    # Restored from config.voice_enabled at startup.
    voice_row = ctk.CTkFrame(outer, fg_color="transparent")
    voice_row.pack(fill="x", pady=(14, 0))
    ctk.CTkLabel(
        voice_row, text="Voice", anchor="w", font=ctk.CTkFont(size=11)
    ).pack(anchor="w")

    # Collapsed row (default state)
    self._voice_collapsed_row = ctk.CTkFrame(voice_row, fg_color="transparent")
    self._voice_collapsed_row.pack(fill="x")
    ctk.CTkLabel(
        self._voice_collapsed_row,
        text="☐ Off · clone your voice (requires GPU)",
        anchor="w",
        text_color="#6b7280",
    ).pack(side="left", fill="x", expand=True)
    self._enable_voice_btn = ctk.CTkButton(
        self._voice_collapsed_row,
        text="Enable…",
        width=86,
        height=28,
        corner_radius=6,
        command=self._on_enable_voice,
    )
    self._enable_voice_btn.pack(side="right")

    # Expanded row (hidden until voice is enabled)
    self._voice_expanded_row = ctk.CTkFrame(voice_row, fg_color="transparent")
    # not packed yet — _refresh_voice_section() shows it when voice is on
    self._voice_var = tk.StringVar(value="(no voices found)")
    self._voice_dropdown = ctk.CTkComboBox(
        self._voice_expanded_row,
        values=[],
        variable=self._voice_var,
        state="readonly",
        height=30,
    )
    self._voice_dropdown.pack(fill="x", pady=(2, 4))
    voice_actions = ctk.CTkFrame(self._voice_expanded_row, fg_color="transparent")
    voice_actions.pack(fill="x")
    self._voice_status_label = ctk.CTkLabel(
        voice_actions,
        text="On · cloning enabled",
        anchor="w",
        text_color="#10b981",
    )
    self._voice_status_label.pack(side="left", fill="x", expand=True)
    self._test_voice_btn = ctk.CTkButton(
        voice_actions,
        text="Test voice",
        width=92,
        height=24,
        corner_radius=6,
        fg_color="#0ea5e9",
        hover_color="#0284c7",
        command=self._on_test_voice,
    )
    self._test_voice_btn.pack(side="right", padx=(0, 6))
    self._disable_voice_btn = ctk.CTkButton(
        voice_actions,
        text="Disable",
        width=72,
        height=24,
        corner_radius=6,
        fg_color="#374151",
        hover_color="#4b5563",
        command=self._on_disable_voice,
    )
    self._disable_voice_btn.pack(side="right")

    # Action buttons row
    actions = ctk.CTkFrame(outer, fg_color="transparent")
    actions.pack(fill="x", pady=(20, 0))
    actions.columnconfigure((0, 1, 2), weight=1, uniform="act")

    self._live_btn = ctk.CTkButton(
        actions,
        text="③ Live",
        command=self._on_live,
        height=44,
        corner_radius=8,
        fg_color="#ec4899",
        hover_color="#db2777",
    )
    self._live_btn.grid(row=0, column=0, columnspan=2, sticky="ew", padx=(0, 6))

    # Stop starts disabled; _set_running() enables it while a session runs.
    self._stop_btn = ctk.CTkButton(
        actions,
        text="Stop",
        command=self._on_stop,
        height=44,
        corner_radius=8,
        fg_color="#374151",
        hover_color="#4b5563",
        state="disabled",
    )
    self._stop_btn.grid(row=0, column=2, sticky="ew")

    # Status bar
    status = ctk.CTkFrame(outer, fg_color="transparent")
    status.pack(fill="x", pady=(18, 0))
    ctk.CTkLabel(
        status,
        textvariable=self._status_var,
        anchor="w",
        text_color="#9ca3af",
        font=ctk.CTkFont(size=11),
    ).pack(fill="x")

    # Footer
    ctk.CTkLabel(
        outer,
        text="swap-cli — Press Q in the preview window to stop",
        font=ctk.CTkFont(size=10),
        text_color="#6b7280",
    ).pack(side="bottom", pady=(8, 0))
422
+
423
+ # ── Actions ───────────────────────────────────────────────────────
424
+
425
+ def _toggle_advanced(self) -> None:
426
+ if self._advanced_open.get():
427
+ self._advanced_panel.pack_forget()
428
+ self._advanced_open.set(False)
429
+ self._advanced_toggle.configure(text="⚙ Advanced")
430
+ else:
431
+ # Pack right after the toggle button, before the camera row.
432
+ self._advanced_panel.pack(
433
+ fill="x", pady=(0, 0), after=self._advanced_toggle.master
434
+ )
435
+ self._advanced_open.set(True)
436
+ self._advanced_toggle.configure(text="⚙ Advanced (hide)")
437
+
438
+ def _on_settings_clicked(self) -> None:
439
+ """Spawn the settings modal. Reuses the existing one if open."""
440
+ try:
441
+ if getattr(self, "_settings_modal", None) is not None and self._settings_modal.winfo_exists():
442
+ self._settings_modal.deiconify()
443
+ self._settings_modal.focus_force()
444
+ return
445
+ except Exception: # noqa: BLE001
446
+ pass
447
+ self._settings_modal = _SettingsModal(self)
448
+
449
def _on_select_face(self) -> None:
    """Let the user pick a reference face image and show its thumbnail.

    The new path is committed to ``self._reference_path`` only after the
    image has been loaded and displayed. If loading fails, the previous
    reference (and its thumbnail) stay in effect, so the picture on screen
    always matches the file a live session would use.
    """
    path = filedialog.askopenfilename(
        title="Select a reference face",
        filetypes=[
            ("Images", "*.jpg *.jpeg *.png *.webp"),
            ("All files", "*.*"),
        ],
    )
    if not path:
        # Dialog cancelled.
        return

    try:
        # The context manager releases the file handle; copy() keeps the
        # downsized pixels alive after the file is closed.
        with Image.open(path) as source:
            source.thumbnail(THUMB_SIZE)
            img = source.copy()

        # thumbnail() preserves the aspect ratio, so the image is usually
        # not square. Fit it inside THUMB_SIZE with the same ratio instead
        # of stretching it to the full box.
        scale = min(THUMB_SIZE[0] / img.width, THUMB_SIZE[1] / img.height)
        display_size = (
            max(1, round(img.width * scale)),
            max(1, round(img.height * scale)),
        )
        thumb = ctk.CTkImage(light_image=img, dark_image=img, size=display_size)
        self._face_label.configure(image=thumb, text="")
    except Exception as err:
        self._status_var.set(f"Error loading face: {err}")
        return

    # Keep a reference on self so the image is not garbage-collected.
    self._thumb_image = thumb
    self._reference_path = Path(path)
    self._status_var.set(f"Face: {self._reference_path.name}")
469
+
470
def _refresh_cameras(self) -> None:
    """Re-enumerate cameras and update the dropdown and status line.

    Virtual cameras are listed with a "⚠ " prefix. The first real
    (non-virtual) camera is pre-selected; if only virtual devices exist the
    first one is selected and a warning is shown. When the probe itself
    raises, the failure reason stays visible in the status bar instead of
    being replaced by the generic "no cameras" hint.
    """
    self._status_var.set("Probing cameras…")
    self.update_idletasks()

    probe_error: Exception | None = None
    try:
        self._cameras = enumerate_cameras()
    except Exception as err:
        self._cameras = []
        probe_error = err

    if not self._cameras:
        self._camera_dropdown.configure(values=["No cameras detected"])
        self._camera_var.set("No cameras detected")
        if probe_error is not None:
            self._status_var.set(f"Camera probe failed: {probe_error}")
        else:
            self._status_var.set("No cameras detected. Plug one in and click ↻.")
        return

    # Sprint 14o: prefix virtual cameras with ⚠ in the dropdown so
    # users see the feedback-loop warning at a glance.
    labels = [
        f"⚠ {cam.label}" if cam.virtual else cam.label for cam in self._cameras
    ]
    self._camera_dropdown.configure(values=labels)

    # Prefer the first REAL camera as the default selection.
    real_idx = next(
        (i for i, cam in enumerate(self._cameras) if not cam.virtual),
        None,
    )
    if real_idx is None:
        # Only virtuals available — picking one would create the
        # feedback loop if vcam output is on. Warn the user.
        self._camera_var.set(labels[0])
        self._status_var.set(
            "⚠ Only virtual cameras detected. Close any app holding "
            "your real webcam (Zoom/Teams/Discord) and click ↻."
        )
        return

    self._camera_var.set(labels[real_idx])
    virtual_count = sum(1 for cam in self._cameras if cam.virtual)
    note = f" ({virtual_count} virtual hidden from default)" if virtual_count else ""
    self._status_var.set(f"{len(self._cameras)} camera(s) detected{note}.")
518
+
519
def _refresh_status(self) -> None:
    """Show a setup reminder in the status bar when the saved config is incomplete."""
    if config.load().is_complete:
        return
    self._status_var.set("⚠ Run `swap setup` first to save your license + Decart key.")
523
+
524
+ def _selected_camera(self) -> CameraDevice | None:
525
+ label = self._camera_var.get()
526
+ # Sprint 14o: dropdown labels for virtual cameras are prefixed
527
+ # with "⚠ " — strip that before matching against the original
528
+ # CameraDevice.label.
529
+ if label.startswith("⚠ "):
530
+ label = label[2:]
531
+ for cam in self._cameras:
532
+ if cam.label == label:
533
+ return cam
534
+ return None
535
+
536
+ def _selected_model(self) -> str:
537
+ # Labels look like "lucy-2 (1280×720, 20 fps)" — take the first token.
538
+ v = self._tier_var.get()
539
+ return v.split()[0] if v else "lucy-2"
540
+
541
def _on_live(self) -> None:
    """Validate the current selections and start a live session.

    Bails out with a status message when a session is already running,
    the config is incomplete, no reference face or camera is chosen, or
    the selection would feed the virtual camera output back into itself.
    Otherwise builds ``RunOptions`` and runs the session on a daemon
    worker thread with its own asyncio loop, then kicks off a best-effort
    license check on a second thread.
    """
    print("[gui] live clicked", flush=True)
    if self._session_thread is not None and self._session_thread.is_alive():
        print("[gui] bail: session already live", flush=True)
        self._status_var.set("Already live.")
        return

    cfg = config.load()
    if not cfg.is_complete:
        print("[gui] bail: config incomplete", flush=True)
        self._status_var.set("⚠ Run `swap setup` in a terminal first.")
        return
    if not self._reference_path:
        print("[gui] bail: no reference face loaded", flush=True)
        self._status_var.set("Pick a reference face first.")
        return
    camera = self._selected_camera()
    if camera is None:
        print("[gui] bail: no camera selected", flush=True)
        self._status_var.set("No camera selected.")
        return

    # Sprint 14o: refuse the feedback-loop combo. If the user has
    # picked a virtual camera as input AND has vcam output enabled,
    # swap would be reading from the same device it writes to.
    vcam_on = bool(self._vcam_var.get()) if hasattr(self, "_vcam_var") else True
    if camera.virtual and vcam_on:
        print(
            f"[gui] bail: feedback loop — camera={camera.label} + vcam on",
            flush=True,
        )
        self._status_var.set(
            "⚠ Feedback loop: virtual camera as input + virtual camera "
            "output. Pick a real webcam, or toggle off 'Output to "
            "virtual camera' in options."
        )
        return

    print(f"[gui] starting session: face={self._reference_path}, camera={camera.index}", flush=True)

    record_path: Path | None = None
    if self._record_var.get():
        from .display import default_recording_path

        record_path = default_recording_path()

    def _emit_status(msg: str) -> None:
        # Worker thread → tk main thread. Bind msg via default arg.
        self.after(0, lambda m=msg: self._status_var.set(m))

    def _capture_stop(stop_fn: Callable[[], None]) -> None:
        self._stop_session = stop_fn

    # Voice opts: only set when the toggle is on AND a voice is picked.
    # The mic and virtual cable are resolved through voice_router, seeded
    # with the devices remembered in config. None = video-only path.
    voice_id = self._selected_voice_id() if cfg.voice_enabled else None
    mic_device: int | None = None
    out_device: int | None = None
    if voice_id:
        from . import voice_router

        mic = voice_router.pick_input_device(cfg.last_microphone)
        mic_device = int(mic["index"]) if mic else 0  # fall back to default
        out = voice_router.pick_output_device(cfg.last_voice_output)
        out_device = int(out["index"]) if out else None
        if out_device is None:
            _emit_status(
                "Voice on, but no virtual audio cable detected — "
                "converted audio will be silent. Install BlackHole / VB-Cable."
            )
        # Persist for next launch.
        config.update(last_microphone=mic_device, last_voice_output=out_device)

    opts = RunOptions(
        decart_api_key=cfg.decart_api_key or "",
        reference=str(self._reference_path),
        prompt=self._prompt_box.get("1.0", "end-1c").strip() or DEFAULT_PROMPT,
        model_name=self._selected_model(),
        camera_device=camera.index,
        record=record_path,
        on_status_change=_emit_status,
        on_runtime_ready=_capture_stop,
        reference_voice=voice_id,
        microphone_device=mic_device,
        voice_output_device=out_device,
        virtual_camera=vcam_on,
    )

    # Persist the voice id so next launch defaults to it.
    if voice_id and voice_id != cfg.last_voice_id:
        config.update(last_voice_id=voice_id)

    self._stop_event = asyncio.Event()
    self._set_running(True)
    self._status_var.set("Connecting…")

    def _session_finished() -> None:
        # Runs on the tk main thread after the worker has unwound.
        self._set_running(False)
        # An "Error: …" status queued by _supervised_run is delivered just
        # before this callback; keep it visible instead of replacing it
        # with the generic end-of-session text.
        if not self._status_var.get().startswith("Error:"):
            self._status_var.set("Session ended.")

    def worker() -> None:
        print("[gui] worker thread started", flush=True)
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        self._session_loop = loop
        try:
            loop.run_until_complete(self._supervised_run(opts))
        except BaseException as e:
            import traceback

            traceback.print_exc()
            print(f"[gui] worker died: {e}", flush=True)
        finally:
            print("[gui] worker thread exiting", flush=True)
            loop.close()
            self._session_loop = None
            self._stop_session = None
            self.after(0, _session_finished)

    self._session_thread = threading.Thread(target=worker, daemon=True)
    self._session_thread.start()

    # Best-effort license check (non-blocking).
    threading.Thread(target=self._check_license_async, daemon=True).start()
668
+
669
async def _supervised_run(self, opts: RunOptions) -> None:
    """Await ``run_session(opts)`` and report any exception in the status bar.

    Exceptions are logged with a traceback and turned into an "Error: …"
    status scheduled on the tk main thread; they are not re-raised.
    """
    print("[gui] _supervised_run entered", flush=True)
    try:
        await run_session(opts)
    except Exception as err:
        import traceback

        traceback.print_exc()
        # Fall back to the exception class name when the message is empty.
        text = str(err)
        msg = text if text else err.__class__.__name__
        print(f"[gui] caught exception: {msg}", flush=True)
        self.after(0, lambda m=msg: self._status_var.set(f"Error: {m}"))
    else:
        print("[gui] run_session returned cleanly", flush=True)
681
+
682
def _check_license_async(self) -> None:
    """Validate the license on a private event loop; report only an invalid result.

    Meant to run on a background thread. Any exception (for example, the
    backend being unreachable) is swallowed on purpose.
    """
    try:
        private_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(private_loop)
        try:
            status = private_loop.run_until_complete(license.validate())
        finally:
            private_loop.close()

        if status.valid:
            return

        def _show_invalid() -> None:
            self._status_var.set(
                f"License invalid ({status.reason}) — buy at swap.ikieguy.online"
            )

        # Hand the status update to the tk main thread.
        self.after(0, _show_invalid)
    except Exception:
        # Network down or backend not reachable; offline grace handles it
        pass
700
+
701
+ def _on_stop(self) -> None:
702
+ if self._stop_session is None:
703
+ self._status_var.set("Nothing running.")
704
+ return
705
+ print("[gui] stop clicked", flush=True)
706
+ self._status_var.set("Stopping…")
707
+ try:
708
+ self._stop_session()
709
+ except Exception as err: # noqa: BLE001
710
+ self._status_var.set(f"Stop failed: {err}")
711
+ return
712
+ # Disable Stop immediately so the user can't double-click; the worker's
713
+ # `finally` will reset _set_running(False) when the loop fully unwinds.
714
+ self._stop_btn.configure(state="disabled")
715
+ self._stop_session = None
716
+
717
def _on_enable_voice(self) -> None:
    """Show the Enable Voice dialog (prerequisite check + guided install)."""
    # grab_set() makes the dialog modal: input to the main window is blocked.
    _EnableVoiceModal(self).grab_set()
721
+
722
def _on_disable_voice(self) -> None:
    """Switch voice off and remember that choice in config.

    Only the ``voice_enabled`` flag is changed; nothing is uninstalled.
    """
    from . import config as cfg_module

    cfg_module.update(voice_enabled=False)
    self._status_var.set("Voice: disabled.")
    # Swap the expanded voice row back to the collapsed one.
    self._refresh_voice_section()
729
+
730
+ # ── Standalone voice test (no Decart, zero tokens) ─────────────────
731
+
732
+ def _on_test_voice(self) -> None:
733
+ """Toggle: start voice-only test if idle, stop if already running."""
734
+ if self._voice_test_thread is not None and self._voice_test_thread.is_alive():
735
+ self._stop_voice_test()
736
+ return
737
+ self._start_voice_test()
738
+
739
def _start_voice_test(self) -> None:
    """Start the standalone voice test on a daemon worker thread.

    Refuses to start while a live session is running, when no voice is
    selected, or when the selected voice cannot be found. A missing
    virtual audio cable only produces a hint; the test still runs so the
    model itself can be verified.
    """
    # Don't run while a full session is up — they'd both want the mic.
    if self._session_thread is not None and self._session_thread.is_alive():
        self._status_var.set("Stop the live session before testing voice.")
        return

    voice_id = self._selected_voice_id()
    if not voice_id:
        self._status_var.set("Pick a voice first.")
        return

    from . import voice_library, voice_router

    target = voice_library.find_voice(voice_id)
    if target is None:
        self._status_var.set(f"Voice '{voice_id}' not found.")
        return

    mic = voice_router.pick_input_device(None)
    mic_idx = int(mic["index"]) if mic else 0
    out = voice_router.pick_output_device(None)
    out_idx = int(out["index"]) if out else None

    if out_idx is None:
        cable = voice_router.virtual_cable_hint()
        self._status_var.set(
            f"No virtual audio cable detected. Install {cable.name} "
            "to route the cloned voice to Zoom/Meet/OBS."
        )
        # We still run — gives the user a way to verify the model
        # itself works even without routing.

    def _emit(msg: str) -> None:
        # Worker thread → tk main thread.
        self.after(0, lambda m=msg: self._status_var.set(m))

    cfg_for_engine = config.load()

    async def _runner() -> None:
        # Create the stop event and publish the stop hook BEFORE any slow
        # work (imports, track construction). The hook closes over
        # stop_evt, so the event must exist first, and a "Stop test" click
        # that arrives during start-up must not be lost.
        loop = asyncio.get_running_loop()
        stop_evt = asyncio.Event()

        def _request_stop() -> None:
            # Thread-safe: may be called from the tk main thread.
            loop.call_soon_threadsafe(stop_evt.set)

        self._voice_test_stop = _request_stop

        from .voice_track import VoiceTrack, VoiceTrackOptions

        track = VoiceTrack(
            VoiceTrackOptions(
                voice=target,
                microphone_device=mic_idx,
                output_device=out_idx,
                engine_name=cfg_for_engine.voice_engine,
                fast=cfg_for_engine.voice_fast,
            )
        )

        track.start(on_status=_emit)
        try:
            await stop_evt.wait()
        finally:
            await track.stop()

    def _worker() -> None:
        print("[gui] voice test thread started", flush=True)
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        self._voice_test_loop = loop
        try:
            loop.run_until_complete(_runner())
        except Exception as err:  # noqa: BLE001
            import traceback

            traceback.print_exc()
            _emit(f"Voice test failed: {err}")
        finally:
            print("[gui] voice test thread exiting", flush=True)
            loop.close()
            self._voice_test_loop = None
            self._voice_test_stop = None
            self.after(0, self._voice_test_finished)

    self._set_voice_testing(True)
    _emit(f"Voice test: starting ({target.name})…")
    self._voice_test_thread = threading.Thread(target=_worker, daemon=True)
    self._voice_test_thread.start()
827
+
828
+ def _stop_voice_test(self) -> None:
829
+ if self._voice_test_stop is not None:
830
+ try:
831
+ self._voice_test_stop()
832
+ except Exception as err: # noqa: BLE001
833
+ self._status_var.set(f"Voice test stop failed: {err}")
834
+ return
835
+ self._test_voice_btn.configure(state="disabled", text="Stopping…")
836
+
837
+ def _voice_test_finished(self) -> None:
838
+ self._set_voice_testing(False)
839
+ self._voice_test_thread = None
840
+ self._status_var.set("Voice test ended.")
841
+
842
+ def _set_voice_testing(self, testing: bool) -> None:
843
+ """Disable Live / Disable / dropdown while voice test is running."""
844
+ self._test_voice_btn.configure(
845
+ state="normal",
846
+ text="Stop test" if testing else "Test voice",
847
+ fg_color="#ef4444" if testing else "#0ea5e9",
848
+ hover_color="#dc2626" if testing else "#0284c7",
849
+ )
850
+ self._live_btn.configure(state="disabled" if testing else "normal")
851
+ self._disable_voice_btn.configure(state="disabled" if testing else "normal")
852
+ try:
853
+ self._voice_dropdown.configure(state="disabled" if testing else "readonly")
854
+ except Exception:
855
+ pass
856
+
857
def _refresh_voice_section(self) -> None:
    """Display the expanded or the collapsed voice row according to config.voice_enabled."""
    from . import config as cfg_module

    cfg = cfg_module.load()
    voice_on = cfg.voice_enabled

    # Decide which row disappears and which one is shown.
    if voice_on:
        outgoing, incoming = self._voice_collapsed_row, self._voice_expanded_row
        pack_opts = {"fill": "x", "pady": (2, 0)}
    else:
        outgoing, incoming = self._voice_expanded_row, self._voice_collapsed_row
        pack_opts = {"fill": "x"}

    try:
        outgoing.pack_forget()
    except Exception:
        pass
    if not incoming.winfo_ismapped():
        incoming.pack(**pack_opts)

    if voice_on:
        # Fill the dropdown and restore the last used voice.
        self._populate_voice_library(cfg.last_voice_id)
878
+
879
def _populate_voice_library(self, preferred_id: str | None) -> None:
    """Fill the voice dropdown and select ``preferred_id`` when it is available.

    Also rebuilds ``self._voice_label_to_id``, the lookup from dropdown
    label to voice id. With no voices, a placeholder entry is shown and
    the lookup is left untouched.
    """
    from . import voice_library

    voices = voice_library.load_all_voices()
    if not voices:
        placeholder = "(no voices found)"
        self._voice_dropdown.configure(values=[placeholder])
        self._voice_var.set(placeholder)
        return

    labels = [self._format_voice_label(v) for v in voices]
    self._voice_dropdown.configure(values=labels)
    self._voice_label_to_id = {
        label: v.id for label, v in zip(labels, voices, strict=False)
    }

    # Default to the first entry unless the preferred voice is still present.
    chosen_label = labels[0]
    if preferred_id:
        chosen_label = next(
            (
                label
                for v, label in zip(voices, labels, strict=False)
                if v.id == preferred_id
            ),
            chosen_label,
        )
    self._voice_var.set(chosen_label)
902
+
903
+ @staticmethod
904
+ def _format_voice_label(voice) -> str: # type: ignore[no-untyped-def]
905
+ suffix = "library" if voice.is_library else "custom"
906
+ return f"{voice.name} — {voice.description} [{suffix}]"
907
+
908
+ def _selected_voice_id(self) -> str | None:
909
+ label_to_id = getattr(self, "_voice_label_to_id", {})
910
+ return label_to_id.get(self._voice_var.get())
911
+
912
+ def _set_running(self, running: bool) -> None:
913
+ self._live_btn.configure(state="disabled" if running else "normal")
914
+ self._stop_btn.configure(state="normal" if running else "disabled")
915
+ self._select_face_btn.configure(state="disabled" if running else "normal")
916
+ self._camera_dropdown.configure(state="disabled" if running else "readonly")
917
+ self._tier_dropdown.configure(state="disabled" if running else "readonly")
918
+ self._enable_voice_btn.configure(state="disabled" if running else "normal")
919
+ try:
920
+ self._voice_dropdown.configure(state="disabled" if running else "readonly")
921
+ self._disable_voice_btn.configure(state="disabled" if running else "normal")
922
+ # Disable Test voice during a full live session — both grab the mic.
923
+ self._test_voice_btn.configure(state="disabled" if running else "normal")
924
+ except Exception:
925
+ pass
926
+
927
+ def _raise_to_front(self) -> None:
928
+ """Flash the window topmost for one beat so it's visible at startup."""
929
+ try:
930
+ self.lift()
931
+ self.attributes("-topmost", True)
932
+ self.after(200, lambda: self.attributes("-topmost", False))
933
+ self.focus_force()
934
+ except Exception: # noqa: BLE001 — non-fatal cosmetic
935
+ pass
936
+
937
+
938
class _EnableVoiceModal(ctk.CTkToplevel):
    """Voice setup wizard. Shows prereq checks inline, lets user run install.

    On success, sets config.voice_enabled = True and refreshes the parent
    GUI's voice section so the library dropdown appears.
    """

    # Fixed window size, shared by the initial geometry and the centering code.
    _WIDTH = 520
    _HEIGHT = 440

    def __init__(self, parent: SwapGUI) -> None:
        """Build the modal as a child window of *parent* (the main GUI)."""
        super().__init__(parent)
        self._parent = parent
        self.title("Enable voice cloning")
        self.geometry(f"{self._WIDTH}x{self._HEIGHT}")
        self.resizable(False, False)
        # Center over parent.
        self.after(0, self._center_on_parent)

        outer = ctk.CTkFrame(self, fg_color="transparent")
        outer.pack(fill="both", expand=True, padx=20, pady=18)

        ctk.CTkLabel(
            outer,
            text="Voice cloning prerequisites",
            font=ctk.CTkFont(size=14, weight="bold"),
            anchor="w",
        ).pack(fill="x", pady=(0, 6))

        self._checks_frame = ctk.CTkFrame(outer, fg_color="#1f2937", corner_radius=8)
        self._checks_frame.pack(fill="x", pady=(0, 10))
        self._render_prereqs()

        ctk.CTkLabel(
            outer,
            text=(
                "Voice features add ~3 GB of CUDA-matched PyTorch + RVC stack.\n"
                "You'll also need an RVC .pth model (download from weights.gg)\n"
                "and a virtual audio cable (BlackHole on macOS, VB-Cable on Windows)."
            ),
            anchor="w",
            justify="left",
            text_color="#9ca3af",
        ).pack(fill="x", pady=(0, 12))

        self._status_var = tk.StringVar(value="")
        ctk.CTkLabel(
            outer,
            textvariable=self._status_var,
            anchor="w",
            text_color="#10b981",
        ).pack(fill="x")

        # Action buttons
        actions = ctk.CTkFrame(outer, fg_color="transparent")
        actions.pack(fill="x", side="bottom", pady=(12, 0))
        actions.columnconfigure((0, 1), weight=1, uniform="ev")

        self._skip_btn = ctk.CTkButton(
            actions,
            text="Skip — keep video only",
            command=self._on_skip,
            height=36,
            fg_color="#374151",
            hover_color="#4b5563",
        )
        self._skip_btn.grid(row=0, column=0, sticky="ew", padx=(0, 6))

        self._continue_btn = ctk.CTkButton(
            actions,
            text="Continue",
            command=self._on_continue,
            height=36,
            fg_color="#ec4899",
            hover_color="#db2777",
        )
        self._continue_btn.grid(row=0, column=1, sticky="ew")

        # The first _render_prereqs() call ran before the Continue button
        # existed, so apply a GPU block found by that initial check now.
        self._apply_gpu_block()

    def _render_prereqs(self) -> None:
        """Re-run all prerequisite checks and redraw one row per check."""
        # Wipe and re-render — called after each step so the UI tracks state.
        for child in self._checks_frame.winfo_children():
            child.destroy()

        from . import voice_prereq

        result = voice_prereq.check_all()
        self._latest_result = result

        rows = [
            ("GPU", result.gpu),
            ("Voice deps (torch, rvc-python, fairseq)", result.deps_installed),
            ("ffmpeg on PATH", result.ffmpeg),
            ("Visual C++ Build Tools", result.build_tools),
            ("Virtual audio cable", result.audio_cable),
        ]
        for title, check in rows:
            row = ctk.CTkFrame(self._checks_frame, fg_color="transparent")
            row.pack(fill="x", padx=10, pady=4)
            ctk.CTkLabel(
                row,
                text="✓" if check.ok else "✗",
                width=20,
                text_color="#10b981" if check.ok else "#ef4444",
                font=ctk.CTkFont(size=14, weight="bold"),
            ).pack(side="left")
            label_text = f"{title}: {check.label}"
            # Only failed checks show their remediation hint.
            if check.hint and not check.ok:
                label_text += f" → {check.hint}"
            ctk.CTkLabel(
                row,
                text=label_text,
                anchor="w",
                justify="left",
                wraplength=420,
            ).pack(side="left", fill="x", expand=True)

        self._apply_gpu_block()

    def _apply_gpu_block(self) -> None:
        """Turn Continue into a "Got it" close button when the GPU is blocked.

        Safe to call before either the check result or the button exists;
        in that case it does nothing.
        """
        result = getattr(self, "_latest_result", None)
        button = getattr(self, "_continue_btn", None)
        if result is None or button is None or not result.gpu_blocked:
            return
        button.configure(
            text="Got it — keep video only",
            command=self._on_skip,
        )

    def _center_on_parent(self) -> None:
        """Place the modal over the middle of the parent window (best effort)."""
        try:
            px = self._parent.winfo_x()
            py = self._parent.winfo_y()
            pw = self._parent.winfo_width()
            ph = self._parent.winfo_height()
            ww = self._WIDTH
            wh = self._HEIGHT
            x = px + (pw - ww) // 2
            y = py + (ph - wh) // 2
            # Clamp so the window never starts at negative screen coordinates.
            self.geometry(f"{ww}x{wh}+{max(0, x)}+{max(0, y)}")
        except Exception:
            pass

    def _on_skip(self) -> None:
        """Close the modal without enabling voice."""
        self.destroy()

    def _on_continue(self) -> None:
        """Lock the buttons and start the install in a background thread."""
        # Disable buttons during install.
        self._skip_btn.configure(state="disabled")
        self._continue_btn.configure(state="disabled", text="Installing…")
        self._status_var.set("Installing voice deps + downloading weights …")
        self.update_idletasks()
        # Run the install in a worker thread so the UI stays responsive.
        threading.Thread(target=self._install_worker, daemon=True).start()

    def _install_worker(self) -> None:
        """Install missing voice deps and flag voice as enabled (worker thread).

        Runs off the UI thread, so every UI update — success and both failure
        paths — is handed back through ``self.after`` instead of touching
        widgets directly.
        """
        from . import config as _config
        from . import voice_ops, voice_prereq

        try:
            pre = voice_prereq.check_all()
            if not pre.deps_installed.ok:
                ok = voice_ops.install_voice_deps()
                if not ok:
                    self.after(0, lambda: self._on_install_error("pip install failed"))
                    return
            _config.update(voice_enabled=True)
            self.after(0, self._on_install_done)
        except Exception as err:  # noqa: BLE001
            self.after(0, lambda e=err: self._on_install_error(str(e)))

    def _on_install_done(self) -> None:
        """Show the success state and turn Continue into a Done button."""
        self._status_var.set("✓ Voice features ready.")
        self._render_prereqs()
        self._skip_btn.configure(state="normal", text="Close")
        self._continue_btn.configure(text="Done", command=self._finish_and_close)
        self._continue_btn.configure(state="normal")

    def _on_install_error(self, msg: str) -> None:
        """Show *msg* as the failure reason and offer Retry / Close."""
        self._status_var.set(f"✗ {msg}")
        self._skip_btn.configure(state="normal", text="Close")
        self._continue_btn.configure(state="normal", text="Retry", command=self._on_continue)

    def _finish_and_close(self) -> None:
        """Refresh the parent's voice section, update its status and close."""
        self._parent._refresh_voice_section()
        self._parent._status_var.set("Voice: enabled. Pick a voice to use.")
        self.destroy()
1116
+
1117
+
1118
class VoiceOnlyGUI(ctk.CTk):
    """Stripped single-purpose window — voice cloning only, no face/camera/Decart.

    Launched via `swap gui --voice`. Reuses VoiceTrack, voice_library,
    voice_router. License + Decart-key checks are skipped: voice runs
    locally and we trust the swap-cli install itself.
    """

    def __init__(self) -> None:
        """Create the window, centre it on screen and load voices/devices."""
        super().__init__()
        self.title(f"swap voice {__version__}")
        W, H = 460, 460
        self.minsize(420, 420)
        self.update_idletasks()
        sw = self.winfo_screenwidth()
        sh = self.winfo_screenheight()
        self.geometry(f"{W}x{H}+{max(0, (sw - W) // 2)}+{max(0, (sh - H) // 2)}")
        self.after(0, self._raise_to_front)

        self._track = None  # voice_track.VoiceTrack | None
        self._track_thread: threading.Thread | None = None
        self._track_loop: asyncio.AbstractEventLoop | None = None
        self._track_stop: Callable[[], None] | None = None
        # Set by _on_stop so a Stop click that arrives before the worker has
        # published _track_stop is not lost.
        self._stop_requested = threading.Event()
        self._status_var = tk.StringVar(value="Idle.")

        self._build_ui()
        self._refresh_devices()

    # ── UI build ──────────────────────────────────────────────────────

    def _build_ui(self) -> None:
        """Create the header, the three dropdowns, Start/Stop and status bar."""
        outer = ctk.CTkFrame(self, fg_color="transparent")
        outer.pack(fill="both", expand=True, padx=20, pady=18)

        ctk.CTkLabel(
            outer,
            text="Voice clone · live",
            font=ctk.CTkFont(size=18, weight="bold"),
            anchor="w",
        ).pack(fill="x", pady=(0, 4))
        ctk.CTkLabel(
            outer,
            text=(
                "Local-only voice transformation for calls. No video, no "
                "Decart, zero token cost."
            ),
            anchor="w",
            justify="left",
            text_color="#6b7280",
            wraplength=400,
        ).pack(fill="x", pady=(0, 14))

        # ① Reference voice
        ctk.CTkLabel(
            outer, text="① Reference voice", anchor="w", font=ctk.CTkFont(size=11)
        ).pack(anchor="w")
        self._voice_var = tk.StringVar(value="(no voices found)")
        self._voice_dropdown = ctk.CTkComboBox(
            outer, values=[], variable=self._voice_var, state="readonly", height=32
        )
        self._voice_dropdown.pack(fill="x", pady=(2, 14))

        # ② Microphone
        ctk.CTkLabel(
            outer, text="② Microphone", anchor="w", font=ctk.CTkFont(size=11)
        ).pack(anchor="w")
        self._mic_var = tk.StringVar(value="(no mic found)")
        self._mic_dropdown = ctk.CTkComboBox(
            outer, values=[], variable=self._mic_var, state="readonly", height=32
        )
        self._mic_dropdown.pack(fill="x", pady=(2, 14))

        # ③ Output (virtual cable)
        out_row = ctk.CTkFrame(outer, fg_color="transparent")
        out_row.pack(fill="x")
        out_row.columnconfigure(0, weight=1)
        ctk.CTkLabel(
            out_row, text="③ Output (virtual cable)", anchor="w", font=ctk.CTkFont(size=11)
        ).grid(row=0, column=0, sticky="w", columnspan=2)
        self._output_var = tk.StringVar(value="(no output found)")
        self._output_dropdown = ctk.CTkComboBox(
            out_row, values=[], variable=self._output_var, state="readonly", height=32
        )
        self._output_dropdown.grid(row=1, column=0, sticky="ew", padx=(0, 8))
        ctk.CTkButton(
            out_row, text="↻", width=42, height=32, command=self._refresh_devices
        ).grid(row=1, column=1)

        # Action row
        actions = ctk.CTkFrame(outer, fg_color="transparent")
        actions.pack(fill="x", pady=(20, 0))
        actions.columnconfigure((0, 1), weight=1, uniform="act")

        self._start_btn = ctk.CTkButton(
            actions,
            text="▶ Start",
            command=self._on_start,
            height=44,
            corner_radius=8,
            fg_color="#10b981",
            hover_color="#059669",
        )
        self._start_btn.grid(row=0, column=0, sticky="ew", padx=(0, 6))

        self._stop_btn = ctk.CTkButton(
            actions,
            text="Stop",
            command=self._on_stop,
            height=44,
            corner_radius=8,
            fg_color="#374151",
            hover_color="#4b5563",
            state="disabled",
        )
        self._stop_btn.grid(row=0, column=1, sticky="ew")

        # Status bar
        status = ctk.CTkFrame(outer, fg_color="transparent")
        status.pack(fill="x", pady=(18, 0))
        ctk.CTkLabel(
            status,
            textvariable=self._status_var,
            anchor="w",
            text_color="#9ca3af",
        ).pack(side="left", fill="x", expand=True)

    # ── Device discovery ──────────────────────────────────────────────

    def _refresh_devices(self) -> None:
        """Reload voices, microphones and outputs into the three dropdowns.

        Each dropdown falls back to a single placeholder entry when nothing
        is found. For outputs, a detected virtual cable is preselected when
        ``voice_router`` reports one; otherwise the first output is used.
        """
        from . import voice_library, voice_router

        # Voices
        voices = voice_library.load_all_voices()
        if voices:
            self._voice_label_to_id = {self._fmt_voice(v): v.id for v in voices}
            labels = list(self._voice_label_to_id.keys())
            self._voice_dropdown.configure(values=labels)
            self._voice_var.set(labels[0])
        else:
            self._voice_dropdown.configure(values=["(no voices found)"])
            self._voice_var.set("(no voices found)")

        # Mics + outputs (require sounddevice)
        inputs, outputs = voice_router.list_audio_devices()

        if inputs:
            self._mic_label_to_idx = {
                self._device_label(d): int(d["index"]) for d in inputs
            }
            self._mic_dropdown.configure(values=list(self._mic_label_to_idx.keys()))
            self._mic_var.set(next(iter(self._mic_label_to_idx)))
        else:
            self._mic_dropdown.configure(values=["(no mic found)"])
            self._mic_var.set("(no mic found)")

        if outputs:
            self._out_label_to_idx = {
                self._device_label(d): int(d["index"]) for d in outputs
            }
            self._output_dropdown.configure(values=list(self._out_label_to_idx.keys()))
            # Prefer auto-detected virtual cable.
            cable = voice_router.detect_virtual_cable_in_devices(outputs)
            if cable is not None:
                self._output_var.set(self._device_label(cable))
            else:
                self._output_var.set(next(iter(self._out_label_to_idx)))
        else:
            self._output_dropdown.configure(values=["(no output found)"])
            self._output_var.set("(no output found)")

    @staticmethod
    def _device_label(device) -> str:  # type: ignore[no-untyped-def]
        """Return the dropdown label ``"<name> (#<index>)"`` for a device entry."""
        return f"{device['name']} (#{device['index']})"

    @staticmethod
    def _fmt_voice(voice) -> str:  # type: ignore[no-untyped-def]
        """Return the label ``"<name> — <description> [library|custom]"``."""
        suffix = "library" if voice.is_library else "custom"
        return f"{voice.name} — {voice.description} [{suffix}]"

    def _selected_voice_id(self) -> str | None:
        """Return the id of the selected voice, or None for a placeholder."""
        return getattr(self, "_voice_label_to_id", {}).get(self._voice_var.get())

    def _selected_mic(self) -> int | None:
        """Return the device index of the selected microphone, or None."""
        return getattr(self, "_mic_label_to_idx", {}).get(self._mic_var.get())

    def _selected_output(self) -> int | None:
        """Return the device index of the selected output, or None."""
        return getattr(self, "_out_label_to_idx", {}).get(self._output_var.get())

    # ── Start / stop ─────────────────────────────────────────────────

    def _on_start(self) -> None:
        """Validate the selection and run a VoiceTrack on a worker thread.

        The worker owns a private asyncio loop. Status messages and the
        final "stopped" notification are scheduled onto the Tk loop through
        ``self.after``.
        """
        if self._track_thread is not None and self._track_thread.is_alive():
            self._status_var.set("Already running.")
            return
        voice_id = self._selected_voice_id()
        mic = self._selected_mic()
        output = self._selected_output()

        if not voice_id:
            self._status_var.set("Pick a voice first.")
            return
        if mic is None:
            self._status_var.set("Pick a microphone first.")
            return

        from . import voice_library

        target = voice_library.find_voice(voice_id)
        if target is None:
            self._status_var.set(f"Voice '{voice_id}' not found.")
            return

        def _emit(msg: str) -> None:
            self.after(0, lambda m=msg: self._status_var.set(m))

        from . import config as _cfg_mod

        cfg_for_engine = _cfg_mod.load()

        async def _runner() -> None:
            from .voice_track import VoiceTrack, VoiceTrackOptions

            stop_evt = asyncio.Event()
            loop = asyncio.get_running_loop()

            def _request_stop() -> None:
                loop.call_soon_threadsafe(stop_evt.set)

            self._track_stop = _request_stop
            # Stop was clicked while this coroutine was still starting up
            # (before _track_stop existed): honour it instead of starting.
            if self._stop_requested.is_set():
                return

            track = VoiceTrack(
                VoiceTrackOptions(
                    voice=target,
                    microphone_device=mic,
                    output_device=output,
                    engine_name=cfg_for_engine.voice_engine,
                    fast=cfg_for_engine.voice_fast,
                )
            )
            track.start(on_status=_emit)
            try:
                await stop_evt.wait()
            finally:
                await track.stop()

        def _worker() -> None:
            print("[voice-gui] worker started", flush=True)
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            self._track_loop = loop
            failure: str | None = None
            try:
                loop.run_until_complete(_runner())
            except Exception as err:  # noqa: BLE001
                import traceback

                traceback.print_exc()
                # Carried into _on_stopped so the error is not immediately
                # replaced by the generic "Stopped." message.
                failure = f"Failed: {err}"
            finally:
                print("[voice-gui] worker exiting", flush=True)
                loop.close()
                self._track_loop = None
                self._track_stop = None
                self.after(0, lambda msg=failure: self._on_stopped(msg))

        self._stop_requested.clear()
        self._set_running(True)
        _emit(f"Starting ({target.name})…")
        self._track_thread = threading.Thread(target=_worker, daemon=True)
        self._track_thread.start()

    def _on_stop(self) -> None:
        """Ask the running session to stop and show "Stopping…"."""
        # Record the request first: if the worker has not published
        # _track_stop yet, it checks this flag right after doing so.
        self._stop_requested.set()
        stop = self._track_stop
        if stop is not None:
            try:
                stop()
            except Exception as err:  # noqa: BLE001
                self._status_var.set(f"Stop failed: {err}")
                return
        self._stop_btn.configure(state="disabled", text="Stopping…")

    def _on_stopped(self, error: str | None = None) -> None:
        """Restore the idle UI once the worker thread has finished.

        Args:
            error: failure message to show instead of "Stopped." when the
                session ended because of an exception.
        """
        self._set_running(False)
        self._track_thread = None
        self._status_var.set("Stopped." if error is None else error)

    def _set_running(self, running: bool) -> None:
        """Lock the dropdowns and flip Start/Stop to match *running*."""
        self._start_btn.configure(state="disabled" if running else "normal")
        self._stop_btn.configure(state="normal" if running else "disabled", text="Stop")
        self._voice_dropdown.configure(state="disabled" if running else "readonly")
        self._mic_dropdown.configure(state="disabled" if running else "readonly")
        self._output_dropdown.configure(state="disabled" if running else "readonly")

    def _raise_to_front(self) -> None:
        """Briefly mark the window topmost so it is visible at startup."""
        try:
            self.lift()
            self.attributes("-topmost", True)
            self.after(200, lambda: self.attributes("-topmost", False))
            self.focus_force()
        except Exception:
            pass
1412
+
1413
+
1414
class _SettingsModal(ctk.CTkToplevel):
    """Settings panel — view license key, rotate Decart API key, open
    the config folder. Sprint 14l.

    All state lives in config.toml on disk. We never cache it in memory
    longer than the modal's lifetime, so save → close → reopen always
    reflects current truth.
    """

    def __init__(self, parent: "SwapGUI") -> None:
        """Build the modal as a child window of *parent* (the main GUI)."""
        super().__init__(parent)
        self._parent = parent
        self.title("Settings")
        self.geometry("560x420")
        self.resizable(False, False)
        self.after(0, self._center_on_parent)

        cfg = config.load()

        outer = ctk.CTkFrame(self, fg_color="transparent")
        outer.pack(fill="both", expand=True, padx=20, pady=18)

        ctk.CTkLabel(
            outer,
            text="Settings",
            font=ctk.CTkFont(size=16, weight="bold"),
            anchor="w",
        ).pack(fill="x", pady=(0, 10))

        # ── License key (read-only) ────────────────────────────────────
        license_frame = ctk.CTkFrame(outer, fg_color="#1f2937", corner_radius=8)
        license_frame.pack(fill="x", pady=(0, 12))
        ctk.CTkLabel(
            license_frame,
            text="License key",
            font=ctk.CTkFont(size=11, weight="bold"),
            text_color="#9ca3af",
            anchor="w",
        ).pack(fill="x", padx=14, pady=(10, 0))
        ctk.CTkLabel(
            license_frame,
            text=_redact_key(cfg.license_key),
            font=ctk.CTkFont(size=13),
            anchor="w",
        ).pack(fill="x", padx=14, pady=(2, 4))
        ctk.CTkLabel(
            license_frame,
            text="Bought from swap.ikieguy.online — rotate by contacting support.",
            font=ctk.CTkFont(size=10),
            text_color="#6b7280",
            anchor="w",
        ).pack(fill="x", padx=14, pady=(0, 10))

        # ── Decart key (editable) ──────────────────────────────────────
        decart_frame = ctk.CTkFrame(outer, fg_color="#1f2937", corner_radius=8)
        decart_frame.pack(fill="x", pady=(0, 12))
        ctk.CTkLabel(
            decart_frame,
            text="Decart API key",
            font=ctk.CTkFont(size=11, weight="bold"),
            text_color="#9ca3af",
            anchor="w",
        ).pack(fill="x", padx=14, pady=(10, 0))

        self._decart_var = tk.StringVar(value=cfg.decart_api_key or "")
        self._editing = False
        self._decart_show = tk.BooleanVar(value=False)
        self._decart_label = ctk.CTkLabel(
            decart_frame,
            text=_redact_key(cfg.decart_api_key),
            font=ctk.CTkFont(size=13),
            anchor="w",
        )
        self._decart_label.pack(fill="x", padx=14, pady=(2, 4))

        self._decart_entry = ctk.CTkEntry(
            decart_frame,
            textvariable=self._decart_var,
            show="•",
            placeholder_text="dct_…",
        )
        # Hidden until Edit is clicked.

        # Kept on self: the label/entry are re-packed *before* this row so
        # they return to their slot above the buttons instead of the bottom.
        self._btn_row = ctk.CTkFrame(decart_frame, fg_color="transparent")
        self._btn_row.pack(fill="x", padx=14, pady=(0, 10))
        self._edit_btn = ctk.CTkButton(
            self._btn_row, text="✎ Edit", width=80, command=self._on_edit
        )
        self._edit_btn.pack(side="left")
        self._show_btn = ctk.CTkButton(
            self._btn_row,
            text="👁 Show",
            width=80,
            command=self._on_toggle_show,
            fg_color="#374151",
            hover_color="#4b5563",
        )
        self._show_btn.pack(side="left", padx=(8, 0))
        self._save_btn = ctk.CTkButton(
            self._btn_row, text="Save", width=80, command=self._on_save
        )
        self._cancel_btn = ctk.CTkButton(
            self._btn_row,
            text="Cancel",
            width=80,
            command=self._on_cancel,
            fg_color="#374151",
            hover_color="#4b5563",
        )
        # Save/Cancel hidden until Edit is clicked.

        self._error_label = ctk.CTkLabel(
            decart_frame, text="", text_color="#ef4444", anchor="w"
        )
        self._error_label.pack(fill="x", padx=14, pady=(0, 4))

        # ── Config file path ───────────────────────────────────────────
        path_frame = ctk.CTkFrame(outer, fg_color="#1f2937", corner_radius=8)
        path_frame.pack(fill="x", pady=(0, 12))
        ctk.CTkLabel(
            path_frame,
            text="Config file",
            font=ctk.CTkFont(size=11, weight="bold"),
            text_color="#9ca3af",
            anchor="w",
        ).pack(fill="x", padx=14, pady=(10, 0))
        cfg_path = config.config_path()
        ctk.CTkLabel(
            path_frame,
            text=str(cfg_path),
            font=ctk.CTkFont(size=11),
            text_color="#d1d5db",
            anchor="w",
            wraplength=480,
            justify="left",
        ).pack(fill="x", padx=14, pady=(2, 4))
        ctk.CTkButton(
            path_frame,
            text="📁 Open folder",
            width=140,
            command=self._on_open_folder,
            fg_color="#374151",
            hover_color="#4b5563",
        ).pack(anchor="w", padx=14, pady=(0, 10))

        # ── Close ──────────────────────────────────────────────────────
        ctk.CTkButton(
            outer,
            text="Close",
            command=self.destroy,
            height=36,
            fg_color="#ec4899",
            hover_color="#db2777",
        ).pack(fill="x", side="bottom")

        self._status_var = tk.StringVar(value="")
        ctk.CTkLabel(
            outer,
            textvariable=self._status_var,
            text_color="#10b981",
            anchor="w",
        ).pack(fill="x", side="bottom", pady=(0, 8))

    def _on_edit(self) -> None:
        """Swap the redacted label for the entry and show Save/Cancel."""
        self._editing = True
        self._decart_label.pack_forget()
        # pack() appends to the end of the packing order by default, so pin
        # the entry above the button row explicitly.
        self._decart_entry.pack(fill="x", padx=14, pady=(0, 4), before=self._btn_row)
        self._edit_btn.pack_forget()
        self._show_btn.pack_forget()
        self._save_btn.pack(side="left")
        self._cancel_btn.pack(side="left", padx=(8, 0))
        self._error_label.configure(text="")

    def _collapse_editor(self, cfg) -> None:  # type: ignore[no-untyped-def]
        """Leave edit mode and show the redacted key stored in *cfg*.

        Shared by Save and Cancel. Resets the entry text to the stored key,
        returns the Show/Hide toggle to its hidden state so the button text
        matches the redacted label, and clears any validation error.
        """
        self._editing = False
        self._decart_var.set(cfg.decart_api_key or "")
        self._decart_entry.pack_forget()
        self._save_btn.pack_forget()
        self._cancel_btn.pack_forget()
        self._decart_show.set(False)
        self._show_btn.configure(text="👁 Show")
        self._decart_label.configure(text=_redact_key(cfg.decart_api_key))
        self._decart_label.pack(fill="x", padx=14, pady=(2, 4), before=self._btn_row)
        self._edit_btn.pack(side="left")
        self._show_btn.pack(side="left", padx=(8, 0))
        self._error_label.configure(text="")

    def _on_cancel(self) -> None:
        """Discard the edit and restore the key currently saved on disk."""
        self._collapse_editor(config.load())

    def _on_save(self) -> None:
        """Validate and persist the entered Decart key, then collapse.

        A ``DecartKeyValidationError`` keeps the editor open and shows the
        error text below the buttons.
        """
        try:
            apply_decart_key_update(self._decart_var.get())
        except DecartKeyValidationError as err:
            self._error_label.configure(text=str(err))
            return
        # Saved. Collapse the editor + show a toast.
        self._collapse_editor(config.load())
        self._status_var.set("✓ Decart key updated. Takes effect next session.")
        self.after(3500, lambda: self._status_var.set(""))
        # Refresh the parent status row so it picks up the change.
        try:
            self._parent._refresh_status()
        except Exception:  # noqa: BLE001 — non-fatal cosmetic
            pass

    def _on_toggle_show(self) -> None:
        """Toggle the label between the full and the redacted Decart key."""
        self._decart_show.set(not self._decart_show.get())
        cfg = config.load()
        if self._decart_show.get():
            self._decart_label.configure(text=cfg.decart_api_key or "—")
            self._show_btn.configure(text="🙈 Hide")
        else:
            self._decart_label.configure(text=_redact_key(cfg.decart_api_key))
            self._show_btn.configure(text="👁 Show")

    def _on_open_folder(self) -> None:
        """Open the config directory in the platform's file manager.

        The directory is created first if missing. Any failure is shown in
        the error label instead of being raised.
        """
        import os
        import subprocess

        folder = config.config_path().parent
        try:
            folder.mkdir(parents=True, exist_ok=True)
            if sys.platform == "win32":
                os.startfile(str(folder))  # type: ignore[attr-defined]
            elif sys.platform == "darwin":
                subprocess.Popen(["open", str(folder)])
            else:
                subprocess.Popen(["xdg-open", str(folder)])
        except Exception as err:  # noqa: BLE001
            self._error_label.configure(text=f"Couldn't open folder: {err}")

    def _center_on_parent(self) -> None:
        """Place the modal over the middle of the parent window (best effort)."""
        try:
            px = self._parent.winfo_x()
            py = self._parent.winfo_y()
            pw = self._parent.winfo_width()
            ph = self._parent.winfo_height()
            ww = 560
            wh = 420
            x = px + (pw - ww) // 2
            y = py + (ph - wh) // 2
            # Clamp so the window never starts at negative screen coordinates.
            self.geometry(f"{ww}x{wh}+{max(0, x)}+{max(0, y)}")
        except Exception:
            pass
1664
+
1665
+
1666
def launch(voice_only: bool = False) -> None:
    """Entrypoint used by `swap gui`. With voice_only=True, opens a
    stripped single-purpose window for live voice cloning only — no
    face, no camera, no Decart connection.

    Args:
        voice_only: open ``VoiceOnlyGUI`` instead of the full ``SwapGUI``.

    Raises:
        Exception: whatever window construction or the Tk main loop raised;
            the traceback is printed first and the error is then re-raised.
    """
    print(f"[gui] starting swap-cli GUI (voice_only={voice_only})", flush=True)
    try:
        app: ctk.CTk = VoiceOnlyGUI() if voice_only else SwapGUI()
    except Exception:
        import traceback

        print("[gui] failed to construct window:", flush=True)
        traceback.print_exc()
        # Pause so the user can read the trace before the cmd window closes
        # if they ran via a desktop shortcut instead of a terminal.
        try:
            input("\nPress Enter to exit…")
        except (EOFError, RuntimeError, OSError):
            # No usable stdin (closed pipe, or sys.stdin is None under
            # pythonw): skip the pause so the original error is what gets
            # re-raised below, not a failure of input() itself.
            pass
        raise
    print("[gui] entering mainloop", flush=True)
    try:
        app.mainloop()
    except Exception:
        import traceback

        print("[gui] mainloop crashed:", flush=True)
        traceback.print_exc()
        raise
    print("[gui] mainloop returned", flush=True)