swap-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
swap_cli/cli.py ADDED
@@ -0,0 +1,1183 @@
1
+ """Typer CLI entrypoint for swap-cli."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import shutil
7
+ import socket
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import Annotated
11
+
12
+ import typer
13
+ from rich.console import Console
14
+ from rich.panel import Panel
15
+ from rich.table import Table
16
+
17
+ from . import config, license
18
+ from .runtime import DEFAULT_PROMPT, RunOptions, run_session
19
+ from .version import __version__
20
+
21
# Shared Rich consoles: normal output goes to stdout, diagnostics to stderr.
console = Console()
err_console = Console(stderr=True)

# Root command group; invoking `swap` with no arguments prints the help.
app = typer.Typer(
    name="swap",
    help="Realtime deepfake on your desktop. Bring your own Decart API key.",
    no_args_is_help=True,
    rich_markup_mode="rich",
)

# Voice cloning subcommand group, mounted under `swap voices`.
voices_app = typer.Typer(
    name="voices",
    help="Manage voice cloning library and dependencies (optional feature).",
    no_args_is_help=True,
)
app.add_typer(voices_app, name="voices")
37
+
38
+
39
@app.command()
def version() -> None:
    """Print the installed version."""
    # The docstring above doubles as the CLI help text, so it stays terse.
    banner = f"swap-cli [bold]{__version__}[/bold]"
    console.print(banner)
43
+
44
+
45
@app.command()
def gui(
    voice_only: Annotated[
        bool,
        typer.Option(
            "--voice",
            help="Open a stripped voice-only window (no face/camera/Decart).",
        ),
    ] = False,
) -> None:
    """Launch the desktop GUI (recommended for non-developers)."""
    # The GUI module is imported on demand so that a missing GUI dependency
    # becomes an install hint and exit code 1 instead of a traceback.
    try:
        from .gui import launch
    except ImportError as err:
        install_hint = (
            f"[red]GUI dependencies not installed: {err}[/red]\n"
            "Install with [bold]pip install 'swap-cli[gui]'[/bold] or "
            "[bold]pip install customtkinter[/bold]."
        )
        err_console.print(install_hint)
        raise typer.Exit(1) from err
    else:
        launch(voice_only=voice_only)
66
+
67
+
68
@app.command()
def setup(
    license_key: Annotated[
        str | None,
        typer.Option(
            "--license",
            "-l",
            help="Your swap-cli license key (SWAP-CLI-…). Prompted if omitted.",
        ),
    ] = None,
    decart_api_key: Annotated[
        str | None,
        typer.Option(
            "--decart-key",
            "-d",
            help="Your Decart API key (dct_…). Prompted if omitted.",
        ),
    ] = None,
) -> None:
    """Save your license key and Decart API key to the user config dir."""
    # Strip BEFORE testing for emptiness: a whitespace-only flag value used to
    # pass the `if not ...` check, get stripped to "" and be saved as if valid.
    license_key = (license_key or "").strip()
    if not license_key:
        license_key = typer.prompt("License key (SWAP-CLI-…)").strip()

    decart_api_key = (decart_api_key or "").strip()
    if not decart_api_key:
        decart_api_key = typer.prompt(
            "Decart API key (dct_…)", hide_input=True
        ).strip()

    # A prompt answer made only of whitespace is non-empty for the prompt but
    # empty once stripped; refuse it rather than report "Setup complete".
    if not license_key or not decart_api_key:
        err_console.print(
            "[red]License key and Decart API key must not be empty.[/red]"
        )
        raise typer.Exit(2)

    cfg = config.update(
        license_key=license_key,
        decart_api_key=decart_api_key,
        # Reset the cached validation so the next launch will re-validate.
        license_cached_at=None,
        license_cached_valid_until=None,
    )
    path = config.config_path()
    console.print(
        Panel.fit(
            f"Saved to [bold]{path}[/bold]\n"
            f"License: {_redact(cfg.license_key)}\n"
            f"Decart key: {_redact(cfg.decart_api_key)}\n\n"
            "Run [bold cyan]swap doctor[/bold cyan] to verify everything works.",
            title="✓ Setup complete",
            border_style="green",
        )
    )
111
+
112
+
113
@app.command(name="config")
def show_config() -> None:
    """Show current config (keys redacted)."""
    cfg = config.load()
    unset = "[red]not set[/red]"

    # Collect the rows first so the optional expiry row is a plain append.
    rows = [
        ("config path", str(config.config_path())),
        ("license key", _redact(cfg.license_key) or unset),
        ("decart api key", _redact(cfg.decart_api_key) or unset),
        ("machine id", config.machine_id()),
    ]
    valid_until = cfg.license_cached_valid_until
    if valid_until is not None:
        rows.append(("license valid until", _format_unix(valid_until)))

    table = Table(title="swap-cli config", show_header=False, box=None)
    table.add_column("key", style="dim")
    table.add_column("value")
    for label, value in rows:
        table.add_row(label, value)
    console.print(table)
130
+
131
+
132
@app.command()
def doctor() -> None:
    """Verify camera, network, license, and Decart auth."""
    # `_doctor` is a coroutine function defined elsewhere in this module;
    # run it to completion on a fresh event loop.
    checks = _doctor()
    asyncio.run(checks)
136
+
137
+
138
@app.command()
def run(
    reference: Annotated[
        str | None,
        typer.Option(
            "--reference",
            "-r",
            help="Path or URL of the reference identity image.",
        ),
    ] = None,
    prompt: Annotated[
        str,
        typer.Option(
            "--prompt",
            "-p",
            help="Transformation prompt. Defaults to a deepfake template.",
        ),
    ] = DEFAULT_PROMPT,
    model_name: Annotated[
        str,
        typer.Option(
            "--model",
            "-m",
            help="Decart realtime model identifier.",
        ),
    ] = "lucy-2",
    device: Annotated[
        int,
        typer.Option(
            "--device",
            help="Camera device index (0 = default webcam).",
        ),
    ] = 0,
    record: Annotated[
        Path | None,
        typer.Option(
            "--record",
            help="Save the output stream to MP4 at the given path.",
        ),
    ] = None,
    skip_license: Annotated[
        bool,
        typer.Option(
            "--skip-license",
            help="[dev] skip license validation. Will be removed in 1.0.",
            hidden=True,
        ),
    ] = False,
    vcam: Annotated[
        bool,
        typer.Option(
            "--vcam/--no-vcam",
            help=(
                "Push frames to OBS Virtual Camera so Zoom/Meet/Discord "
                "see swap as a camera device — no OBS app needed. "
                "Requires OBS Studio installed (driver only)."
            ),
        ),
    ] = True,
) -> None:
    """Open a realtime Decart session and stream until you press Q."""
    # Exit codes used below: 2 = config incomplete, 3 = license problem,
    # 1 = the streaming session itself failed.
    settings = config.load()
    if not settings.is_complete:
        err_console.print(
            "[red]Run [bold]swap setup[/bold] first to save your license + Decart key.[/red]"
        )
        raise typer.Exit(2)
    assert settings.decart_api_key  # narrowed by is_complete

    if not skip_license:
        try:
            verdict = asyncio.run(license.validate())
        except license.LicenseError as err:
            err_console.print(f"[red]license: {err}[/red]")
            raise typer.Exit(3) from err
        if not verdict.valid:
            err_console.print(
                f"[red]License invalid ({verdict.reason}). "
                "Buy or renew at https://swap.ikieguy.online[/red]"
            )
            raise typer.Exit(3)
        if verdict.cached:
            console.print(f"[dim]license: cached ({verdict.reason})[/dim]")

    opts = RunOptions(
        decart_api_key=settings.decart_api_key,
        reference=reference,
        prompt=prompt,
        model_name=model_name,
        camera_device=device,
        record=record,
        virtual_camera=vcam,
    )

    # Session summary shown before the preview window opens.
    reference_label = opts.reference or "[dim]none[/dim]"
    record_label = opts.record or "[dim]off[/dim]"
    summary = "\n".join(
        [
            f"model: [bold]{opts.model_name}[/bold]",
            f"reference: {reference_label}",
            f"camera device: {opts.camera_device}",
            f"record: {record_label}",
            "",
            "[dim]Press [bold]Q[/bold] in the preview window to quit.[/dim]",
        ]
    )
    console.print(Panel.fit(summary, title="▶ swap · live", border_style="cyan"))

    try:
        asyncio.run(run_session(opts))
    except KeyboardInterrupt:
        # Ctrl+C is a normal way to stop; do not treat it as a failure.
        console.print("\n[dim]interrupted[/dim]")
    except Exception as err:  # noqa: BLE001
        err_console.print(f"[red]session failed: {err}[/red]")
        raise typer.Exit(1) from err
251
+
252
+
253
@app.command()
def voice(
    voice: Annotated[
        str,
        typer.Option(
            "--voice",
            "-v",
            help="Voice id or name (e.g. 'aria' or your custom voice).",
        ),
    ],
    mic: Annotated[
        int | None,
        typer.Option(
            "--mic",
            help="Microphone device index. Default: system default mic.",
        ),
    ] = None,
    output: Annotated[
        int | None,
        typer.Option(
            "--output",
            "-o",
            help="Output device index. Default: auto-detected virtual cable.",
        ),
    ] = None,
) -> None:
    """Run voice cloning standalone for live calls. Press Ctrl+C to stop.

    Voice runs entirely on your local GPU — no Decart connection, no
    tokens spent. Open Zoom/Meet/Discord with the virtual cable as your
    microphone (e.g. 'CABLE Output' / 'BlackHole 2ch') and speak — the
    other side hears the cloned voice.
    """
    # seconds=0 selects the "run until Ctrl+C" mode of the shared runner.
    run_forever = 0
    _run_voice_session(
        voice_name=voice,
        mic=mic,
        output=output,
        seconds=run_forever,
    )
287
+
288
+
289
def _run_voice_session(
    *,
    voice_name: str,
    mic: int | None,
    output: int | None,
    seconds: int,
) -> None:
    """Shared body for `swap voice` (forever) and `swap voices test` (timed).

    Looks up the requested voice, checks that the configured voice engine is
    installed and ready, resolves the audio devices, prints a summary panel
    and then runs a ``VoiceTrack`` until the time is up or Ctrl+C is pressed.

    Args:
        voice_name: Voice id or display name to look up.
        mic: Microphone device index, or ``None`` to let the resolver choose.
        output: Output device index, or ``None`` to let the resolver choose.
        seconds: Run time in seconds; values <= 0 mean "until Ctrl+C".

    Raises:
        typer.Exit: Code 1 when the voice is unknown, the engine is not
            installed or not ready, or the session fails.
    """
    from . import voice_ops, voice_router
    from .voice_track import VoiceTrack, VoiceTrackOptions

    target = voice_ops.find_voice_by_name_or_id(voice_name)
    if target is None:
        err_console.print(
            f"[red]Voice '{voice_name}' not found.[/red] "
            "Run [bold]swap voices list[/bold] to see available voices."
        )
        raise typer.Exit(1)

    # Sprint 14d: gate on engine readiness before spinning up audio
    # devices. Otherwise the user gets an opaque RuntimeError mid-init.
    from . import voice_engines

    # Load the config once: the engine that is checked here must be the same
    # one that is handed to the track below.
    cfg = config.load()
    engine = voice_engines.get_engine(cfg.voice_engine)
    if not engine.is_available():
        err_console.print(
            f"[red]Voice engine '{cfg.voice_engine}' isn't installed.[/red] "
            "Run [bold]swap voices install[/bold]."
        )
        raise typer.Exit(1)
    if not engine.is_ready():
        err_console.print(
            "[red]No RVC .pth voices registered.[/red] "
            "Add one with [bold]swap voices add-rvc /path/to/model.pth --name X[/bold].\n"
            "Get models from [link]https://huggingface.co/lj1995/VoiceConversionWebUI[/link] "
            "or [link]https://weights.gg[/link], or train your own with Applio."
        )
        raise typer.Exit(1)

    mic_idx, out_idx = voice_ops.resolve_voice_devices(mic, output)
    cable_hint = voice_router.virtual_cable_hint()

    if out_idx is None:
        # Not fatal: the session still runs, it just has nowhere to play to.
        err_console.print(
            "[yellow]No virtual audio cable detected.[/yellow] Install "
            f"[bold]{cable_hint.name}[/bold] so apps like Zoom/Meet can hear "
            "the cloned voice. Continuing — converted audio will be silent."
        )

    duration = "until Ctrl+C" if seconds <= 0 else f"{seconds}s"
    console.print(
        Panel.fit(
            f"voice: [bold]{target.name}[/bold] ({target.source})\n"
            f"mic device: {mic_idx}\n"
            f"output device: {out_idx if out_idx is not None else '[dim]none — silent[/dim]'}\n"
            f"duration: {duration}\n\n"
            "[dim]Local GPU only. No Decart. Zero tokens spent.[/dim]",
            title="▶ swap voice",
            border_style="cyan",
        )
    )

    async def _run() -> None:
        track = VoiceTrack(
            VoiceTrackOptions(
                voice=target,
                microphone_device=mic_idx,
                output_device=out_idx,
                engine_name=cfg.voice_engine,
                fast=cfg.voice_fast,
            )
        )
        track.start(on_status=lambda s: console.print(f"[dim]{s}[/dim]"))
        try:
            if seconds > 0:
                await asyncio.sleep(seconds)
            else:
                # Idle in long sleeps; Ctrl+C cancels the task and the
                # `finally` below stops the track.
                while True:
                    await asyncio.sleep(60)
        finally:
            await track.stop()

    try:
        asyncio.run(_run())
        console.print("[green]✓ done.[/green]")
    except KeyboardInterrupt:
        console.print("\n[dim]interrupted.[/dim]")
    except Exception as err:  # noqa: BLE001
        err_console.print(f"[red]voice session failed: {err}[/red]")
        raise typer.Exit(1) from err
384
+
385
+
386
+ # ── voices ─────────────────────────────────────────────────────────────────
387
+
388
+
389
@voices_app.command("install")
def voices_install(
    starter: Annotated[
        str | None,
        typer.Option(
            "--starter",
            help="Auto-download a specific catalog voice after install (e.g. soft-asmr).",
        ),
    ] = None,
    no_starter: Annotated[
        bool,
        typer.Option(
            "--no-starter",
            help="Skip the post-install starter-voice prompt entirely.",
        ),
    ] = False,
) -> None:
    """Install voice deps for the RVC streaming engine.

    Sprint 14e: Pulls CUDA-matched PyTorch first (Win/Linux NVIDIA),
    then RVC's runtime deps + rvc-python + fairseq. ~3–5 GB total.

    Sprint 14g: After deps install, optionally download a starter voice
    so users can run `swap gui --voice` immediately. Interactive by
    default; use --starter <slug> for CI or --no-starter to skip.
    """
    from . import rvc_catalog, voice_engines, voice_ops, voice_prereq

    pre = voice_prereq.check_all()
    if not pre.gpu.ok:
        err_console.print(
            f"[red]✗ {pre.gpu.label}.[/red] {pre.gpu.hint or ''}\n"
            "[red]Voice features require a supported GPU.[/red]"
        )
        raise typer.Exit(2)

    # Surface ffmpeg + Build Tools issues BEFORE the long pip install.
    # The hint is guarded with `or ''` exactly like the GPU hint above, so a
    # check without a hint never prints the literal text "None".
    for check in (pre.ffmpeg, pre.build_tools):
        if not check.ok:
            err_console.print(f"[red]✗ {check.label}.[/red] {check.hint or ''}")
            raise typer.Exit(2)

    if pre.deps_installed.ok:
        console.print("[green]✓ voice deps already installed.[/green]")
    else:
        console.print(
            "Installing RVC voice stack — CUDA torch, runtime deps, rvc-python, fairseq …"
        )
        if not voice_ops.install_voice_deps():
            err_console.print("[red]pip install failed.[/red]")
            raise typer.Exit(1)
        console.print("[green]✓ voice deps installed.[/green]")

    # Sprint 14g: voice deps are in place; offer a starter voice so the
    # user has something to immediately try.
    rvc_engine = voice_engines.get_engine("rvc")
    if rvc_engine.is_ready():
        # User already has at least one rvc-* voice — nothing to do.
        return

    if no_starter:
        console.print(
            "\n[dim]No voice registered yet. Browse with `swap voices catalog` "
            "or run `swap voices download <slug>`.[/dim]"
        )
        return

    if starter is not None:
        entry = rvc_catalog.find(starter)
        if entry is None:
            err_console.print(
                f"[red]Unknown catalog slug '{starter}'.[/red] "
                "Run [bold]swap voices catalog[/bold] to see options."
            )
            raise typer.Exit(1)
        _download_catalog_entry(entry)
        return

    # Interactive prompt — only when stdin is a TTY. Piped input skips.
    if not sys.stdin.isatty():
        console.print(
            "\n[dim]No voice registered yet. Run `swap voices catalog` to "
            "browse, or `swap voices install --starter <slug>` for a non-"
            "interactive setup.[/dim]"
        )
        return

    starter_entry = rvc_catalog.starter()
    # Use a plain-text yes/no confirmation: the prompt text is written by
    # click, which does not render Rich markup, so the previous
    # "[bold]…[/bold] … [Y/n] " message showed its tags literally and got an
    # extra ": " appended. `typer.confirm` adds the "[Y/n]" suffix itself and
    # re-asks on an unrecognised answer.
    wants_starter = typer.confirm(
        f"\nDownload a starter voice? "
        f"({starter_entry.name}, ~{starter_entry.total_size_mb} MB)",
        default=True,
    )
    if wants_starter:
        _download_catalog_entry(starter_entry)
    else:
        console.print(
            "[dim]Skipped. Browse with `swap voices catalog` whenever you're ready.[/dim]"
        )
494
+
495
+
496
def _download_catalog_entry(entry) -> None:  # type: ignore[no-untyped-def]
    """Download one catalog voice with a live progress display.

    Used by both `swap voices install` (starter voice) and
    `swap voices download`. Any failure is printed to stderr and turned into
    ``typer.Exit(1)``; on success the new voice's name and id are printed.
    """
    from rich.progress import (
        BarColumn,
        DownloadColumn,
        Progress,
        TextColumn,
        TimeRemainingColumn,
        TransferSpeedColumn,
    )

    from . import voice_ops

    columns = (
        TextColumn("[bold blue]{task.fields[fname]}[/bold blue]"),
        BarColumn(),
        DownloadColumn(),
        TransferSpeedColumn(),
        TimeRemainingColumn(),
    )
    progress = Progress(*columns, console=console)

    # One progress task per file name, created the first time it is reported.
    tasks_by_file: dict[str, int] = {}

    def on_progress(fname: str, done: int, total: int) -> None:
        task = tasks_by_file.get(fname)
        if task is None:
            # A falsy total is passed as None (no known total for the bar).
            task = progress.add_task("download", total=total or None, fname=fname)
            tasks_by_file[fname] = task
        progress.update(task, completed=done)

    with progress:
        try:
            voice = voice_ops.download_catalog_voice(entry, on_progress=on_progress)
        except Exception as err:  # noqa: BLE001 — httpx/network failures
            # RuntimeError messages are shown as-is; anything else gets the
            # "download failed" prefix.
            if isinstance(err, RuntimeError):
                message = f"[red]{err}[/red]"
            else:
                message = f"[red]download failed: {err}[/red]"
            err_console.print(message)
            raise typer.Exit(1) from err

    console.print(
        f"[green]✓ Voice ready:[/green] [bold]{voice.name}[/bold] (id: {voice.id})\n"
        "[dim]Try it with `swap gui --voice`.[/dim]"
    )
538
+
539
+
540
@voices_app.command("devices")
def voices_devices() -> None:
    """List audio devices with hints — pick mic + output by index."""
    from . import voice_router

    inputs, outputs = voice_router.list_audio_devices()

    def print_table(title, devices, channel_key, hint_for) -> None:  # type: ignore[no-untyped-def]
        # Both tables share the same five columns; only the channel-count key
        # and the hint logic differ between inputs and outputs.
        table = Table(title=title, show_header=True, box=None)
        table.add_column("idx", style="dim", justify="right")
        table.add_column("name")
        table.add_column("rate", justify="right")
        table.add_column("ch", justify="right")
        table.add_column("hint")
        for dev in devices:
            idx = dev.get("index")
            name = str(dev.get("name", "?"))
            rate = int(dev.get("default_samplerate", 0))
            ch = int(dev.get(channel_key, 0))
            table.add_row(str(idx), name, f"{rate}Hz", str(ch), hint_for(idx, name))
        console.print(table)

    def input_hint(_idx, name: str) -> str:  # type: ignore[no-untyped-def]
        if voice_router.is_shim_input_device(name):
            return "[red]✗ skip — Windows shim, returns silence[/red]"
        return "[green]✓ real mic[/green]"

    print_table(
        "Audio inputs (microphones)", inputs, "max_input_channels", input_hint
    )

    cable = voice_router.detect_virtual_cable_in_devices(outputs)
    cable_idx = cable.get("index") if cable else None

    def output_hint(idx, _name: str) -> str:  # type: ignore[no-untyped-def]
        # Require a real cable index: without the `is not None` guard, a
        # device lacking an "index" key would compare equal to "no cable
        # found" and be flagged as the virtual cable.
        if cable_idx is not None and idx == cable_idx:
            return "[bold yellow]★ virtual cable — pick this for swap output[/bold yellow]"
        return "[dim]speakers / headphones[/dim]"

    print_table("Audio outputs", outputs, "max_output_channels", output_hint)
584
+
585
+
586
@voices_app.command("fast")
def voices_fast(
    state: Annotated[
        str | None,
        typer.Argument(
            help="'on' or 'off'. Omit to print current setting.",
        ),
    ] = None,
) -> None:
    """Toggle Fast mode for voice streaming.

    Fast mode skips RVC's Faiss retrieval (sets index_rate=0). On voices
    with large .index files (e.g. calm-man's 607 MB), this is the
    difference between real-time and falling behind. Quality loss is
    real but acceptable for "make it work" — try Fast first, then turn
    it off if your hardware can keep up.
    """
    from . import config as _config

    cfg = _config.load()
    if state is None:
        # No argument: report the stored setting and leave it untouched.
        current = "on" if cfg.voice_fast else "off"
        console.print(f"voice fast mode: [bold]{current}[/bold]")
        return

    enable_words = {"on", "true", "1", "yes", "y"}
    disable_words = {"off", "false", "0", "no", "n"}
    choice = state.strip().lower()

    if choice not in enable_words and choice not in disable_words:
        err_console.print(
            f"[red]Unknown state '{state}'.[/red] Use 'on' or 'off'."
        )
        raise typer.Exit(1)

    if choice in enable_words:
        _config.update(voice_fast=True)
        console.print(
            "[green]✓ Fast mode ON[/green] — index_rate=0, "
            "Faiss retrieval skipped."
        )
    else:
        _config.update(voice_fast=False)
        console.print(
            "[green]✓ Fast mode OFF[/green] — index_rate at default, full quality."
        )
628
+
629
+
630
@voices_app.command("repair")
def voices_repair() -> None:
    """Migrate existing voice installs to the working fairseq fork.

    Four fixes (idempotent — safe to re-run):
    1. Replace CPU-only torch with CUDA-matched wheels on NVIDIA boxes.
       Without this, rvc-python falls back to CPU on a 4070+ and
       streaming is too slow.
    2. Reinstall fairseq from One-sixth/fairseq — RVC-friendly fork
       with dataclass + omegaconf fixes baked in. Replaces the broken
       facebookresearch fairseq main that pre-14g.4 installs picked up.
    3. Pip-install fairseq's runtime deps (hydra-core, bitarray, regex,
       sacrebleu, scikit-learn, etc.) that --no-deps fairseq doesn't
       pull. Pre-14g.3 installs missed these.
    4. Patch fairseq's mutable dataclass defaults if any are still
       present (no-op on the One-sixth fork; safety belt for users who
       installed manually).

    Run this if voice sessions fail with anything resembling:
    rvc_python.configs.config | No supported Nvidia GPU found
    ModuleNotFoundError: No module named 'hydra'
    ValueError: mutable default ... for field common is not allowed
    Object of unsupported type: '_MISSING_TYPE'
    """
    from . import voice_ops

    def require(succeeded: bool, failure: str) -> None:
        # Every step aborts the whole repair with exit code 1 on failure.
        if not succeeded:
            err_console.print(failure)
            raise typer.Exit(1)

    console.print("[bold]Step 1/4[/bold]: replacing CPU torch with CUDA wheels …")
    require(
        voice_ops.reinstall_cuda_torch(),
        "[red]CUDA torch install failed — see error above.[/red] "
        "If you don't have an NVIDIA GPU this step is skipped.",
    )
    # Skipped (no nvidia-smi) or genuinely unavailable; not an error.
    step_one_result = (
        "[green]✓ CUDA torch installed.[/green]"
        if voice_ops.is_cuda_torch_available()
        else "[dim]No CUDA platform detected — skipped.[/dim]"
    )
    console.print(step_one_result)

    console.print("[bold]Step 2/4[/bold]: reinstalling fairseq from One-sixth fork …")
    require(
        voice_ops.reinstall_fairseq_from_fork(),
        "[red]fairseq reinstall failed — see error above.[/red]",
    )
    console.print("[green]✓ fairseq from One-sixth/fairseq installed.[/green]")

    console.print("[bold]Step 3/4[/bold]: installing fairseq runtime deps …")
    require(
        voice_ops.install_fairseq_runtime_deps(),
        "[red]pip install failed — see error above.[/red] "
        "If you haven't run [bold]swap voices install[/bold] yet, do that first.",
    )
    console.print("[green]✓ fairseq runtime deps in place.[/green]")

    console.print("[bold]Step 4/4[/bold]: patching fairseq dataclass defaults …")
    require(
        voice_ops.patch_fairseq_dataclass_defaults(),
        "[red]Patch failed — see error above.[/red] "
        "If fairseq isn't installed yet, run [bold]swap voices install[/bold] first.",
    )
    console.print(
        "[green]✓ fairseq dataclass patch applied (or already in place).[/green]\n"
        "[dim]Retry `swap gui --voice` or `swap voice -v <name>`.[/dim]"
    )
697
+
698
+
699
@voices_app.command("list")
def voices_list() -> None:
    """List the user-added RVC voices."""
    from . import voice_ops

    # list_all() returns a pair; only the second element (user voices) is shown.
    _unused, user_voices = voice_ops.list_all()

    listing = Table(title="Your voices (RVC)", show_header=True, box=None)
    listing.add_column("id", style="dim")
    listing.add_column("name")
    listing.add_column("description")

    if not user_voices:
        # Placeholder row that doubles as a how-to for an empty library.
        how_to = (
            "Download an RVC .pth (weights.gg or HF lj1995/VoiceConversionWebUI), "
            "then `swap voices add-rvc /path/to/model.pth --name \"Name\"`."
        )
        listing.add_row("(empty)", "—", how_to)
    for item in user_voices:
        listing.add_row(item.id, item.name, item.description)

    console.print(listing)
719
+
720
+
721
@voices_app.command("add")
def voices_add(
    path: Annotated[Path, typer.Argument(help="Path to an RVC .pth model.")],
    name: Annotated[
        str | None,
        typer.Option("--name", "-n", help="Display name. Defaults to file stem."),
    ] = None,
) -> None:
    """Deprecated alias of `swap voices add-rvc`.

    Sprint 14e removed OpenVoice; the WAV-to-embedding flow no longer
    exists. To clone your own voice, train an RVC model with Applio
    (https://github.com/IAHispano/Applio) and register the resulting
    .pth + .index here.
    """
    deprecation_notice = (
        "[yellow]`swap voices add` (OpenVoice WAV→embedding) was removed in 14e.[/yellow]\n"
        "Use [bold]swap voices add-rvc /path/to/model.pth --name X[/bold] for an "
        "RVC .pth, or train your own with Applio.\n"
        "Forwarding to add-rvc with the supplied path …"
    )
    err_console.print(deprecation_notice)

    from . import voice_ops

    # Exit code 1 = file missing, 2 = the model was rejected or failed to load.
    try:
        added = voice_ops.add_rvc_voice(path, name)
    except FileNotFoundError as err:
        exit_code = 1
        err_console.print(f"[red]{err}[/red]")
        raise typer.Exit(exit_code) from err
    except (ValueError, RuntimeError) as err:
        exit_code = 2
        err_console.print(f"[red]{err}[/red]")
        raise typer.Exit(exit_code) from err

    console.print(
        f"[green]✓ Added[/green] [bold]{added.name}[/bold] (id: {added.id})"
    )
756
+
757
+
758
@voices_app.command("remove")
def voices_remove(
    name: Annotated[str, typer.Argument(help="Voice name or id to remove.")],
) -> None:
    """Remove a custom voice from your library."""
    from . import voice_ops

    # remove_user_voice() reports whether a matching voice was removed.
    removed = voice_ops.remove_user_voice(name)
    if not removed:
        err_console.print(
            f"[red]No user voice named/id matching '{name}'.[/red]"
        )
        raise typer.Exit(1)
    console.print(f"[green]✓ Removed[/green] {name}.")
772
+
773
+
774
@voices_app.command("add-rvc")
def voices_add_rvc(
    pth: Annotated[Path, typer.Argument(help="Path to the RVC .pth model file.")],
    name: Annotated[
        str | None,
        typer.Option("--name", "-n", help="Display name. Defaults to file stem."),
    ] = None,
    index: Annotated[
        Path | None,
        typer.Option(
            "--index",
            "-i",
            help="Optional .index file (Faiss retrieval index — improves quality).",
        ),
    ] = None,
) -> None:
    """Register an RVC voice model. The .pth file is copied into swap-cli's
    model directory; an optional .index file can be provided alongside.

    After registering, switch to the RVC engine with `swap voices engine rvc`
    and the voice appears in the GUI dropdown / `swap voice -v` list.
    """
    from . import voice_ops

    try:
        voice = voice_ops.add_rvc_voice(pth, name=name, index_path=index)
    except (FileNotFoundError, ValueError, RuntimeError) as err:
        # RuntimeError is included because the deprecated `swap voices add`
        # alias already treats it as a user-facing failure of this same
        # call; without it the error surfaced here as a raw traceback.
        err_console.print(f"[red]{err}[/red]")
        raise typer.Exit(1) from err

    console.print(
        f"[green]✓ Registered RVC voice[/green] [bold]{voice.name}[/bold] "
        f"(id: {voice.id})"
    )
    console.print(
        "[dim]Switch to RVC engine: [bold]swap voices engine rvc[/bold]\n"
        "Then pick the voice in the GUI dropdown or `swap voice -v "
        f"{voice.id}`.[/dim]"
    )
813
+
814
+
815
@voices_app.command("remove-rvc")
def voices_remove_rvc(
    name: Annotated[str, typer.Argument(help="RVC voice name or id.")],
) -> None:
    """Remove an RVC voice and its model files."""
    from . import voice_ops

    # remove_rvc_voice() reports whether a matching voice was removed.
    removed = voice_ops.remove_rvc_voice(name)
    if not removed:
        err_console.print(f"[red]No RVC voice matching '{name}'.[/red]")
        raise typer.Exit(1)
    console.print(f"[green]✓ Removed RVC voice[/green] {name}.")
827
+
828
+
829
@voices_app.command("catalog")
def voices_catalog() -> None:
    """List curated RVC voices available via `swap voices download`.

    These are mirrored to our GitHub Releases — stable URLs, license-
    vetted personas (no real people, no copyrighted IP).
    """
    from . import rvc_catalog

    listing = Table(title="Curated voice catalog", show_header=True, box=None)
    listing.add_column("slug", style="dim")
    listing.add_column("name")
    listing.add_column("size", justify="right")
    listing.add_column("description")

    for item in rvc_catalog.CATALOG:
        slug_cell = item.slug
        if item.slug == rvc_catalog.STARTER_SLUG:
            # Star the entry that `swap voices install` offers by default.
            slug_cell += " [yellow]★[/yellow]"
        size_cell = f"{item.total_size_mb} MB"
        listing.add_row(slug_cell, item.name, size_cell, item.description)

    console.print(listing)
    legend = (
        "\n[dim]★ = default starter (smallest). "
        "Download with [bold]swap voices download <slug>[/bold].[/dim]"
    )
    console.print(legend)
856
+
857
+
858
@voices_app.command("download")
def voices_download(
    slug: Annotated[
        str,
        typer.Argument(help="Catalog slug — see `swap voices catalog` for options."),
    ],
) -> None:
    """Download a curated RVC voice from our mirror and register it."""
    from . import rvc_catalog, voice_engines

    # Check that the RVC engine is installed before looking up the slug.
    if not voice_engines.get_engine("rvc").is_available():
        err_console.print(
            "[red]RVC isn't installed.[/red] Run [bold]swap voices install[/bold] first."
        )
        raise typer.Exit(1)

    catalog_entry = rvc_catalog.find(slug)
    if catalog_entry is None:
        unknown_slug = (
            f"[red]Unknown catalog slug '{slug}'.[/red] "
            "Run [bold]swap voices catalog[/bold] to see options."
        )
        err_console.print(unknown_slug)
        raise typer.Exit(1)

    _download_catalog_entry(catalog_entry)
884
+
885
+
886
@voices_app.command("engine")
def voices_engine(
    name: Annotated[
        str | None,
        typer.Argument(
            help="Engine to set as default. Omit to print current setting + list.",
        ),
    ] = None,
) -> None:
    """Pick which engine handles live voice streaming.

    Sprint 14e: only 'rvc' is registered. Field exists for forward
    compatibility with future engines (Applio, GPT-SoVITS).
    """
    from . import config as _config
    from . import voice_engines

    cfg = _config.load()

    def mark(ok: bool) -> str:
        # Green tick / red cross cell used by the status columns.
        return "[green]✓[/green]" if ok else "[red]✗[/red]"

    if name is None:
        # Show current + available engines. "available?" = deps installed,
        # "ready?" = also has at least one usable voice (RVC needs a .pth).
        overview = Table(title="Voice engines", show_header=True, box=None)
        overview.add_column("name", style="dim")
        overview.add_column("display name")
        overview.add_column("available?")
        overview.add_column("ready?")
        overview.add_column("active")
        for engine_name in voice_engines.available_engines():
            candidate = voice_engines.get_engine(engine_name)
            available_cell = mark(candidate.is_available())
            ready_cell = mark(candidate.is_ready())
            active_cell = "[bold]●[/bold]" if engine_name == cfg.voice_engine else ""
            overview.add_row(
                candidate.name,
                candidate.display_name,
                available_cell,
                ready_cell,
                active_cell,
            )
        console.print(overview)
        return

    if name not in voice_engines.available_engines():
        err_console.print(
            f"[red]Unknown engine '{name}'.[/red] "
            f"Known: {voice_engines.available_engines()}"
        )
        raise typer.Exit(1)

    chosen = voice_engines.get_engine(name)
    if not chosen.is_available():
        # Refuse — Sprint 14d. Setting an unavailable engine as active
        # left users in a corrupt state (engine ✗ ●) where Live failed
        # opaquely. Force them to install first.
        err_console.print(
            f"[red]✗ Engine '{name}' isn't installed.[/red] "
            "Run [bold]swap voices install[/bold] first, then retry."
        )
        raise typer.Exit(1)

    _config.update(voice_engine=name)
    console.print(
        f"[green]✓ Default voice engine set to[/green] [bold]{name}[/bold]"
        f" ({chosen.display_name})."
    )

    # Soft-warn if available but not ready (e.g. RVC installed, no .pth yet).
    if chosen.is_ready():
        return
    console.print(
        f"[yellow]Note: '{name}' is available but not ready — "
        "no usable voice registered yet.[/yellow] "
        "For RVC, run [bold]swap voices add-rvc /path/to/model.pth --name X[/bold]."
    )
952
+
953
+
954
@voices_app.command("test")
def voices_test(
    # Required: which voice to run the test session with.
    voice: Annotated[
        str,
        typer.Option(
            "--voice",
            "-v",
            help="Voice id or name (e.g. 'aria', 'Aria', or your custom voice).",
        ),
    ],
    # Auto-stop timer; 0 disables it.
    seconds: Annotated[
        int,
        typer.Option(
            "--seconds",
            "-s",
            help="Stop automatically after N seconds. 0 = run until Ctrl+C.",
        ),
    ] = 30,
    # Optional input device override.
    mic: Annotated[
        int | None,
        typer.Option(
            "--mic",
            help="Microphone device index. Default: system default.",
        ),
    ] = None,
    # Optional output device override.
    output: Annotated[
        int | None,
        typer.Option(
            "--output",
            help="Output device index. Default: auto-detected virtual cable.",
        ),
    ] = None,
) -> None:
    """Test voice cloning briefly (default 30s). Lighter sibling of `swap voice`."""
    # Pure pass-through: every option is forwarded unchanged to the shared
    # voice-session runner.
    _run_voice_session(
        voice_name=voice,
        seconds=seconds,
        mic=mic,
        output=output,
    )
983
+
984
+
985
+ # ── Helpers ────────────────────────────────────────────────────────────────
986
+
987
+
988
+ def _redact(value: str | None) -> str | None:
989
+ if not value:
990
+ return None
991
+ if len(value) <= 8:
992
+ return "•" * len(value)
993
+ return f"{value[:4]}…{value[-4:]}"
994
+
995
+
996
def _format_unix(ts: int) -> str:
    """Render a Unix timestamp as ``YYYY-MM-DD HH:MM UTC``.

    The timestamp is interpreted in UTC, so the result does not depend on
    the machine's local timezone.
    """
    from datetime import datetime, timezone

    moment = datetime.fromtimestamp(ts, tz=timezone.utc)
    return moment.strftime("%Y-%m-%d %H:%M UTC")
1000
+
1001
+
1002
async def _doctor() -> None:
    """Run the environment checks, print them as a table, exit 1 on failure.

    Each check adds one ``(check, status)`` row. A status containing ``✗``
    is a hard failure; ``⚠`` / "offline" / "n/a" rows are informational and
    do not affect the exit code.

    Raises:
        SystemExit: with code 1 when at least one status row contains ``✗``.
    """
    cfg = config.load()
    table = Table(title="swap-cli doctor", show_header=False, box=None)
    table.add_column("check", style="dim")
    table.add_column("status")

    # Config
    if cfg.license_key:
        table.add_row("license key set", "[green]✓[/green]")
    else:
        table.add_row("license key set", "[red]✗ run `swap setup`[/red]")
    if cfg.decart_api_key:
        table.add_row("decart api key set", "[green]✓[/green]")
    else:
        table.add_row("decart api key set", "[red]✗ run `swap setup`[/red]")

    # Network — just DNS for the two endpoints we'll hit
    for host in ("swap.ikieguy.online", "api.decart.ai"):
        try:
            # gethostbyname blocks, so run it off the event loop.
            await asyncio.to_thread(socket.gethostbyname, host)
            table.add_row(f"dns {host}", "[green]✓[/green]")
        except OSError as err:
            table.add_row(f"dns {host}", f"[red]✗ {err}[/red]")

    # License validation
    if cfg.license_key:
        try:
            status = await license.validate(force_online=True)
            if status.valid:
                table.add_row("license validate", f"[green]✓ {status.reason}[/green]")
            else:
                table.add_row("license validate", f"[red]✗ {status.reason}[/red]")
        except license.LicenseError as err:
            table.add_row("license validate", f"[yellow]offline ({err})[/yellow]")

    # Camera (cheap probe)
    table.add_row("camera probe", _camera_probe_label())

    # Native deps
    table.add_row("aiortc import", _import_ok("aiortc"))
    table.add_row("decart import", _import_ok("decart"))
    table.add_row("opencv import", _import_ok("cv2"))
    table.add_row("av import", _import_ok("av"))

    # ffmpeg on PATH — RVC needs it for any non-WAV codec; missing
    # ffmpeg accounts for ~half the 'file failed to load' errors per
    # the upstream RVC community.
    if shutil.which("ffmpeg") is not None:
        table.add_row("ffmpeg on PATH", "[green]✓[/green]")
    else:
        if sys.platform == "win32":
            hint = "winget install Gyan.FFmpeg"
        elif sys.platform == "darwin":
            hint = "brew install ffmpeg"
        else:
            hint = "sudo apt install ffmpeg"
        table.add_row("ffmpeg on PATH", f"[red]✗ {hint}[/red]")

    # PyTorch CUDA — only relevant on a machine with an NVIDIA GPU.
    # Common failure: the user pip-installed torch from PyPI default,
    # got the CPU wheel, and then RVC silently falls back to CPU. Surface
    # it explicitly so they notice without running a session.
    table.add_row("torch CUDA", _torch_cuda_label())

    # RVC base models — hubert_base.pt + rmvpe.pt. First session
    # downloads these ~370 MB; until that finishes, the session waits.
    # Surfacing them here tells users whether to expect a pause.
    from . import voice_prereq

    rvc_check = voice_prereq._check_rvc_base_models()
    if rvc_check.ok:
        table.add_row("rvc base models", f"[green]✓ {rvc_check.label}[/green]")
    else:
        table.add_row(
            "rvc base models",
            f"[yellow]⚠ {rvc_check.label} — {rvc_check.hint}[/yellow]",
        )

    # Virtual camera driver (Sprint 14k) — when present, swap can stream
    # the deepfake straight into Zoom/Meet/Discord without OBS open.
    vcam_check = voice_prereq._check_obs_vcam()
    if vcam_check.ok:
        table.add_row("virtual camera", f"[green]✓ {vcam_check.label}[/green]")
    else:
        table.add_row(
            "virtual camera",
            f"[yellow]⚠ {vcam_check.label} — {vcam_check.hint}[/yellow]",
        )

    # macOS-only: customtkinter needs Tcl/Tk >= 8.6.9. The system Python
    # ships 8.5.9 which fails silently or renders broken windows. Surface
    # this here so users know to switch to python.org Python or
    # `brew install python-tk@3.11`.
    if sys.platform == "darwin":
        table.add_row("tcl/tk version", _tcl_tk_label())

    console.print(table)

    # Count hard failures from the status column's cell text. Scanning
    # `table.rows` cannot work: those Row objects carry only per-row style
    # metadata, not the cell contents, so "✗" would never be found and the
    # command would always exit 0.
    status_cells = table.columns[1].cells
    failures = sum(1 for cell in status_cells if "✗" in str(cell))
    if failures:
        sys.exit(1)
1107
+
1108
+
1109
def _camera_probe_label() -> str:
    """Return a doctor-row label for a quick probe of camera index 0.

    Opens the device, grabs a single frame, and always releases the
    capture handle — including when the probe itself raises. Any exception
    (OpenCV missing, driver error, ...) is reported as a red label instead
    of propagating, since this is a best-effort diagnostic.
    """
    try:
        import cv2

        cap = cv2.VideoCapture(0)
        try:
            if not cap.isOpened():
                return "[red]✗ no camera at index 0[/red]"
            ok, _ = cap.read()
        finally:
            # Release on every path so a failing read can't leave the
            # device held open.
            cap.release()
        return "[green]✓[/green]" if ok else "[red]✗ read failed[/red]"
    except Exception as err:  # noqa: BLE001
        return f"[red]✗ {err}[/red]"
1122
+
1123
+
1124
def _import_ok(name: str) -> str:
    """Return a doctor-row label saying whether module *name* imports.

    Any exception raised during the import is reported as a red label,
    not just ``ImportError``. This matches the other doctor probes, which
    are best-effort and must not crash the report when a dependency is
    broken.
    """
    import importlib

    try:
        importlib.import_module(name)
    except Exception as err:  # noqa: BLE001
        return f"[red]✗ {err}[/red]"
    return "[green]✓[/green]"
1130
+
1131
+
1132
def _torch_cuda_label() -> str:
    """Return a doctor-row label describing torch's CUDA support.

    The presence of ``nvidia-smi`` on PATH is used as the signal that the
    machine has an NVIDIA GPU:

    * no ``nvidia-smi``: dim "n/a" (CPU torch is the right build);
    * torch not importable: dim "not installed";
    * torch reports no CUDA: red, with a repair hint;
    * otherwise: green with the device name, or a generic name if the
      lookup fails.
    """
    nvidia_smi = shutil.which("nvidia-smi")
    if nvidia_smi is None:
        # No NVIDIA GPU on this box; CPU torch is the right answer.
        return "[dim]n/a (no NVIDIA GPU)[/dim]"

    try:
        import torch  # type: ignore[import-not-found]
    except ImportError:
        return "[dim]not installed (run `swap voices install`)[/dim]"

    try:
        if torch.cuda.is_available():
            try:
                device = torch.cuda.get_device_name(0)
            except Exception:  # noqa: BLE001
                # The name is cosmetic; fall back rather than fail the row.
                device = "CUDA device"
            return f"[green]✓ {device}[/green]"
        return (
            "[red]✗ CPU-only torch on a CUDA-capable machine — "
            "run `swap voices repair`[/red]"
        )
    except Exception as err:  # noqa: BLE001
        return f"[red]✗ {err}[/red]"
1159
+
1160
+
1161
def _tcl_tk_label() -> str:
    """Return a doctor-row label for the Tcl/Tk version on macOS.

    customtkinter requires >= 8.6.9; macOS system Python is stuck on
    Apple's 8.5.9 (which has known Tk bugs). Direct users to
    python.org Python or `brew install python-tk@3.11` when below the
    floor.

    Only the leading digits of each of the first three dot-separated
    components are compared, so pre-release patchlevels such as ``8.7a5``
    parse as ``(8, 7, 0)`` instead of failing the integer conversion and
    being reported as an error.
    """
    try:
        import re
        import tkinter

        ver = str(tkinter.Tcl().call("info", "patchlevel"))
        parts = []
        for piece in ver.split(".")[:3]:
            digits = re.match(r"\d+", piece)
            parts.append(int(digits.group()) if digits else 0)
        # Pad short versions ("8.6") out to three components.
        parts.extend([0] * (3 - len(parts)))
        if tuple(parts) >= (8, 6, 9):
            return f"[green]✓ {ver}[/green]"
        return (
            f"[red]✗ {ver} — need ≥ 8.6.9; "
            "install python.org Python or `brew install python-tk@3.11`[/red]"
        )
    except Exception as err:  # noqa: BLE001
        return f"[red]✗ {err}[/red]"