voice-mode 2.27.0__py3-none-any.whl → 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. voice_mode/__version__.py +1 -1
  2. voice_mode/cli.py +152 -37
  3. voice_mode/cli_commands/exchanges.py +6 -0
  4. voice_mode/frontend/.next/BUILD_ID +1 -1
  5. voice_mode/frontend/.next/app-build-manifest.json +5 -5
  6. voice_mode/frontend/.next/app-path-routes-manifest.json +1 -1
  7. voice_mode/frontend/.next/build-manifest.json +3 -3
  8. voice_mode/frontend/.next/next-minimal-server.js.nft.json +1 -1
  9. voice_mode/frontend/.next/next-server.js.nft.json +1 -1
  10. voice_mode/frontend/.next/prerender-manifest.json +1 -1
  11. voice_mode/frontend/.next/required-server-files.json +1 -1
  12. voice_mode/frontend/.next/server/app/_not-found/page.js +1 -1
  13. voice_mode/frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  14. voice_mode/frontend/.next/server/app/_not-found.html +1 -1
  15. voice_mode/frontend/.next/server/app/_not-found.rsc +1 -1
  16. voice_mode/frontend/.next/server/app/api/connection-details/route.js +2 -2
  17. voice_mode/frontend/.next/server/app/favicon.ico/route.js +2 -2
  18. voice_mode/frontend/.next/server/app/index.html +1 -1
  19. voice_mode/frontend/.next/server/app/index.rsc +2 -2
  20. voice_mode/frontend/.next/server/app/page.js +3 -3
  21. voice_mode/frontend/.next/server/app/page_client-reference-manifest.js +1 -1
  22. voice_mode/frontend/.next/server/app-paths-manifest.json +1 -1
  23. voice_mode/frontend/.next/server/chunks/994.js +1 -1
  24. voice_mode/frontend/.next/server/middleware-build-manifest.js +1 -1
  25. voice_mode/frontend/.next/server/next-font-manifest.js +1 -1
  26. voice_mode/frontend/.next/server/next-font-manifest.json +1 -1
  27. voice_mode/frontend/.next/server/pages/404.html +1 -1
  28. voice_mode/frontend/.next/server/pages/500.html +1 -1
  29. voice_mode/frontend/.next/server/server-reference-manifest.json +1 -1
  30. voice_mode/frontend/.next/standalone/.next/BUILD_ID +1 -1
  31. voice_mode/frontend/.next/standalone/.next/app-build-manifest.json +5 -5
  32. voice_mode/frontend/.next/standalone/.next/app-path-routes-manifest.json +1 -1
  33. voice_mode/frontend/.next/standalone/.next/build-manifest.json +3 -3
  34. voice_mode/frontend/.next/standalone/.next/prerender-manifest.json +1 -1
  35. voice_mode/frontend/.next/standalone/.next/required-server-files.json +1 -1
  36. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js +1 -1
  37. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  38. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.html +1 -1
  39. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.rsc +1 -1
  40. voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js +2 -2
  41. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js +2 -2
  42. voice_mode/frontend/.next/standalone/.next/server/app/index.html +1 -1
  43. voice_mode/frontend/.next/standalone/.next/server/app/index.rsc +2 -2
  44. voice_mode/frontend/.next/standalone/.next/server/app/page.js +3 -3
  45. voice_mode/frontend/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
  46. voice_mode/frontend/.next/standalone/.next/server/app-paths-manifest.json +1 -1
  47. voice_mode/frontend/.next/standalone/.next/server/chunks/994.js +1 -1
  48. voice_mode/frontend/.next/standalone/.next/server/middleware-build-manifest.js +1 -1
  49. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.js +1 -1
  50. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.json +1 -1
  51. voice_mode/frontend/.next/standalone/.next/server/pages/404.html +1 -1
  52. voice_mode/frontend/.next/standalone/.next/server/pages/500.html +1 -1
  53. voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.json +1 -1
  54. voice_mode/frontend/.next/standalone/server.js +1 -1
  55. voice_mode/frontend/.next/static/chunks/app/{layout-08be62ed6e344292.js → layout-a9d79fcaeb3295f5.js} +1 -1
  56. voice_mode/frontend/.next/static/chunks/app/page-011e46e13f394b9b.js +1 -0
  57. voice_mode/frontend/.next/static/chunks/{main-app-413f77c1f2c53e3f.js → main-app-b03681837de4dca6.js} +1 -1
  58. voice_mode/frontend/.next/trace +43 -43
  59. voice_mode/frontend/.next/types/app/api/connection-details/route.ts +1 -1
  60. voice_mode/frontend/.next/types/app/layout.ts +1 -1
  61. voice_mode/frontend/.next/types/app/page.ts +1 -1
  62. voice_mode/frontend/package-lock.json +6 -6
  63. voice_mode/tools/converse.py +44 -24
  64. voice_mode/tools/service.py +30 -3
  65. voice_mode/tools/services/kokoro/install.py +1 -1
  66. voice_mode/tools/services/whisper/__init__.py +15 -5
  67. voice_mode/tools/services/whisper/install.py +40 -9
  68. voice_mode/tools/services/whisper/list_models.py +14 -14
  69. voice_mode/tools/services/whisper/model_active.py +54 -0
  70. voice_mode/tools/services/whisper/model_benchmark.py +159 -0
  71. voice_mode/tools/services/whisper/{download_model.py → model_install.py} +72 -11
  72. voice_mode/tools/services/whisper/model_remove.py +36 -0
  73. voice_mode/tools/services/whisper/models.py +225 -26
  74. voice_mode/utils/services/whisper_helpers.py +206 -19
  75. voice_mode/utils/services/whisper_version.py +138 -0
  76. {voice_mode-2.27.0.dist-info → voice_mode-2.28.0.dist-info}/METADATA +5 -1
  77. {voice_mode-2.27.0.dist-info → voice_mode-2.28.0.dist-info}/RECORD +81 -78
  78. voice_mode/frontend/.next/static/chunks/app/page-80fc72669f25298f.js +0 -1
  79. voice_mode/tools/services/whisper/list_models_tool.py +0 -65
  80. /voice_mode/frontend/.next/static/{wQ5pxzPmwjlzdUfJwSjMg → cSCYUZbU1EJR-gEGqdoa-}/_buildManifest.js +0 -0
  81. /voice_mode/frontend/.next/static/{wQ5pxzPmwjlzdUfJwSjMg → cSCYUZbU1EJR-gEGqdoa-}/_ssgManifest.js +0 -0
  82. {voice_mode-2.27.0.dist-info → voice_mode-2.28.0.dist-info}/WHEEL +0 -0
  83. {voice_mode-2.27.0.dist-info → voice_mode-2.28.0.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/api/connection-details/route.ts
1
+ // File: /tmp/build-via-sdist-_mb6zwu1/voice_mode-2.28.0/voice_mode/frontend/app/api/connection-details/route.ts
2
2
  import * as entry from '../../../../../app/api/connection-details/route.js'
3
3
  import type { NextRequest } from 'next/server.js'
4
4
 
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/layout.tsx
1
+ // File: /tmp/build-via-sdist-_mb6zwu1/voice_mode-2.28.0/voice_mode/frontend/app/layout.tsx
2
2
  import * as entry from '../../../app/layout.js'
3
3
  import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
4
4
 
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/page.tsx
1
+ // File: /tmp/build-via-sdist-_mb6zwu1/voice_mode-2.28.0/voice_mode/frontend/app/page.tsx
2
2
  import * as entry from '../../../app/page.js'
3
3
  import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
4
4
 
@@ -1489,9 +1489,9 @@
1489
1489
  }
1490
1490
  },
1491
1491
  "node_modules/caniuse-lite": {
1492
- "version": "1.0.30001735",
1493
- "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001735.tgz",
1494
- "integrity": "sha512-EV/laoX7Wq2J9TQlyIXRxTJqIw4sxfXS4OYgudGxBYRuTv0q7AM6yMEpU/Vo1I94thg9U6EZ2NfZx9GJq83u7w==",
1492
+ "version": "1.0.30001737",
1493
+ "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001737.tgz",
1494
+ "integrity": "sha512-BiloLiXtQNrY5UyF0+1nSJLXUENuhka2pzy2Fx5pGxqavdrxSCW4U6Pn/PoG3Efspi2frRbHpBV2XsrPE6EDlw==",
1495
1495
  "dev": true,
1496
1496
  "funding": [
1497
1497
  {
@@ -1774,9 +1774,9 @@
1774
1774
  "license": "MIT"
1775
1775
  },
1776
1776
  "node_modules/electron-to-chromium": {
1777
- "version": "1.5.207",
1778
- "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.207.tgz",
1779
- "integrity": "sha512-mryFrrL/GXDTmAtIVMVf+eIXM09BBPlO5IQ7lUyKmK8d+A4VpRGG+M3ofoVef6qyF8s60rJei8ymlJxjUA8Faw==",
1777
+ "version": "1.5.208",
1778
+ "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.208.tgz",
1779
+ "integrity": "sha512-ozZyibehoe7tOhNaf16lKmljVf+3npZcJIEbJRVftVsmAg5TeA1mGS9dVCZzOwr2xT7xK15V0p7+GZqSPgkuPg==",
1780
1780
  "dev": true,
1781
1781
  "license": "ISC"
1782
1782
  },
@@ -613,31 +613,51 @@ async def _speech_to_text_internal(
613
613
  if stt_config.get('base_url') and ("127.0.0.1" in stt_config['base_url'] or "localhost" in stt_config['base_url']):
614
614
  provider = "whisper-local"
615
615
 
616
- # Validate format for provider
617
- export_format = validate_audio_format(STT_AUDIO_FORMAT, provider, "stt")
618
-
619
- # Convert WAV to target format for upload
620
- logger.debug(f"Converting WAV to {export_format.upper()} for upload...")
621
- try:
622
- audio = AudioSegment.from_wav(wav_file)
623
- logger.debug(f"Audio loaded - Duration: {len(audio)}ms, Channels: {audio.channels}, Frame rate: {audio.frame_rate}")
624
-
625
- # Get export parameters for the format
626
- export_params = get_format_export_params(export_format)
616
+ # Check if we can skip conversion for local whisper
617
+ skip_conversion = False
618
+ if provider == "whisper-local":
619
+ # Check if whisper is truly local (not SSH-forwarded)
620
+ from voice_mode.utils.services.common import check_service_status
621
+ from voice_mode.config import WHISPER_PORT
622
+ status, _ = check_service_status(WHISPER_PORT)
623
+ if status == "local":
624
+ skip_conversion = True
625
+ logger.info("Detected truly local whisper - skipping audio conversion, using WAV directly")
626
+
627
+ if skip_conversion:
628
+ # Use WAV directly for local whisper
629
+ upload_file = wav_file
630
+ export_format = "wav"
631
+ logger.debug("Using WAV file directly for local whisper upload")
632
+ else:
633
+ # Validate format for provider
634
+ export_format = validate_audio_format(STT_AUDIO_FORMAT, provider, "stt")
627
635
 
628
- with tempfile.NamedTemporaryFile(suffix=f'.{export_format}', delete=False) as export_file_obj:
629
- export_file = export_file_obj.name
630
- audio.export(export_file, **export_params)
631
- upload_file = export_file
632
- logger.debug(f"{export_format.upper()} created for STT upload: {upload_file}")
633
- except Exception as e:
634
- if "ffmpeg" in str(e).lower() or "avconv" in str(e).lower():
635
- logger.error(f"Audio conversion failed - FFmpeg may not be installed: {e}")
636
- from voice_mode.utils.ffmpeg_check import get_install_instructions
637
- logger.error(f"\n{get_install_instructions()}")
638
- raise RuntimeError("FFmpeg is required but not found. Please install FFmpeg and try again.") from e
639
- else:
640
- raise
636
+ # Convert WAV to target format for upload
637
+ logger.debug(f"Converting WAV to {export_format.upper()} for upload...")
638
+ conversion_start = time.perf_counter()
639
+ try:
640
+ audio = AudioSegment.from_wav(wav_file)
641
+ logger.debug(f"Audio loaded - Duration: {len(audio)}ms, Channels: {audio.channels}, Frame rate: {audio.frame_rate}")
642
+
643
+ # Get export parameters for the format
644
+ export_params = get_format_export_params(export_format)
645
+
646
+ with tempfile.NamedTemporaryFile(suffix=f'.{export_format}', delete=False) as export_file_obj:
647
+ export_file = export_file_obj.name
648
+ audio.export(export_file, **export_params)
649
+ upload_file = export_file
650
+ conversion_time = time.perf_counter() - conversion_start
651
+ logger.info(f"Audio conversion: WAV → {export_format.upper()} took {conversion_time:.3f}s")
652
+ logger.debug(f"{export_format.upper()} created for STT upload: {upload_file}")
653
+ except Exception as e:
654
+ if "ffmpeg" in str(e).lower() or "avconv" in str(e).lower():
655
+ logger.error(f"Audio conversion failed - FFmpeg may not be installed: {e}")
656
+ from voice_mode.utils.ffmpeg_check import get_install_instructions
657
+ logger.error(f"\n{get_install_instructions()}")
658
+ raise RuntimeError("FFmpeg is required but not found. Please install FFmpeg and try again.") from e
659
+ else:
660
+ raise
641
661
 
642
662
  # Save debug file for upload version
643
663
  if DEBUG:
@@ -233,18 +233,45 @@ async def status_service(service_name: str) -> str:
233
233
  if service_name == "whisper":
234
234
  # Get model info
235
235
  model = "unknown"
236
+ model_name = None
236
237
  for i, arg in enumerate(cmdline):
237
238
  if arg == "--model" and i + 1 < len(cmdline):
238
239
  model = Path(cmdline[i + 1]).name
240
+ # Extract model name from filename (e.g., ggml-large-v3-turbo.bin -> large-v3-turbo)
241
+ if model.startswith("ggml-") and model.endswith(".bin"):
242
+ model_name = model[5:-4]
239
243
  break
240
244
  extra_info_parts.append(f"Model: {model}")
241
245
 
242
- # Try to get version info
246
+ # Get version and capability info
243
247
  try:
244
- from voice_mode.tools.services.version_info import get_whisper_version
245
- version_info = get_whisper_version()
248
+ from voice_mode.utils.services.whisper_version import get_whisper_version_info, check_coreml_model_exists
249
+ version_info = get_whisper_version_info()
250
+
246
251
  if version_info.get("version"):
247
252
  extra_info_parts.append(f"Version: {version_info['version']}")
253
+ elif version_info.get("commit"):
254
+ extra_info_parts.append(f"Commit: {version_info['commit']}")
255
+
256
+ # Show Core ML status on Apple Silicon
257
+ if platform.machine() == "arm64" and platform.system() == "Darwin":
258
+ if version_info.get("coreml_supported"):
259
+ # Check if the current model has Core ML
260
+ if model_name and check_coreml_model_exists(model_name):
261
+ extra_info_parts.append("Core ML: ✓ Enabled & Active")
262
+ else:
263
+ extra_info_parts.append("Core ML: ✓ Supported (model not converted)")
264
+ else:
265
+ extra_info_parts.append("Core ML: ✗ Not compiled in")
266
+
267
+ # Show GPU support
268
+ gpu_support = []
269
+ if version_info.get("metal_supported"):
270
+ gpu_support.append("Metal")
271
+ if version_info.get("cuda_supported"):
272
+ gpu_support.append("CUDA")
273
+ if gpu_support:
274
+ extra_info_parts.append(f"GPU: {', '.join(gpu_support)}")
248
275
  except:
249
276
  pass
250
277
 
@@ -243,7 +243,7 @@ async def kokoro_install(
243
243
  <key>EnvironmentVariables</key>
244
244
  <dict>
245
245
  <key>PATH</key>
246
- <string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/homebrew/bin</string>
246
+ <string>{os.path.expanduser("~/.local/bin")}:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/homebrew/bin</string>
247
247
  </dict>
248
248
  </dict>
249
249
  </plist>"""
@@ -2,12 +2,22 @@
2
2
 
3
3
  from voice_mode.tools.services.whisper.install import whisper_install
4
4
  from voice_mode.tools.services.whisper.uninstall import whisper_uninstall
5
- from voice_mode.tools.services.whisper.download_model import download_model
6
- from voice_mode.tools.services.whisper.list_models_tool import whisper_list_models
5
+ from voice_mode.tools.services.whisper.model_install import whisper_model_install
6
+ from voice_mode.tools.services.whisper.list_models import whisper_models
7
+ from voice_mode.tools.services.whisper.model_active import whisper_model_active
8
+ from voice_mode.tools.services.whisper.model_remove import whisper_model_remove
9
+ from voice_mode.tools.services.whisper.model_benchmark import whisper_model_benchmark
7
10
 
8
11
  __all__ = [
9
12
  'whisper_install',
10
13
  'whisper_uninstall',
11
- 'download_model',
12
- 'whisper_list_models'
13
- ]
14
+ 'whisper_model_install',
15
+ 'whisper_models',
16
+ 'whisper_model_active',
17
+ 'whisper_model_remove',
18
+ 'whisper_model_benchmark'
19
+ ]
20
+
21
+ # Backwards compatibility aliases
22
+ download_model = whisper_model_install # Deprecated alias
23
+ whisper_list_models = whisper_models # Deprecated alias
@@ -206,13 +206,20 @@ async def whisper_install(
206
206
  except subprocess.CalledProcessError:
207
207
  logger.warning("Make clean failed, continuing anyway...")
208
208
 
209
- # Build with appropriate flags
209
+ # Build with CMake for better control and Core ML support
210
210
  build_env = os.environ.copy()
211
+ cmake_flags = []
211
212
 
212
- if is_macos and use_gpu:
213
- build_env["WHISPER_METAL"] = "1"
213
+ # Enable GPU support based on platform
214
+ if is_macos:
215
+ # On macOS, always enable Metal
216
+ cmake_flags.append("-DGGML_METAL=ON")
217
+ # On Apple Silicon, also enable Core ML for better performance
218
+ if platform.machine() == "arm64":
219
+ cmake_flags.append("-DWHISPER_COREML=ON")
220
+ logger.info("Enabling Core ML support for Apple Silicon")
214
221
  elif is_linux and use_gpu:
215
- build_env["WHISPER_CUDA"] = "1"
222
+ cmake_flags.append("-DGGML_CUDA=ON")
216
223
 
217
224
  # Get number of CPU cores for parallel build
218
225
  cpu_count = os.cpu_count() or 4
@@ -220,13 +227,31 @@ async def whisper_install(
220
227
  # Determine if we should show build output
221
228
  debug_mode = os.environ.get("VOICEMODE_DEBUG", "").lower() in ("true", "1", "yes")
222
229
 
230
+ # Configure with CMake
231
+ logger.info("Configuring whisper.cpp build...")
232
+ cmake_cmd = ["cmake", "-B", "build"] + cmake_flags
233
+
223
234
  if debug_mode:
224
- subprocess.run(["make", f"-j{cpu_count}"], env=build_env, check=True)
235
+ subprocess.run(cmake_cmd, env=build_env, check=True)
225
236
  else:
226
- # Suppress output unless there's an error
227
- logger.info("Building whisper.cpp (this may take a few minutes)...")
228
237
  try:
229
- result = subprocess.run(["make", f"-j{cpu_count}"], env=build_env,
238
+ result = subprocess.run(cmake_cmd, env=build_env,
239
+ capture_output=True, text=True, check=True)
240
+ except subprocess.CalledProcessError as e:
241
+ logger.error(f"Configuration failed: {e}")
242
+ if e.stderr:
243
+ logger.error(f"Configuration errors:\n{e.stderr}")
244
+ raise
245
+
246
+ # Build with CMake
247
+ logger.info("Building whisper.cpp (this may take a few minutes)...")
248
+ build_cmd = ["cmake", "--build", "build", "-j", str(cpu_count), "--config", "Release"]
249
+
250
+ if debug_mode:
251
+ subprocess.run(build_cmd, env=build_env, check=True)
252
+ else:
253
+ try:
254
+ result = subprocess.run(build_cmd, env=build_env,
230
255
  capture_output=True, text=True, check=True)
231
256
  logger.info("Build completed successfully")
232
257
  except subprocess.CalledProcessError as e:
@@ -258,7 +283,8 @@ async def whisper_install(
258
283
  model_path = download_result["path"]
259
284
 
260
285
  # Test whisper with sample if available
261
- main_path = os.path.join(install_dir, "main")
286
+ # With CMake build, binaries are in build/bin/
287
+ main_path = os.path.join(install_dir, "build", "bin", "whisper-cli")
262
288
  sample_path = os.path.join(install_dir, "samples", "jfk.wav")
263
289
  if os.path.exists(sample_path) and os.path.exists(main_path):
264
290
  try:
@@ -283,6 +309,11 @@ async def whisper_install(
283
309
  WHISPER_DIR="{install_dir}"
284
310
  LOG_FILE="{os.path.join(voicemode_dir, 'whisper-server.log')}"
285
311
 
312
+ # Source voicemode configuration if it exists
313
+ if [ -f "{voicemode_dir}/voicemode.env" ]; then
314
+ source "{voicemode_dir}/voicemode.env"
315
+ fi
316
+
286
317
  # Model selection with environment variable support
287
318
  MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-{model}}}"
288
319
  MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"
@@ -2,18 +2,18 @@
2
2
 
3
3
  from typing import Dict, Any
4
4
  from voice_mode.tools.services.whisper.models import (
5
- WHISPER_MODELS,
5
+ WHISPER_MODEL_REGISTRY,
6
6
  get_model_directory,
7
- get_current_model,
8
- is_model_installed,
9
- get_installed_models,
7
+ get_active_model,
8
+ is_whisper_model_installed,
9
+ get_installed_whisper_models,
10
10
  format_size,
11
- has_coreml_model,
11
+ has_whisper_coreml_model,
12
12
  is_apple_silicon
13
13
  )
14
14
 
15
15
 
16
- async def list_whisper_models() -> Dict[str, Any]:
16
+ async def whisper_models() -> Dict[str, Any]:
17
17
  """List available Whisper models and their installation status.
18
18
 
19
19
  Returns:
@@ -21,32 +21,32 @@ async def list_whisper_models() -> Dict[str, Any]:
21
21
  """
22
22
  try:
23
23
  model_dir = get_model_directory()
24
- current_model = get_current_model()
25
- installed_models = get_installed_models()
24
+ current_model = get_active_model()
25
+ installed_models = get_installed_whisper_models()
26
26
 
27
27
  # Build models list with status
28
28
  models = []
29
29
  show_coreml = is_apple_silicon() # Only show Core ML on Apple Silicon
30
30
 
31
- for model_name, info in WHISPER_MODELS.items():
31
+ for model_name, info in WHISPER_MODEL_REGISTRY.items():
32
32
  model_status = {
33
33
  "name": model_name,
34
34
  "size_mb": info["size_mb"],
35
35
  "size": format_size(info["size_mb"]),
36
36
  "languages": info["languages"],
37
37
  "description": info["description"],
38
- "installed": is_model_installed(model_name),
38
+ "installed": is_whisper_model_installed(model_name),
39
39
  "current": model_name == current_model,
40
- "has_coreml": has_coreml_model(model_name) if show_coreml else False
40
+ "has_coreml": has_whisper_coreml_model(model_name) if show_coreml else False
41
41
  }
42
42
  models.append(model_status)
43
43
 
44
44
  # Calculate totals
45
45
  total_installed_size = sum(
46
- WHISPER_MODELS[m]["size_mb"] for m in installed_models
46
+ WHISPER_MODEL_REGISTRY[m]["size_mb"] for m in installed_models
47
47
  )
48
48
  total_available_size = sum(
49
- m["size_mb"] for m in WHISPER_MODELS.values()
49
+ m["size_mb"] for m in WHISPER_MODEL_REGISTRY.values()
50
50
  )
51
51
 
52
52
  return {
@@ -55,7 +55,7 @@ async def list_whisper_models() -> Dict[str, Any]:
55
55
  "current_model": current_model,
56
56
  "model_directory": str(model_dir),
57
57
  "installed_count": len(installed_models),
58
- "total_count": len(WHISPER_MODELS),
58
+ "total_count": len(WHISPER_MODEL_REGISTRY),
59
59
  "installed_size_mb": total_installed_size,
60
60
  "installed_size": format_size(total_installed_size),
61
61
  "available_size_mb": total_available_size,
@@ -0,0 +1,54 @@
1
+ """MCP tool for showing/setting active Whisper model."""
2
+
3
+ from typing import Optional, Dict, Any
4
+ from voice_mode.tools.services.whisper.models import (
5
+ get_active_model,
6
+ set_active_model,
7
+ is_whisper_model_installed,
8
+ WHISPER_MODEL_REGISTRY
9
+ )
10
+
11
+
12
async def whisper_model_active(model_name: Optional[str] = None) -> Dict[str, Any]:
    """Report the active Whisper model, or switch to a different one.

    Args:
        model_name: Name of the model to activate. When ``None`` the
            function only reports the currently active model.

    Returns:
        A dict that always carries a ``success`` flag. Query and successful
        switch results include ``active_model`` and ``message`` (the query
        result also includes ``installed``); rejected requests include
        ``error`` plus either ``available_models`` (unknown name) or
        ``model`` (known but not installed).
    """
    # Query mode: nothing to change, just describe the current selection.
    if model_name is None:
        active = get_active_model()
        is_installed = is_whisper_model_installed(active)
        return {
            "success": True,
            "active_model": active,
            "installed": is_installed,
            "message": f"Current active model: {active}"
        }

    # Guard: the requested name must be a key of the model registry.
    if model_name not in WHISPER_MODEL_REGISTRY:
        known_models = list(WHISPER_MODEL_REGISTRY.keys())
        return {
            "success": False,
            "error": f"Model {model_name} is not a valid Whisper model",
            "available_models": known_models
        }

    # Guard: a registry model can only be activated once it is installed.
    if not is_whisper_model_installed(model_name):
        return {
            "success": False,
            "error": f"Model {model_name} is not installed. Install it first with whisper_model_install()",
            "model": model_name
        }

    # Both checks passed - record the new selection.
    set_active_model(model_name)

    confirmation = (
        f"Active model set to {model_name}. "
        "Restart whisper service for changes to take effect."
    )
    return {
        "success": True,
        "active_model": model_name,
        "message": confirmation
    }
@@ -0,0 +1,159 @@
1
+ """MCP tool for benchmarking Whisper models."""
2
+
3
+ from typing import Union, List, Dict, Any, Optional
4
+ from voice_mode.tools.services.whisper.models import (
5
+ get_installed_whisper_models,
6
+ benchmark_whisper_model,
7
+ is_whisper_model_installed,
8
+ WHISPER_MODEL_REGISTRY
9
+ )
10
+
11
+
12
async def whisper_model_benchmark(
    models: Union[str, List[str]] = "installed",
    sample_file: Optional[str] = None,
    runs: int = 1
) -> Dict[str, Any]:
    """Benchmark Whisper model performance.

    Args:
        models: 'installed' (default), 'all', a specific model name, or a
            list of model names. Only installed models are benchmarked in
            every mode; 'all' filters the registry down to installed ones.
        sample_file: Optional audio file for testing. It is passed straight
            to ``benchmark_whisper_model``; when None the result reports
            "default JFK sample".
        runs: Number of benchmark runs per model (default: 1). Must be an
            integer >= 1. The fastest successful run is kept per model.

    Returns:
        On invalid input or when no model can be selected:
        ``{"success": False, "error": ...}``.
        Otherwise a dict with ``success``, ``benchmarks`` (one entry per
        benchmarked model - either the best run or a failure record),
        ``models_tested``, ``models_failed``, ``models_skipped`` (requested
        but not installed), ``fastest_model``, ``fastest_time_ms``,
        ``recommendations``, ``sample_file`` and ``runs_per_model``.
    """
    # Reject bad run counts up front; previously runs <= 0 silently produced
    # a misleading "No benchmarks completed successfully" error.
    if not isinstance(runs, int) or runs < 1:
        return {
            "success": False,
            "error": f"Invalid runs parameter: {runs} (must be an integer >= 1)"
        }

    # Models that were explicitly requested but are not installed.
    skipped: List[str] = []

    # Determine which models to benchmark
    if models == "installed":
        model_list = get_installed_whisper_models()
        if not model_list:
            return {
                "success": False,
                "error": "No Whisper models are installed. Install models first with whisper_model_install()"
            }
    elif models == "all":
        # Only benchmark installed models from the full registry
        model_list = [m for m in WHISPER_MODEL_REGISTRY if is_whisper_model_installed(m)]
        if not model_list:
            return {
                "success": False,
                "error": "No Whisper models are installed"
            }
    elif isinstance(models, str):
        # Single model specified
        if not is_whisper_model_installed(models):
            return {
                "success": False,
                "error": f"Model {models} is not installed"
            }
        model_list = [models]
    elif isinstance(models, list):
        # List of models specified: keep the installed ones, remember the rest
        model_list = []
        for model in models:
            if is_whisper_model_installed(model):
                model_list.append(model)
            else:
                skipped.append(model)
        if not model_list:
            return {
                "success": False,
                "error": "None of the specified models are installed"
            }
    else:
        return {
            "success": False,
            "error": f"Invalid models parameter: {models}"
        }

    # Run benchmarks
    results = []
    failed = []

    for model in model_list:
        best_result = None

        for _ in range(runs):
            result = benchmark_whisper_model(model, sample_file)

            if not result.get("success"):
                # Any failed run marks the whole model as failed. Discard an
                # earlier successful run so the model is reported exactly
                # once (previously it appeared as both failed and successful).
                if model not in failed:
                    failed.append(model)
                results.append({
                    "model": model,
                    "success": False,
                    "error": result.get("error", "Benchmark failed")
                })
                best_result = None
                break

            # Keep the best (fastest) result from multiple runs
            if best_result is None or result["total_time_ms"] < best_result["total_time_ms"]:
                best_result = result

        if best_result is not None:
            results.append(best_result)

    # Find successful results for analysis
    successful_results = [r for r in results if r.get("success")]

    if successful_results:
        # Find fastest model
        fastest = min(successful_results, key=lambda r: r["total_time_ms"])

        # Generate recommendations based on results
        recommendations = []

        # Categorize by speed. Higher real_time_factor is treated as faster
        # (presumably audio duration / processing time - confirm against
        # benchmark_whisper_model).
        for result in successful_results:
            rtf = result.get("real_time_factor", 0)
            if rtf > 20:
                category = "Ultra-fast (good for real-time)"
            elif rtf > 5:
                category = "Fast (good for interactive use)"
            elif rtf > 1:
                category = "Moderate (good balance)"
            else:
                category = "Slow (best accuracy)"

            result["category"] = category

        # Generate specific recommendations; use .get so a result without a
        # real_time_factor does not raise (consistent with the loop above).
        if fastest.get("real_time_factor", 0) > 10:
            recommendations.append(f"Use {fastest['model']} for real-time applications")

        # Find best balance (medium or base if available)
        balance_models = [r for r in successful_results if r["model"] in ["base", "medium"]]
        if balance_models:
            best_balance = min(balance_models, key=lambda r: r["total_time_ms"])
            recommendations.append(f"Use {best_balance['model']} for balanced speed/accuracy")

        # Recommend large models for accuracy
        large_models = [r for r in successful_results if "large" in r["model"]]
        if large_models:
            best_large = min(large_models, key=lambda r: r["total_time_ms"])
            recommendations.append(f"Use {best_large['model']} for best accuracy")
    else:
        fastest = None
        recommendations = ["Unable to generate recommendations - no successful benchmarks"]

    return {
        "success": True,
        "benchmarks": results,
        "models_tested": len(model_list),
        "models_failed": len(failed),
        "models_skipped": skipped,
        "fastest_model": fastest["model"] if fastest else None,
        "fastest_time_ms": fastest["total_time_ms"] if fastest else None,
        "recommendations": recommendations,
        "sample_file": sample_file or "default JFK sample",
        "runs_per_model": runs
    }