voice-mode 2.26.0__py3-none-any.whl → 2.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. voice_mode/__version__.py +1 -1
  2. voice_mode/cli.py +611 -3
  3. voice_mode/config.py +11 -3
  4. voice_mode/frontend/.next/BUILD_ID +1 -1
  5. voice_mode/frontend/.next/app-build-manifest.json +5 -5
  6. voice_mode/frontend/.next/app-path-routes-manifest.json +1 -1
  7. voice_mode/frontend/.next/build-manifest.json +3 -3
  8. voice_mode/frontend/.next/next-minimal-server.js.nft.json +1 -1
  9. voice_mode/frontend/.next/next-server.js.nft.json +1 -1
  10. voice_mode/frontend/.next/prerender-manifest.json +1 -1
  11. voice_mode/frontend/.next/required-server-files.json +1 -1
  12. voice_mode/frontend/.next/server/app/_not-found/page.js +1 -1
  13. voice_mode/frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  14. voice_mode/frontend/.next/server/app/_not-found.html +1 -1
  15. voice_mode/frontend/.next/server/app/_not-found.rsc +1 -1
  16. voice_mode/frontend/.next/server/app/api/connection-details/route.js +2 -2
  17. voice_mode/frontend/.next/server/app/favicon.ico/route.js +2 -2
  18. voice_mode/frontend/.next/server/app/index.html +1 -1
  19. voice_mode/frontend/.next/server/app/index.rsc +2 -2
  20. voice_mode/frontend/.next/server/app/page.js +2 -2
  21. voice_mode/frontend/.next/server/app/page_client-reference-manifest.js +1 -1
  22. voice_mode/frontend/.next/server/app-paths-manifest.json +1 -1
  23. voice_mode/frontend/.next/server/chunks/994.js +1 -1
  24. voice_mode/frontend/.next/server/middleware-build-manifest.js +1 -1
  25. voice_mode/frontend/.next/server/next-font-manifest.js +1 -1
  26. voice_mode/frontend/.next/server/next-font-manifest.json +1 -1
  27. voice_mode/frontend/.next/server/pages/404.html +1 -1
  28. voice_mode/frontend/.next/server/pages/500.html +1 -1
  29. voice_mode/frontend/.next/server/server-reference-manifest.json +1 -1
  30. voice_mode/frontend/.next/standalone/.next/BUILD_ID +1 -1
  31. voice_mode/frontend/.next/standalone/.next/app-build-manifest.json +5 -5
  32. voice_mode/frontend/.next/standalone/.next/app-path-routes-manifest.json +1 -1
  33. voice_mode/frontend/.next/standalone/.next/build-manifest.json +3 -3
  34. voice_mode/frontend/.next/standalone/.next/prerender-manifest.json +1 -1
  35. voice_mode/frontend/.next/standalone/.next/required-server-files.json +1 -1
  36. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js +1 -1
  37. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  38. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.html +1 -1
  39. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.rsc +1 -1
  40. voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js +2 -2
  41. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js +2 -2
  42. voice_mode/frontend/.next/standalone/.next/server/app/index.html +1 -1
  43. voice_mode/frontend/.next/standalone/.next/server/app/index.rsc +2 -2
  44. voice_mode/frontend/.next/standalone/.next/server/app/page.js +2 -2
  45. voice_mode/frontend/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
  46. voice_mode/frontend/.next/standalone/.next/server/app-paths-manifest.json +1 -1
  47. voice_mode/frontend/.next/standalone/.next/server/chunks/994.js +1 -1
  48. voice_mode/frontend/.next/standalone/.next/server/middleware-build-manifest.js +1 -1
  49. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.js +1 -1
  50. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.json +1 -1
  51. voice_mode/frontend/.next/standalone/.next/server/pages/404.html +1 -1
  52. voice_mode/frontend/.next/standalone/.next/server/pages/500.html +1 -1
  53. voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.json +1 -1
  54. voice_mode/frontend/.next/standalone/server.js +1 -1
  55. voice_mode/frontend/.next/static/chunks/app/{layout-0e969b20634a3137.js → layout-08be62ed6e344292.js} +1 -1
  56. voice_mode/frontend/.next/static/chunks/app/page-80fc72669f25298f.js +1 -0
  57. voice_mode/frontend/.next/static/chunks/{main-app-b9e128659aafd50e.js → main-app-413f77c1f2c53e3f.js} +1 -1
  58. voice_mode/frontend/.next/trace +43 -43
  59. voice_mode/frontend/.next/types/app/api/connection-details/route.ts +1 -1
  60. voice_mode/frontend/.next/types/app/layout.ts +1 -1
  61. voice_mode/frontend/.next/types/app/page.ts +1 -1
  62. voice_mode/frontend/package-lock.json +8 -8
  63. voice_mode/resources/configuration.py +8 -4
  64. voice_mode/resources/whisper_models.py +10 -13
  65. voice_mode/templates/systemd/voicemode-frontend.service +1 -1
  66. voice_mode/tools/configuration_management.py +7 -2
  67. voice_mode/tools/converse.py +31 -0
  68. voice_mode/tools/services/kokoro/install.py +3 -2
  69. voice_mode/tools/services/whisper/__init__.py +13 -0
  70. voice_mode/tools/services/whisper/install.py +3 -2
  71. voice_mode/tools/services/whisper/list_models.py +70 -0
  72. voice_mode/tools/services/whisper/list_models_tool.py +65 -0
  73. voice_mode/tools/services/whisper/models.py +274 -0
  74. {voice_mode-2.26.0.dist-info → voice_mode-2.27.0.dist-info}/METADATA +1 -1
  75. {voice_mode-2.26.0.dist-info → voice_mode-2.27.0.dist-info}/RECORD +79 -75
  76. voice_mode/frontend/.next/static/chunks/app/page-db597c111ebcc19f.js +0 -1
  77. /voice_mode/frontend/.next/static/{uvJyMdD1IAhgbf_LCTQE6 → wQ5pxzPmwjlzdUfJwSjMg}/_buildManifest.js +0 -0
  78. /voice_mode/frontend/.next/static/{uvJyMdD1IAhgbf_LCTQE6 → wQ5pxzPmwjlzdUfJwSjMg}/_ssgManifest.js +0 -0
  79. {voice_mode-2.26.0.dist-info → voice_mode-2.27.0.dist-info}/WHEEL +0 -0
  80. {voice_mode-2.26.0.dist-info → voice_mode-2.27.0.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-4wig7c1g/voice_mode-2.26.0/voice_mode/frontend/app/api/connection-details/route.ts
1
+ // File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/api/connection-details/route.ts
2
2
  import * as entry from '../../../../../app/api/connection-details/route.js'
3
3
  import type { NextRequest } from 'next/server.js'
4
4
 
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-4wig7c1g/voice_mode-2.26.0/voice_mode/frontend/app/layout.tsx
1
+ // File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/layout.tsx
2
2
  import * as entry from '../../../app/layout.js'
3
3
  import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
4
4
 
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-4wig7c1g/voice_mode-2.26.0/voice_mode/frontend/app/page.tsx
1
+ // File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/page.tsx
2
2
  import * as entry from '../../../app/page.js'
3
3
  import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
4
4
 
@@ -1360,9 +1360,9 @@
1360
1360
  }
1361
1361
  },
1362
1362
  "node_modules/browserslist": {
1363
- "version": "4.25.2",
1364
- "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.25.2.tgz",
1365
- "integrity": "sha512-0si2SJK3ooGzIawRu61ZdPCO1IncZwS8IzuX73sPZsXW6EQ/w/DAfPyKI8l1ETTCr2MnvqWitmlCUxgdul45jA==",
1363
+ "version": "4.25.3",
1364
+ "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.25.3.tgz",
1365
+ "integrity": "sha512-cDGv1kkDI4/0e5yON9yM5G/0A5u8sf5TnmdX5C9qHzI9PPu++sQ9zjm1k9NiOrf3riY4OkK0zSGqfvJyJsgCBQ==",
1366
1366
  "dev": true,
1367
1367
  "funding": [
1368
1368
  {
@@ -1380,8 +1380,8 @@
1380
1380
  ],
1381
1381
  "license": "MIT",
1382
1382
  "dependencies": {
1383
- "caniuse-lite": "^1.0.30001733",
1384
- "electron-to-chromium": "^1.5.199",
1383
+ "caniuse-lite": "^1.0.30001735",
1384
+ "electron-to-chromium": "^1.5.204",
1385
1385
  "node-releases": "^2.0.19",
1386
1386
  "update-browserslist-db": "^1.1.3"
1387
1387
  },
@@ -1774,9 +1774,9 @@
1774
1774
  "license": "MIT"
1775
1775
  },
1776
1776
  "node_modules/electron-to-chromium": {
1777
- "version": "1.5.203",
1778
- "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.203.tgz",
1779
- "integrity": "sha512-uz4i0vLhfm6dLZWbz/iH88KNDV+ivj5+2SA+utpgjKaj9Q0iDLuwk6Idhe9BTxciHudyx6IvTvijhkPvFGUQ0g==",
1777
+ "version": "1.5.207",
1778
+ "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.207.tgz",
1779
+ "integrity": "sha512-mryFrrL/GXDTmAtIVMVf+eIXM09BBPlO5IQ7lUyKmK8d+A4VpRGG+M3ofoVef6qyF8s60rJei8ymlJxjUA8Faw==",
1780
1780
  "dev": true,
1781
1781
  "license": "ISC"
1782
1782
  },
@@ -238,13 +238,17 @@ async def environment_variables() -> str:
238
238
  Shows each configuration variable with:
239
239
  - Name: The environment variable name
240
240
  - Environment Value: Current value from environment
241
- - Config File Value: Value from ~/.voicemode.env (if exists)
241
+ - Config File Value: Value from ~/.voicemode/voicemode.env (if exists)
242
242
  - Description: What the variable controls
243
243
 
244
244
  This helps identify configuration sources and troubleshoot settings.
245
245
  """
246
- # Parse config file
247
- user_config_path = Path.home() / ".voicemode.env"
246
+ # Parse config file - try new path first, fall back to old
247
+ user_config_path = Path.home() / ".voicemode" / "voicemode.env"
248
+ if not user_config_path.exists():
249
+ old_path = Path.home() / ".voicemode" / ".voicemode.env"
250
+ if old_path.exists():
251
+ user_config_path = old_path
248
252
  file_config = parse_env_file(user_config_path)
249
253
 
250
254
  # Define all configuration variables with descriptions
@@ -330,7 +334,7 @@ async def environment_template() -> str:
330
334
  Environment variable template for voice mode configuration.
331
335
 
332
336
  Provides a ready-to-use template of all available environment variables
333
- with their current values. This can be saved to ~/.voicemode.env and
337
+ with their current values. This can be saved to ~/.voicemode/voicemode.env and
334
338
  customized as needed.
335
339
 
336
340
  Sensitive values like API keys are masked for security.
@@ -6,7 +6,7 @@ from pathlib import Path
6
6
  from typing import Dict, Any, List
7
7
 
8
8
  from ..server import mcp
9
- from ..config import logger
9
+ from ..config import logger, WHISPER_MODEL_PATH, WHISPER_MODEL
10
10
 
11
11
 
12
12
  @mcp.resource("whisper://models")
@@ -24,17 +24,14 @@ async def list_whisper_models() -> str:
24
24
  and which one is currently being used by the whisper server.
25
25
  """
26
26
  try:
27
- # Get whisper models directory - check both locations
28
- models_dirs = [
29
- Path.home() / ".voicemode/services/whisper/models",
30
- Path.home() / ".voicemode/whisper.cpp/models" # legacy
31
- ]
27
+ # Get whisper models directory from config
28
+ models_dir = Path(WHISPER_MODEL_PATH)
32
29
 
33
- models_dir = None
34
- for dir_path in models_dirs:
35
- if dir_path.exists():
36
- models_dir = dir_path
37
- break
30
+ # If config path doesn't exist, check service installation
31
+ if not models_dir.exists():
32
+ service_models = Path.home() / ".voicemode/services/whisper/models"
33
+ if service_models.exists():
34
+ models_dir = service_models
38
35
 
39
36
  # List all model files
40
37
  models: List[Dict[str, Any]] = []
@@ -55,8 +52,8 @@ async def list_whisper_models() -> str:
55
52
  # Sort models by name
56
53
  models.sort(key=lambda x: x["name"])
57
54
 
58
- # Get current configuration
59
- current_model = os.environ.get("VOICEMODE_WHISPER_MODEL", "large-v2")
55
+ # Get current configuration from config
56
+ current_model = WHISPER_MODEL
60
57
 
61
58
  # Build response
62
59
  data = {
@@ -7,7 +7,7 @@ Wants=network.target
7
7
  [Service]
8
8
  Type=simple
9
9
  WorkingDirectory={FRONTEND_DIR}
10
- EnvironmentFile=%h/.voicemode/.voicemode.env
10
+ EnvironmentFile=%h/.voicemode/voicemode.env
11
11
  Environment=NODE_ENV=development
12
12
  Environment=PORT={PORT}
13
13
  Environment=HOST={HOST}
@@ -11,7 +11,9 @@ import logging
11
11
  logger = logging.getLogger("voice-mode")
12
12
 
13
13
  # Configuration file path (user-level only for security)
14
- USER_CONFIG_PATH = Path.home() / ".voicemode" / ".voicemode.env"
14
+ USER_CONFIG_PATH = Path.home() / ".voicemode" / "voicemode.env"
15
+ # Legacy path for backwards compatibility
16
+ LEGACY_CONFIG_PATH = Path.home() / ".voicemode" / ".voicemode.env"
15
17
 
16
18
 
17
19
  def parse_env_file(file_path: Path) -> Dict[str, str]:
@@ -117,8 +119,11 @@ async def update_config(key: str, value: str) -> str:
117
119
  if not re.match(r'^[A-Z_]+$', key):
118
120
  return f"❌ Invalid key format: {key}. Keys must be uppercase with underscores only."
119
121
 
120
- # Use user config path
122
+ # Use user config path, check for legacy if new doesn't exist
121
123
  config_path = USER_CONFIG_PATH
124
+ if not config_path.exists() and LEGACY_CONFIG_PATH.exists():
125
+ config_path = LEGACY_CONFIG_PATH
126
+ logger.warning(f"Using deprecated .voicemode.env - please rename to voicemode.env")
122
127
 
123
128
  try:
124
129
  # Read existing configuration
@@ -1412,6 +1412,37 @@ async def converse(
1412
1412
  Remember: Lower values (0-1) = more permissive, may detect non-speech as speech
1413
1413
  Higher values (2-3) = more strict, may miss soft speech or whispers
1414
1414
 
1415
+ Parallel Operations Pattern (RECOMMENDED):
1416
+ When performing actions that don't require user confirmation, use wait_for_response=False
1417
+ to speak while simultaneously executing other tools. This creates natural, flowing conversations.
1418
+
1419
+ Pattern: converse("Status update", wait_for_response=False) then immediately run other tools.
1420
+ The speech plays while your actions execute in parallel.
1421
+
1422
+ Examples:
1423
+ - Search narration: converse("Searching for that file", wait_for_response=False) + Grep(...)
1424
+ - Processing update: converse("Analyzing the screenshot", wait_for_response=False) + analyze_screenshot(...)
1425
+ - Creation status: converse("Creating that document now", wait_for_response=False) + Write(...)
1426
+ - Quick confirmation: converse("Done! The file is saved", wait_for_response=False)
1427
+
1428
+ Benefits:
1429
+ - No dead air during operations
1430
+ - User knows what's happening
1431
+ - More natural conversation flow
1432
+ - Better user experience
1433
+
1434
+ When to use parallel pattern:
1435
+ - File operations (reading, writing, searching)
1436
+ - Data processing (analysis, computation)
1437
+ - Status updates during long operations
1438
+ - Confirmations that don't need response
1439
+
1440
+ When NOT to use parallel pattern:
1441
+ - Questions requiring answers
1442
+ - Confirmations needing user approval
1443
+ - Error messages needing acknowledgment
1444
+ - End of conversation farewells (unless doing cleanup)
1445
+
1415
1446
  Skip TTS Examples:
1416
1447
  - Fast iteration mode: converse("Processing your request", skip_tts=True) # Text only, no voice
1417
1448
  - Important announcement: converse("Warning: System will restart", skip_tts=False) # Always use voice
@@ -251,13 +251,14 @@ async def kokoro_install(
251
251
  with open(plist_path, 'w') as f:
252
252
  f.write(plist_content)
253
253
 
254
- # Load the launchagent
254
+ # Unload if already loaded (ignore errors)
255
255
  try:
256
256
  subprocess.run(["launchctl", "unload", plist_path], capture_output=True)
257
257
  except:
258
258
  pass # Ignore if not loaded
259
259
 
260
- subprocess.run(["launchctl", "load", plist_path], check=True)
260
+ # Don't load here - let enable_service handle it with the -w flag
261
+ # This prevents the "already loaded" error when enable_service runs
261
262
  result["launchagent"] = plist_path
262
263
  result["message"] += f"\nLaunchAgent installed: {plist_name}"
263
264
 
@@ -0,0 +1,13 @@
"""Whisper service tools.

Re-exports the Whisper service MCP tools so callers can import them
directly from ``voice_mode.tools.services.whisper``.
"""

from voice_mode.tools.services.whisper.install import whisper_install
from voice_mode.tools.services.whisper.uninstall import whisper_uninstall
from voice_mode.tools.services.whisper.download_model import download_model
from voice_mode.tools.services.whisper.list_models_tool import whisper_list_models

# Public API of this subpackage.
__all__ = [
    'whisper_install',
    'whisper_uninstall',
    'download_model',
    'whisper_list_models'
]
@@ -369,13 +369,14 @@ exec "$SERVER_BIN" \\
369
369
  with open(plist_path, 'w') as f:
370
370
  f.write(plist_content)
371
371
 
372
- # Load the launchagent
372
+ # Unload if already loaded (ignore errors)
373
373
  try:
374
374
  subprocess.run(["launchctl", "unload", plist_path], capture_output=True)
375
375
  except:
376
376
  pass # Ignore if not loaded
377
377
 
378
- subprocess.run(["launchctl", "load", plist_path], check=True)
378
+ # Don't load here - let enable_service handle it with the -w flag
379
+ # This prevents the "already loaded" error when enable_service runs
379
380
 
380
381
  # Handle auto_enable
381
382
  enable_message = ""
@@ -0,0 +1,70 @@
1
+ """MCP tool for listing Whisper models and their status."""
2
+
3
+ from typing import Dict, Any
4
+ from voice_mode.tools.services.whisper.models import (
5
+ WHISPER_MODELS,
6
+ get_model_directory,
7
+ get_current_model,
8
+ is_model_installed,
9
+ get_installed_models,
10
+ format_size,
11
+ has_coreml_model,
12
+ is_apple_silicon
13
+ )
14
+
15
+
async def list_whisper_models() -> Dict[str, Any]:
    """List available Whisper models and their installation status.

    Returns:
        Dictionary containing model information and status
    """
    try:
        directory = get_model_directory()
        active = get_current_model()
        installed = get_installed_models()

        # Core ML information is only meaningful on Apple Silicon.
        coreml_relevant = is_apple_silicon()

        models = [
            {
                "name": name,
                "size_mb": meta["size_mb"],
                "size": format_size(meta["size_mb"]),
                "languages": meta["languages"],
                "description": meta["description"],
                "installed": is_model_installed(name),
                "current": name == active,
                "has_coreml": has_coreml_model(name) if coreml_relevant else False,
            }
            for name, meta in WHISPER_MODELS.items()
        ]

        # Aggregate sizes: what is on disk vs. the full registry.
        installed_mb = sum(WHISPER_MODELS[name]["size_mb"] for name in installed)
        available_mb = sum(meta["size_mb"] for meta in WHISPER_MODELS.values())

        return {
            "success": True,
            "models": models,
            "current_model": active,
            "model_directory": str(directory),
            "installed_count": len(installed),
            "total_count": len(WHISPER_MODELS),
            "installed_size_mb": installed_mb,
            "installed_size": format_size(installed_mb),
            "available_size_mb": available_mb,
            "available_size": format_size(available_mb),
        }

    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "models": []
        }
@@ -0,0 +1,65 @@
1
+ """MCP tool for listing Whisper models."""
2
+
3
+ from voice_mode.server import mcp
4
+ from voice_mode.tools.services.whisper.list_models import list_whisper_models
5
+
6
+
@mcp.tool()
async def whisper_list_models() -> str:
    """List available Whisper models and their installation status.

    Shows all available Whisper models with:
    - Installation status (installed/not installed)
    - Model sizes
    - Language support
    - Currently selected model

    Returns:
        Formatted string showing model status and information
    """
    result = await list_whisper_models()

    if not result["success"]:
        return f"Error listing models: {result.get('error', 'Unknown error')}"

    lines = ["Whisper Models:", ""]

    # Only render the Core ML column when at least one model has a
    # Core ML variant (i.e. we are on Apple Silicon).
    show_coreml = any(m.get("has_coreml", False) for m in result["models"])

    for entry in result["models"]:
        # Status indicators for the current row.
        marker = "→ " if entry["current"] else "  "
        status = "[✓ Installed]" if entry["installed"] else "[ Download ]"

        coreml = ""
        if show_coreml:
            coreml = "[ML]" if entry.get("has_coreml", False) else "    "

        row = f"{marker}{entry['name']:15} {status:14} {coreml} {entry['size']:>8} {entry['languages']:20}"
        if entry["current"]:
            row += " (Currently selected)"
        lines.append(row)

    lines += [
        "",
        f"Models directory: {result['model_directory']}",
        f"Total size: {result['installed_size']} installed / {result['available_size']} available",
        "",
        f"Installed models: {result['installed_count']}/{result['total_count']}",
        f"Current model: {result['current_model']}",
    ]

    if show_coreml:
        lines.append("")
        lines.append("[ML] = Core ML model available for faster inference on Apple Silicon")

    return "\n".join(lines)
@@ -0,0 +1,274 @@
1
+ """Whisper model registry and utilities."""
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Dict, List, Optional, TypedDict
6
+ from voice_mode.config import WHISPER_MODEL_PATH, WHISPER_MODEL
7
+
8
+
class ModelInfo(TypedDict):
    """Information about a Whisper model."""
    size_mb: int  # Download size in MB
    languages: str  # Language support description
    description: str  # Brief description
    filename: str  # Expected filename when downloaded


# Registry of all available Whisper models.
# Filenames follow the whisper.cpp naming convention (ggml-<name>.bin);
# sizes are approximate download sizes in MB.
WHISPER_MODELS: Dict[str, ModelInfo] = {
    "tiny": {
        "size_mb": 39,
        "languages": "Multilingual",
        "description": "Fastest, least accurate",
        "filename": "ggml-tiny.bin"
    },
    "tiny.en": {
        "size_mb": 39,
        "languages": "English only",
        "description": "Fastest English model",
        "filename": "ggml-tiny.en.bin"
    },
    "base": {
        "size_mb": 142,
        "languages": "Multilingual",
        "description": "Good balance of speed and accuracy",
        "filename": "ggml-base.bin"
    },
    "base.en": {
        "size_mb": 142,
        "languages": "English only",
        "description": "Good English model",
        "filename": "ggml-base.en.bin"
    },
    "small": {
        "size_mb": 466,
        "languages": "Multilingual",
        "description": "Better accuracy, slower",
        "filename": "ggml-small.bin"
    },
    "small.en": {
        "size_mb": 466,
        "languages": "English only",
        "description": "Better English accuracy",
        "filename": "ggml-small.en.bin"
    },
    "medium": {
        "size_mb": 1500,
        "languages": "Multilingual",
        "description": "High accuracy, slow",
        "filename": "ggml-medium.bin"
    },
    "medium.en": {
        "size_mb": 1500,
        "languages": "English only",
        "description": "High English accuracy",
        "filename": "ggml-medium.en.bin"
    },
    "large-v1": {
        "size_mb": 2900,
        "languages": "Multilingual",
        "description": "Original large model",
        "filename": "ggml-large-v1.bin"
    },
    "large-v2": {
        "size_mb": 2900,
        "languages": "Multilingual",
        "description": "Improved large model (recommended)",
        "filename": "ggml-large-v2.bin"
    },
    "large-v3": {
        "size_mb": 3100,
        "languages": "Multilingual",
        "description": "Latest large model",
        "filename": "ggml-large-v3.bin"
    },
    "large-v3-turbo": {
        "size_mb": 1600,
        "languages": "Multilingual",
        "description": "Faster large model with good accuracy",
        "filename": "ggml-large-v3-turbo.bin"
    }
}
92
+
93
+
def get_model_directory() -> Path:
    """Get the directory where Whisper models are stored.

    Prefers the configured WHISPER_MODEL_PATH; when that directory does
    not exist yet, falls back to the service-managed install location
    if one is present.
    """
    configured = Path(WHISPER_MODEL_PATH)
    if configured.exists():
        return configured

    # Config path missing: check the service installation directory.
    fallback = Path.home() / ".voicemode" / "services" / "whisper" / "models"
    return fallback if fallback.exists() else configured
106
+
107
+
def get_current_model() -> str:
    """Get the currently selected Whisper model.

    Returns the configured model name, falling back to "large-v2" when
    the configured value is not a known model.
    """
    configured = WHISPER_MODEL
    # Guard against unknown/typo'd model names in the configuration.
    return configured if configured in WHISPER_MODELS else "large-v2"
118
+
119
+
def is_model_installed(model_name: str) -> bool:
    """Check if a model is installed.

    Returns False for unknown model names or when the expected ggml
    file is absent from the model directory.
    """
    info = WHISPER_MODELS.get(model_name)
    if info is None:
        return False
    return (get_model_directory() / info["filename"]).exists()
130
+
131
+
def has_coreml_model(model_name: str) -> bool:
    """Check if a Core ML model is available for the given model.

    Core ML models are only used on macOS with Apple Silicon.
    They have the extension .mlmodelc and provide faster inference.

    Args:
        model_name: Registry name of the model (e.g. "large-v2").

    Returns:
        True if a compiled Core ML encoder exists next to the model.
    """
    import platform

    # Core ML is only relevant on macOS
    if platform.system() != "Darwin":
        return False

    if model_name not in WHISPER_MODELS:
        return False

    # Core ML model would be named like ggml-large-v2-encoder.mlmodelc.
    # (The original looked up WHISPER_MODELS[model_name] here but never
    # used it; that dead local has been removed.)
    coreml_path = get_model_directory() / f"ggml-{model_name}-encoder.mlmodelc"
    return coreml_path.exists()
154
+
155
+
def get_installed_models() -> List[str]:
    """Get list of installed models."""
    return [name for name in WHISPER_MODELS if is_model_installed(name)]
163
+
164
+
def get_total_size(models: Optional[List[str]] = None) -> int:
    """Get total size of models in MB.

    Args:
        models: List of model names. If None, uses all models.

    Returns:
        Total size in MB
    """
    selected = list(WHISPER_MODELS.keys()) if models is None else models
    # Unknown names are skipped rather than raising.
    return sum(
        WHISPER_MODELS[name]["size_mb"]
        for name in selected
        if name in WHISPER_MODELS
    )
183
+
184
+
def format_size(size_mb: int) -> str:
    """Format size in MB to human-readable string.

    Sizes below 1000 MB are shown in MB; larger ones in GB with one
    decimal place (decimal, 1000 MB = 1 GB).
    """
    if size_mb >= 1000:
        return f"{size_mb / 1000:.1f} GB"
    return f"{size_mb} MB"
192
+
193
+
def is_macos() -> bool:
    """Return True when the current platform is macOS (Darwin)."""
    import platform

    system_name = platform.system()
    return system_name == "Darwin"
198
+
199
+
def is_apple_silicon() -> bool:
    """Return True on macOS running an Apple Silicon (arm64) CPU."""
    import platform

    return (platform.system(), platform.machine()) == ("Darwin", "arm64")
204
+
205
+
def set_current_model(model_name: str) -> None:
    """Set the current active Whisper model.

    Args:
        model_name: Name of the model to set as active

    Updates the voicemode.env configuration file for persistence.

    Fixes over the previous implementation:
    - Writes to ``~/.voicemode/voicemode.env`` (the path this release
      migrated to) instead of the legacy hidden ``.voicemode.env``;
      the legacy file is still used when it is the only one present.
    - Preserves all other lines byte-for-byte; the old code re-emitted
      unrelated keys with their quotes stripped.
    """
    import re

    config_path = _resolve_whisper_config_path()

    # Ensure directory exists
    config_path.parent.mkdir(parents=True, exist_ok=True)

    # Rewrite the file line by line, replacing the
    # VOICEMODE_WHISPER_MODEL entry in place when it exists and leaving
    # every other line (comments, blanks, other keys) untouched.
    lines: List[str] = []
    replaced = False

    if config_path.exists():
        with open(config_path, 'r') as f:
            for line in f:
                stripped = line.strip()
                is_setting = bool(stripped) and not stripped.startswith('#')
                match = re.match(r'^([A-Z_]+)=', stripped) if is_setting else None
                if match and match.group(1) == 'VOICEMODE_WHISPER_MODEL':
                    lines.append(f"VOICEMODE_WHISPER_MODEL={model_name}\n")
                    replaced = True
                else:
                    lines.append(line)

    # Append the setting when it was not already in the file.
    if not replaced:
        if lines and lines[-1].strip() != '':
            lines.append('\n')
        lines.append("# Whisper Configuration\n")
        lines.append(f"VOICEMODE_WHISPER_MODEL={model_name}\n")

    # Write the updated configuration
    with open(config_path, 'w') as f:
        f.writelines(lines)


def _resolve_whisper_config_path() -> Path:
    """Return the config file to update.

    Prefers the new ``~/.voicemode/voicemode.env``; falls back to the
    deprecated ``~/.voicemode/.voicemode.env`` only when the legacy file
    is the sole one present (matching configuration_management.py).
    """
    new_path = Path.home() / ".voicemode" / "voicemode.env"
    if new_path.exists():
        return new_path
    legacy_path = Path.home() / ".voicemode" / ".voicemode.env"
    if legacy_path.exists():
        return legacy_path
    return new_path