voice-mode 2.32.0__py3-none-any.whl → 2.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. voice_mode/__version__.py +1 -1
  2. voice_mode/config.py +1 -1
  3. voice_mode/frontend/.next/BUILD_ID +1 -1
  4. voice_mode/frontend/.next/app-build-manifest.json +5 -5
  5. voice_mode/frontend/.next/build-manifest.json +3 -3
  6. voice_mode/frontend/.next/next-minimal-server.js.nft.json +1 -1
  7. voice_mode/frontend/.next/next-server.js.nft.json +1 -1
  8. voice_mode/frontend/.next/prerender-manifest.json +1 -1
  9. voice_mode/frontend/.next/required-server-files.json +1 -1
  10. voice_mode/frontend/.next/server/app/_not-found/page.js +1 -1
  11. voice_mode/frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  12. voice_mode/frontend/.next/server/app/_not-found.html +1 -1
  13. voice_mode/frontend/.next/server/app/_not-found.rsc +1 -1
  14. voice_mode/frontend/.next/server/app/api/connection-details/route.js +2 -2
  15. voice_mode/frontend/.next/server/app/favicon.ico/route.js +2 -2
  16. voice_mode/frontend/.next/server/app/index.html +1 -1
  17. voice_mode/frontend/.next/server/app/index.rsc +2 -2
  18. voice_mode/frontend/.next/server/app/page.js +3 -3
  19. voice_mode/frontend/.next/server/app/page_client-reference-manifest.js +1 -1
  20. voice_mode/frontend/.next/server/chunks/994.js +2 -2
  21. voice_mode/frontend/.next/server/middleware-build-manifest.js +1 -1
  22. voice_mode/frontend/.next/server/next-font-manifest.js +1 -1
  23. voice_mode/frontend/.next/server/next-font-manifest.json +1 -1
  24. voice_mode/frontend/.next/server/pages/404.html +1 -1
  25. voice_mode/frontend/.next/server/pages/500.html +1 -1
  26. voice_mode/frontend/.next/server/server-reference-manifest.json +1 -1
  27. voice_mode/frontend/.next/standalone/.next/BUILD_ID +1 -1
  28. voice_mode/frontend/.next/standalone/.next/app-build-manifest.json +5 -5
  29. voice_mode/frontend/.next/standalone/.next/build-manifest.json +3 -3
  30. voice_mode/frontend/.next/standalone/.next/prerender-manifest.json +1 -1
  31. voice_mode/frontend/.next/standalone/.next/required-server-files.json +1 -1
  32. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js +1 -1
  33. voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
  34. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.html +1 -1
  35. voice_mode/frontend/.next/standalone/.next/server/app/_not-found.rsc +1 -1
  36. voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js +2 -2
  37. voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js +2 -2
  38. voice_mode/frontend/.next/standalone/.next/server/app/index.html +1 -1
  39. voice_mode/frontend/.next/standalone/.next/server/app/index.rsc +2 -2
  40. voice_mode/frontend/.next/standalone/.next/server/app/page.js +3 -3
  41. voice_mode/frontend/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
  42. voice_mode/frontend/.next/standalone/.next/server/chunks/994.js +2 -2
  43. voice_mode/frontend/.next/standalone/.next/server/middleware-build-manifest.js +1 -1
  44. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.js +1 -1
  45. voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.json +1 -1
  46. voice_mode/frontend/.next/standalone/.next/server/pages/404.html +1 -1
  47. voice_mode/frontend/.next/standalone/.next/server/pages/500.html +1 -1
  48. voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.json +1 -1
  49. voice_mode/frontend/.next/standalone/server.js +1 -1
  50. voice_mode/frontend/.next/static/chunks/app/{layout-4c59da29fcf0456f.js → layout-d124af177c48e1c5.js} +1 -1
  51. voice_mode/frontend/.next/static/chunks/app/{page-017e11b769f3a746.js → page-ed5ce28c62d0c8ef.js} +1 -1
  52. voice_mode/frontend/.next/static/chunks/{main-app-822552bd94497f44.js → main-app-0aa7560603c1f114.js} +1 -1
  53. voice_mode/frontend/.next/trace +43 -43
  54. voice_mode/frontend/.next/types/app/api/connection-details/route.ts +1 -1
  55. voice_mode/frontend/.next/types/app/layout.ts +1 -1
  56. voice_mode/frontend/.next/types/app/page.ts +1 -1
  57. voice_mode/prompts/converse.py +0 -1
  58. voice_mode/templates/__init__.py +1 -0
  59. voice_mode/templates/scripts/__init__.py +1 -0
  60. voice_mode/templates/scripts/start-whisper-server.sh +80 -0
  61. voice_mode/tools/services/whisper/install.py +88 -26
  62. voice_mode/tools/services/whisper/model_install.py +38 -47
  63. voice_mode/tools/services/whisper/models.py +1 -1
  64. voice_mode/utils/services/coreml_setup.py +234 -0
  65. voice_mode/utils/services/whisper_helpers.py +57 -32
  66. {voice_mode-2.32.0.dist-info → voice_mode-2.33.0.dist-info}/METADATA +3 -3
  67. {voice_mode-2.32.0.dist-info → voice_mode-2.33.0.dist-info}/RECORD +71 -67
  68. /voice_mode/frontend/.next/static/{e8aNOVoFA4vUks2Chn7qv → dk2w9pXeSUc6w1YbYi6aJ}/_buildManifest.js +0 -0
  69. /voice_mode/frontend/.next/static/{e8aNOVoFA4vUks2Chn7qv → dk2w9pXeSUc6w1YbYi6aJ}/_ssgManifest.js +0 -0
  70. {voice_mode-2.32.0.dist-info → voice_mode-2.33.0.dist-info}/WHEEL +0 -0
  71. {voice_mode-2.32.0.dist-info → voice_mode-2.33.0.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-foqe1vzl/voice_mode-2.32.0/voice_mode/frontend/app/api/connection-details/route.ts
1
+ // File: /tmp/build-via-sdist-wr79qxwc/voice_mode-2.33.0/voice_mode/frontend/app/api/connection-details/route.ts
2
2
  import * as entry from '../../../../../app/api/connection-details/route.js'
3
3
  import type { NextRequest } from 'next/server.js'
4
4
 
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-foqe1vzl/voice_mode-2.32.0/voice_mode/frontend/app/layout.tsx
1
+ // File: /tmp/build-via-sdist-wr79qxwc/voice_mode-2.33.0/voice_mode/frontend/app/layout.tsx
2
2
  import * as entry from '../../../app/layout.js'
3
3
  import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
4
4
 
@@ -1,4 +1,4 @@
1
- // File: /tmp/build-via-sdist-foqe1vzl/voice_mode-2.32.0/voice_mode/frontend/app/page.tsx
1
+ // File: /tmp/build-via-sdist-wr79qxwc/voice_mode-2.33.0/voice_mode/frontend/app/page.tsx
2
2
  import * as entry from '../../../app/page.js'
3
3
  import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
4
4
 
@@ -10,7 +10,6 @@ def converse() -> str:
10
10
  "Using tools from voice-mode, have an ongoing two-way conversation",
11
11
  "End the chat when the user indicates they want to end it",
12
12
  "Keep your utterances brief unless a longer response is requested or necessary",
13
- "Listen for up to 120 seconds per response"
14
13
  ]
15
14
 
16
15
  return "\n".join(f"- {instruction}" for instruction in instructions)
@@ -0,0 +1 @@
1
+ # Templates package for Voice Mode
@@ -0,0 +1 @@
1
+ # Script templates for Voice Mode services
@@ -0,0 +1,80 @@
1
#!/bin/bash

# Whisper Service Startup Script
#
# Used by both macOS (launchd) and Linux (systemd) to start the whisper service.
# It sources ~/.voicemode/voicemode.env (when present) so that settings such as
# VOICEMODE_WHISPER_MODEL and VOICEMODE_WHISPER_PORT take effect, picks a model
# and a server binary, then replaces itself with whisper-server via exec.

# Determine whisper directory (script is in bin/, whisper root is parent)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WHISPER_DIR="$(dirname "$SCRIPT_DIR")"

# Voicemode configuration directory
VOICEMODE_DIR="$HOME/.voicemode"
LOG_DIR="$VOICEMODE_DIR/logs/whisper"

# Create log directory if it doesn't exist
mkdir -p "$LOG_DIR"

# Log file for this script (separate from whisper server logs)
STARTUP_LOG="$LOG_DIR/startup.log"

# Append one timestamped line to the startup log.
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >> "$STARTUP_LOG"
}

# Source voicemode configuration if it exists
if [ -f "$VOICEMODE_DIR/voicemode.env" ]; then
    log "Sourcing voicemode.env"
    source "$VOICEMODE_DIR/voicemode.env"
else
    log "Warning: voicemode.env not found"
fi

# Model selection with environment variable support
MODEL_NAME="${VOICEMODE_WHISPER_MODEL:-base}"
MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"

log "Starting whisper-server with model: $MODEL_NAME"

# Check if model exists; otherwise fall back to the first ggml-*.bin available
if [ ! -f "$MODEL_PATH" ]; then
    log "Error: Model $MODEL_NAME not found at $MODEL_PATH"
    log "Available models:"

    # Iterate with a glob instead of parsing `ls` output, so unusual file
    # names are handled safely. An unmatched glob stays literal and is
    # rejected by the -f test.
    FALLBACK_MODEL=""
    for candidate in "$WHISPER_DIR"/models/ggml-*.bin; do
        [ -f "$candidate" ] || continue
        candidate_name="$(basename "$candidate")"
        echo "$candidate_name" >> "$STARTUP_LOG"
        if [ -z "$FALLBACK_MODEL" ]; then
            FALLBACK_MODEL="$candidate_name"
        fi
    done

    if [ -n "$FALLBACK_MODEL" ]; then
        MODEL_PATH="$WHISPER_DIR/models/$FALLBACK_MODEL"
        log "Using fallback model: $FALLBACK_MODEL"
    else
        log "Fatal: No whisper models found"
        exit 1
    fi
fi

# Port configuration (with environment variable support)
WHISPER_PORT="${VOICEMODE_WHISPER_PORT:-2022}"

# Determine server binary location
# Check new CMake build location first, then legacy location
if [ -f "$WHISPER_DIR/build/bin/whisper-server" ]; then
    SERVER_BIN="$WHISPER_DIR/build/bin/whisper-server"
elif [ -f "$WHISPER_DIR/server" ]; then
    SERVER_BIN="$WHISPER_DIR/server"
else
    log "Error: whisper-server binary not found"
    log "Checked: $WHISPER_DIR/build/bin/whisper-server"
    log "Checked: $WHISPER_DIR/server"
    exit 1
fi

log "Using binary: $SERVER_BIN"
log "Model path: $MODEL_PATH"
log "Port: $WHISPER_PORT"

# Start whisper-server
# Abort if the working directory cannot be entered rather than starting the
# server from an unexpected location.
if ! cd "$WHISPER_DIR"; then
    log "Fatal: cannot change directory to $WHISPER_DIR"
    exit 1
fi

# Using exec to replace this script process with whisper-server
# NOTE(review): --host 0.0.0.0 listens on every network interface; confirm
# this exposure is intended before relying on it outside a trusted network.
exec "$SERVER_BIN" \
    --host 0.0.0.0 \
    --port "$WHISPER_PORT" \
    --model "$MODEL_PATH" \
    --inference-path /v1/audio/transcriptions \
    --threads 8
@@ -11,6 +11,11 @@ from pathlib import Path
11
11
  from typing import Dict, Any, Optional, Union
12
12
  import asyncio
13
13
  import aiohttp
14
+ try:
15
+ from importlib.resources import files
16
+ except ImportError:
17
+ # Python < 3.9 fallback
18
+ from importlib_resources import files
14
19
 
15
20
  from voice_mode.server import mcp
16
21
  from voice_mode.config import SERVICE_AUTO_ENABLE
@@ -28,7 +33,7 @@ logger = logging.getLogger("voice-mode")
28
33
  @mcp.tool()
29
34
  async def whisper_install(
30
35
  install_dir: Optional[str] = None,
31
- model: str = "large-v2",
36
+ model: str = "base",
32
37
  use_gpu: Optional[Union[bool, str]] = None,
33
38
  force_reinstall: Union[bool, str] = False,
34
39
  auto_enable: Optional[Union[bool, str]] = None,
@@ -42,7 +47,7 @@ async def whisper_install(
42
47
  Args:
43
48
  install_dir: Directory to install whisper.cpp (default: ~/.voicemode/whisper.cpp)
44
49
  model: Whisper model to download (tiny, base, small, medium, large-v2, large-v3, etc.)
45
- Default is large-v2 for best accuracy. Note: large models require ~3GB RAM.
50
+ Default is base for good balance of speed and accuracy (142MB).
46
51
  use_gpu: Enable GPU support if available (default: auto-detect)
47
52
  force_reinstall: Force reinstallation even if already installed
48
53
  auto_enable: Enable service after install. If None, uses VOICEMODE_SERVICE_AUTO_ENABLE config.
@@ -302,59 +307,117 @@ async def whisper_install(
302
307
  if 'original_dir' in locals():
303
308
  os.chdir(original_dir)
304
309
 
305
- # Create start script for whisper-server
306
- logger.info("Creating whisper-server start script...")
307
- start_script_content = f"""#!/bin/bash
310
+ # Copy template start script for whisper-server
311
+ logger.info("Installing whisper-server start script from template...")
312
+
313
+ # Create bin directory
314
+ bin_dir = os.path.join(install_dir, "bin")
315
+ os.makedirs(bin_dir, exist_ok=True)
316
+
317
+ # Copy template script
318
+ template_content = None
319
+
320
+ # First try to load from source if running in development
321
+ source_template = Path(__file__).parent.parent.parent.parent / "templates" / "scripts" / "start-whisper-server.sh"
322
+ if source_template.exists():
323
+ logger.info(f"Loading template from source: {source_template}")
324
+ template_content = source_template.read_text()
325
+ else:
326
+ # Try loading from package resources
327
+ try:
328
+ template_resource = files("voice_mode.templates.scripts").joinpath("start-whisper-server.sh")
329
+ template_content = template_resource.read_text()
330
+ logger.info("Loaded template from package resources")
331
+ except Exception as e:
332
+ logger.warning(f"Failed to load template script: {e}. Using fallback inline script.")
333
+
334
+ # Fallback to inline script if template not found
335
+ if template_content is None:
336
+ template_content = f"""#!/bin/bash
337
+
338
+ # Whisper Service Startup Script
339
+ # This script is used by both macOS (launchd) and Linux (systemd) to start the whisper service
340
+ # It sources the voicemode.env file to get configuration, especially VOICEMODE_WHISPER_MODEL
341
+
342
+ # Determine whisper directory (script is in bin/, whisper root is parent)
343
+ SCRIPT_DIR="$(cd "$(dirname "${{BASH_SOURCE[0]}}")" && pwd)"
344
+ WHISPER_DIR="$(dirname "$SCRIPT_DIR")"
345
+
346
+ # Voicemode configuration directory
347
+ VOICEMODE_DIR="$HOME/.voicemode"
348
+ LOG_DIR="$VOICEMODE_DIR/logs/whisper"
349
+
350
+ # Create log directory if it doesn't exist
351
+ mkdir -p "$LOG_DIR"
308
352
 
309
- # Configuration
310
- WHISPER_DIR="{install_dir}"
311
- LOG_FILE="{os.path.join(voicemode_dir, 'whisper-server.log')}"
353
+ # Log file for this script (separate from whisper server logs)
354
+ STARTUP_LOG="$LOG_DIR/startup.log"
312
355
 
313
356
  # Source voicemode configuration if it exists
314
- if [ -f "{voicemode_dir}/voicemode.env" ]; then
315
- source "{voicemode_dir}/voicemode.env"
357
+ if [ -f "$VOICEMODE_DIR/voicemode.env" ]; then
358
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Sourcing voicemode.env" >> "$STARTUP_LOG"
359
+ source "$VOICEMODE_DIR/voicemode.env"
360
+ else
361
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Warning: voicemode.env not found" >> "$STARTUP_LOG"
316
362
  fi
317
363
 
318
364
  # Model selection with environment variable support
319
- MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-{model}}}"
365
+ MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-base}}"
320
366
  MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"
321
367
 
368
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting whisper-server with model: $MODEL_NAME" >> "$STARTUP_LOG"
369
+
322
370
  # Check if model exists
323
371
  if [ ! -f "$MODEL_PATH" ]; then
324
- echo "Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$LOG_FILE"
325
- echo "Available models:" >> "$LOG_FILE"
326
- ls -1 "$WHISPER_DIR/models/" | grep "^ggml-.*\\.bin$" >> "$LOG_FILE"
327
- exit 1
372
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$STARTUP_LOG"
373
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Available models:" >> "$STARTUP_LOG"
374
+ ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\\.bin$" >> "$STARTUP_LOG"
375
+
376
+ # Try to find any available model as fallback
377
+ FALLBACK_MODEL=$(ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\\.bin$" | head -1)
378
+ if [ -n "$FALLBACK_MODEL" ]; then
379
+ MODEL_PATH="$WHISPER_DIR/models/$FALLBACK_MODEL"
380
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using fallback model: $FALLBACK_MODEL" >> "$STARTUP_LOG"
381
+ else
382
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Fatal: No whisper models found" >> "$STARTUP_LOG"
383
+ exit 1
384
+ fi
328
385
  fi
329
386
 
330
- echo "Starting whisper-server with model: $MODEL_NAME" >> "$LOG_FILE"
331
-
332
- # Note: whisper-server is now built as part of the main build target
387
+ # Port configuration (with environment variable support)
388
+ WHISPER_PORT="${{VOICEMODE_WHISPER_PORT:-2022}}"
333
389
 
334
390
  # Determine server binary location
391
+ # Check new CMake build location first, then legacy location
335
392
  if [ -f "$WHISPER_DIR/build/bin/whisper-server" ]; then
336
393
  SERVER_BIN="$WHISPER_DIR/build/bin/whisper-server"
337
394
  elif [ -f "$WHISPER_DIR/server" ]; then
338
395
  SERVER_BIN="$WHISPER_DIR/server"
339
396
  else
340
- echo "Error: whisper-server binary not found" >> "$LOG_FILE"
397
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: whisper-server binary not found" >> "$STARTUP_LOG"
398
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/build/bin/whisper-server" >> "$STARTUP_LOG"
399
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/server" >> "$STARTUP_LOG"
341
400
  exit 1
342
401
  fi
343
402
 
403
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using binary: $SERVER_BIN" >> "$STARTUP_LOG"
404
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Model path: $MODEL_PATH" >> "$STARTUP_LOG"
405
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Port: $WHISPER_PORT" >> "$STARTUP_LOG"
406
+
344
407
  # Start whisper-server
408
+ # Using exec to replace this script process with whisper-server
345
409
  cd "$WHISPER_DIR"
346
410
  exec "$SERVER_BIN" \\
347
- --model "$MODEL_PATH" \\
348
411
  --host 0.0.0.0 \\
349
- --port 2022 \\
412
+ --port "$WHISPER_PORT" \\
413
+ --model "$MODEL_PATH" \\
350
414
  --inference-path /v1/audio/transcriptions \\
351
- --threads 8 \\
352
- >> "$LOG_FILE" 2>&1
415
+ --threads 8
353
416
  """
354
417
 
355
- start_script_path = os.path.join(install_dir, "start-whisper-server.sh")
418
+ start_script_path = os.path.join(bin_dir, "start-whisper-server.sh")
356
419
  with open(start_script_path, 'w') as f:
357
- f.write(start_script_content)
420
+ f.write(template_content)
358
421
  os.chmod(start_script_path, 0o755)
359
422
 
360
423
  # Install launchagent on macOS
@@ -471,7 +534,6 @@ WorkingDirectory={install_dir}
471
534
  StandardOutput=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.out.log')}
472
535
  StandardError=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.err.log')}
473
536
  Environment="PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/cuda/bin"
474
- Environment="VOICEMODE_WHISPER_MODEL={model}"
475
537
 
476
538
  [Install]
477
539
  WantedBy=default.target
@@ -127,7 +127,8 @@ async def whisper_model_install(
127
127
  result = await download_whisper_model(
128
128
  model_name,
129
129
  actual_models_dir,
130
- force_download=force_download
130
+ force_download=force_download,
131
+ skip_core_ml=skip_core_ml
131
132
  )
132
133
 
133
134
  # Build comprehensive result entry
@@ -242,58 +243,48 @@ async def _handle_coreml_dependencies(
242
243
  if skip_core_ml:
243
244
  return {"continue": True}
244
245
 
245
- # Check if torch is already installed
246
- try:
247
- import torch
248
- logger.info("PyTorch already installed for CoreML support")
249
- return {"continue": True}
250
- except ImportError:
251
- pass
246
+ # Check if the CoreML environment already exists
247
+ whisper_dir = Path.home() / ".voicemode" / "services" / "whisper"
248
+ venv_coreml = whisper_dir / "venv-coreml" / "bin" / "python"
249
+
250
+ if venv_coreml.exists():
251
+ # Test if it has the required packages
252
+ try:
253
+ result = subprocess.run(
254
+ [str(venv_coreml), "-c", "import torch, coremltools, whisper"],
255
+ capture_output=True,
256
+ timeout=5
257
+ )
258
+ if result.returncode == 0:
259
+ logger.info("CoreML environment already exists and is valid")
260
+ # Return with a flag indicating CoreML is ready
261
+ return {
262
+ "continue": True,
263
+ "coreml_ready": True,
264
+ "coreml_deps_note": "CoreML environment exists and is valid"
265
+ }
266
+ except:
267
+ pass
252
268
 
253
- # Check if user wants to install torch
269
+ # Check if user wants to create CoreML environment
254
270
  if not install_torch and not auto_confirm:
255
271
  return {
256
272
  "continue": False,
257
273
  "success": False,
258
274
  "requires_confirmation": True,
259
- "message": "CoreML requires PyTorch (~2.5GB). Rerun with install_torch=True to confirm.",
260
- "recommendation": "Set install_torch=True for CoreML acceleration (2-3x faster)"
275
+ "message": "CoreML conversion requires a dedicated Python environment with PyTorch. Setup may download up to 2.5GB if packages aren't cached.",
276
+ "recommendation": "💡 Set install_torch=True for CoreML acceleration (2-3x faster)"
261
277
  }
262
278
 
263
- # Install CoreML dependencies
264
- logger.info("Installing CoreML dependencies...")
279
+ # Note: We don't actually install CoreML dependencies in the voicemode environment anymore
280
+ # The CoreML conversion uses its own dedicated environment in ~/.voicemode/services/whisper/venv-coreml
281
+ # This is handled automatically by whisper_helpers.convert_to_coreml()
265
282
 
266
- try:
267
- # Detect environment and install appropriately
268
- packages = ["torch>=2.0.0", "coremltools>=7.0", "transformers", "ane-transformers"]
269
-
270
- # Try UV first (most common)
271
- if subprocess.run(["which", "uv"], capture_output=True).returncode == 0:
272
- cmd = ["uv", "pip", "install"] + packages
273
- logger.info("Installing via UV...")
274
- else:
275
- # Fallback to pip
276
- cmd = [sys.executable, "-m", "pip", "install"] + packages
277
- logger.info("Installing via pip...")
278
-
279
- # Run installation
280
- result = subprocess.run(cmd, capture_output=True, text=True)
281
-
282
- if result.returncode == 0:
283
- logger.info("CoreML dependencies installed successfully")
284
- return {"continue": True, "coreml_deps_installed": True}
285
- else:
286
- logger.warning(f"Failed to install CoreML dependencies: {result.stderr}")
287
- return {
288
- "continue": True,
289
- "coreml_deps_failed": True,
290
- "warning": "CoreML dependencies installation failed. Models will use Metal acceleration."
291
- }
292
-
293
- except Exception as e:
294
- logger.warning(f"Error installing CoreML dependencies: {e}")
295
- return {
296
- "continue": True,
297
- "coreml_deps_failed": True,
298
- "warning": f"CoreML setup error: {str(e)}. Models will use Metal acceleration."
299
- }
283
+ logger.info("CoreML dependencies will be handled by the conversion process")
284
+
285
+ # We still return success to continue with the model download
286
+ # The actual CoreML environment setup happens during conversion
287
+ return {
288
+ "continue": True,
289
+ "coreml_deps_note": "CoreML environment will be created during conversion if needed"
290
+ }
@@ -113,7 +113,7 @@ def get_active_model() -> str:
113
113
 
114
114
  # Validate it's a known model
115
115
  if model not in WHISPER_MODEL_REGISTRY:
116
- return "large-v2" # Default fallback
116
+ return "base" # Default fallback
117
117
 
118
118
  return model
119
119
 
@@ -0,0 +1,234 @@
1
+ """Setup and manage CoreML Python environment for whisper.cpp."""
2
+
3
+ import os
4
+ import subprocess
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Optional, Dict, Any
8
+
9
+ logger = logging.getLogger("voice-mode")
10
+
11
+
12
def _parse_python_version(version_output: str) -> Optional[tuple]:
    """Return ``(major, minor)`` parsed from ``python --version`` output.

    Returns None when the text is not of the form ``"Python X.Y[.Z...]"``.
    """
    tokens = version_output.strip().split()
    if len(tokens) < 2:
        return None
    numbers = tokens[1].split(".")
    try:
        return int(numbers[0]), int(numbers[1])
    except (IndexError, ValueError):
        return None


def _select_coreml_python() -> Optional[str]:
    """Choose the interpreter command used to create the CoreML venv.

    Candidates are tried in order of preference. A candidate that really is
    Python 3.11 wins immediately; otherwise the first candidate that runs is
    kept as the fallback. Returns None when no candidate is usable.
    """
    import shutil

    fallback = None
    for candidate in ("python3.11", "python3.10", "python3.9", "python3"):
        if shutil.which(candidate) is None:
            continue

        probe = subprocess.run(
            [candidate, "--version"],
            capture_output=True,
            text=True,
        )
        if probe.returncode != 0:
            continue

        reported = probe.stdout.strip()
        logger.info(f"Found {reported}")

        # Compare the parsed (major, minor) pair. A substring test such as
        # `"3.11" in reported` would also match e.g. "Python 3.13.11".
        if _parse_python_version(reported) == (3, 11):
            logger.info("Using Python 3.11 (recommended for CoreML)")
            return candidate

        if fallback is None:
            fallback = candidate

    return fallback


def setup_coreml_venv(whisper_dir: Path, force: bool = False) -> Dict[str, Any]:
    """
    Setup a dedicated Python virtual environment for CoreML conversion.

    The environment lives at ``<whisper_dir>/venv-coreml``. The file
    ``<whisper_dir>/models/requirements-coreml.txt`` must exist, but it is
    only used as a presence check: the packages installed are the fixed list
    defined in this function.

    Args:
        whisper_dir: Path to whisper.cpp installation
        force: Force recreation of venv even if it exists

    Returns:
        Dict with 'success' and 'python_path' or 'error'. When the packages
        install but the final import check fails, 'success' is still True and
        'warning' / 'verification_error' are included. When individual
        packages fail to install, 'success' is False and 'partial' is True.
    """
    import shutil  # local import: shutil is not among this module's imports

    venv_dir = whisper_dir / "venv-coreml"
    venv_python = venv_dir / "bin" / "python"
    requirements_file = whisper_dir / "models" / "requirements-coreml.txt"

    # Check if requirements file exists
    if not requirements_file.exists():
        return {
            "success": False,
            "error": f"CoreML requirements file not found at {requirements_file}"
        }

    import_check = "import torch, coremltools, whisper, ane_transformers"

    # Check if venv already exists and is valid
    if venv_python.exists() and not force:
        try:
            probe = subprocess.run(
                [str(venv_python), "-c", import_check],
                capture_output=True,
                text=True,
                timeout=5
            )
        except (subprocess.TimeoutExpired, OSError) as exc:
            # A slow or unrunnable interpreter is treated as "not valid".
            logger.info(f"CoreML venv check did not complete: {exc}")
            venv_ok = False
        else:
            venv_ok = probe.returncode == 0

        if venv_ok:
            logger.info(f"CoreML venv already exists and is valid at {venv_dir}")
            return {
                "success": True,
                "python_path": str(venv_python),
                "message": "Using existing CoreML virtual environment"
            }

        # The existing directory is reused below (it is only deleted when
        # `force` is set or its interpreter is missing), so packages are
        # installed on top of whatever is already there.
        logger.info("Existing CoreML venv is incomplete, will reinstall dependencies")

    # Create or recreate venv
    logger.info(f"Creating CoreML virtual environment at {venv_dir}")

    try:
        # Remove existing venv if forced or if it has no interpreter
        if venv_dir.exists() and (force or not venv_python.exists()):
            shutil.rmtree(venv_dir, ignore_errors=True)

        # Try to use Python 3.11 as recommended by whisper.cpp
        python_cmd = _select_coreml_python()
        if python_cmd is None:
            return {
                "success": False,
                "error": "No suitable Python version found. Python 3.9-3.11 recommended for CoreML."
            }

        # Create venv
        logger.info(f"Creating venv with {python_cmd}")
        result = subprocess.run(
            [python_cmd, "-m", "venv", str(venv_dir)],
            capture_output=True,
            text=True
        )
        if result.returncode != 0:
            return {
                "success": False,
                "error": f"Failed to create venv: {result.stderr}"
            }

        # Upgrade pip (best effort: a failure here is logged, not fatal)
        logger.info("Upgrading pip in CoreML venv")
        pip_upgrade = subprocess.run(
            [str(venv_python), "-m", "pip", "install", "--upgrade", "pip"],
            capture_output=True,
            text=True
        )
        if pip_upgrade.returncode != 0:
            logger.warning(f"pip upgrade failed in CoreML venv: {pip_upgrade.stderr}")

        # Install requirements with proper versions
        logger.info("Installing CoreML requirements with compatible versions")
        packages = [
            "torch==2.5.0",  # pinned for coremltools compatibility
            "coremltools>=7.0",
            "openai-whisper",
            "ane_transformers"
        ]

        # Try installing all at once first
        result = subprocess.run(
            [str(venv_python), "-m", "pip", "install"] + packages,
            capture_output=True,
            text=True
        )

        if result.returncode != 0:
            # Retry one by one so the failing package(s) can be reported
            logger.warning("Bulk install failed, trying packages individually")

            failed_packages = []
            for package in packages:
                logger.info(f"Installing {package}")
                result = subprocess.run(
                    [str(venv_python), "-m", "pip", "install", package],
                    capture_output=True,
                    text=True
                )
                if result.returncode != 0:
                    logger.warning(f"Failed to install {package}: {result.stderr}")
                    failed_packages.append(package)

            if failed_packages:
                return {
                    "success": False,
                    "error": f"Failed to install packages: {', '.join(failed_packages)}",
                    "partial": True,
                    "python_path": str(venv_python)
                }

        # Verify installation
        logger.info("Verifying CoreML dependencies")
        result = subprocess.run(
            [str(venv_python), "-c", import_check + "; print('All packages imported successfully')"],
            capture_output=True,
            text=True
        )

        if result.returncode == 0:
            logger.info("CoreML virtual environment created successfully")
            return {
                "success": True,
                "python_path": str(venv_python),
                "message": "CoreML virtual environment created with all dependencies"
            }

        return {
            "success": True,  # Partial success
            "python_path": str(venv_python),
            "warning": "Some packages may be missing but environment was created",
            "verification_error": result.stderr
        }

    except Exception as e:
        # Boundary handler: any unexpected failure is reported to the caller
        # as an error dict instead of propagating.
        logger.error(f"Error setting up CoreML venv: {e}")
        return {
            "success": False,
            "error": str(e)
        }
188
+
189
+
190
def _python_can_import(python_path: Path, modules: str) -> bool:
    """Return True if ``python_path -c "import <modules>"`` exits with 0.

    A timeout (5 seconds) or a failure to launch the interpreter counts as
    "cannot import" rather than being raised to the caller.
    """
    try:
        probe = subprocess.run(
            [str(python_path), "-c", f"import {modules}"],
            capture_output=True,
            timeout=5
        )
    except (subprocess.TimeoutExpired, OSError):
        # Narrow on purpose: a bare `except:` would also swallow
        # KeyboardInterrupt and SystemExit.
        return False
    return probe.returncode == 0


def get_coreml_python(whisper_dir: Path) -> Optional[str]:
    """
    Get the path to Python executable with CoreML dependencies.

    Checks in order:
    1. Dedicated venv-coreml environment (must import torch and coremltools)
    2. Existing venv environment (must import torch, coremltools and whisper)
    3. None if no suitable environment found

    Args:
        whisper_dir: Path to whisper.cpp installation

    Returns:
        Path to Python executable or None
    """
    # (interpreter path, modules it must be able to import), in priority order
    candidates = (
        (whisper_dir / "venv-coreml" / "bin" / "python", "torch, coremltools"),
        (whisper_dir / "venv" / "bin" / "python", "torch, coremltools, whisper"),
    )

    for interpreter, required_modules in candidates:
        if interpreter.exists() and _python_can_import(interpreter, required_modules):
            return str(interpreter)

    return None