voice-mode 2.32.0__tar.gz → 2.33.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. {voice_mode-2.32.0 → voice_mode-2.33.0}/CHANGELOG.md +34 -0
  2. {voice_mode-2.32.0 → voice_mode-2.33.0}/PKG-INFO +3 -3
  3. {voice_mode-2.32.0 → voice_mode-2.33.0}/README.md +2 -2
  4. {voice_mode-2.32.0 → voice_mode-2.33.0}/pyproject.toml +7 -0
  5. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/__version__.py +1 -1
  6. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/config.py +1 -1
  7. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/prompts/converse.py +0 -1
  8. voice_mode-2.33.0/voice_mode/templates/__init__.py +1 -0
  9. voice_mode-2.33.0/voice_mode/templates/scripts/__init__.py +1 -0
  10. voice_mode-2.33.0/voice_mode/templates/scripts/start-whisper-server.sh +80 -0
  11. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/install.py +88 -26
  12. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/model_install.py +38 -47
  13. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/models.py +1 -1
  14. voice_mode-2.33.0/voice_mode/utils/services/coreml_setup.py +234 -0
  15. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/services/whisper_helpers.py +57 -32
  16. {voice_mode-2.32.0 → voice_mode-2.33.0}/.gitignore +0 -0
  17. {voice_mode-2.32.0 → voice_mode-2.33.0}/build_hooks.py +0 -0
  18. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/__init__.py +0 -0
  19. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/__main__.py +0 -0
  20. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/cli.py +0 -0
  21. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/cli_commands/__init__.py +0 -0
  22. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/cli_commands/exchanges.py +0 -0
  23. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/conversation_logger.py +0 -0
  24. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/core.py +0 -0
  25. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/data/versions.json +0 -0
  26. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/exchanges/__init__.py +0 -0
  27. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/exchanges/conversations.py +0 -0
  28. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/exchanges/filters.py +0 -0
  29. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/exchanges/formatters.py +0 -0
  30. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/exchanges/models.py +0 -0
  31. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/exchanges/reader.py +0 -0
  32. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/exchanges/stats.py +0 -0
  33. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/README.md +0 -0
  34. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/app/api/connection-details/route.ts +0 -0
  35. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/app/favicon.ico +0 -0
  36. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/app/globals.css +0 -0
  37. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/app/layout.tsx +0 -0
  38. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/app/page.tsx +0 -0
  39. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/components/CloseIcon.tsx +0 -0
  40. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/components/NoAgentNotification.tsx +0 -0
  41. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/components/TranscriptionView.tsx +0 -0
  42. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts +0 -0
  43. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/hooks/useLocalMicTrack.ts +0 -0
  44. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/next-env.d.ts +0 -0
  45. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/next.config.mjs +0 -0
  46. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/package-lock.json +0 -0
  47. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/package.json +0 -0
  48. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/pnpm-lock.yaml +0 -0
  49. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/postcss.config.mjs +0 -0
  50. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/tailwind.config.ts +0 -0
  51. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/frontend/tsconfig.json +0 -0
  52. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/prompts/README.md +0 -0
  53. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/prompts/__init__.py +0 -0
  54. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/prompts/release_notes.py +0 -0
  55. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/prompts/services.py +0 -0
  56. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/provider_discovery.py +0 -0
  57. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/providers.py +0 -0
  58. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/resources/__init__.py +0 -0
  59. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/resources/audio_files.py +0 -0
  60. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/resources/changelog.py +0 -0
  61. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/resources/configuration.py +0 -0
  62. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/resources/statistics.py +0 -0
  63. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/resources/version.py +0 -0
  64. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/resources/whisper_models.py +0 -0
  65. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/server.py +0 -0
  66. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/shared.py +0 -0
  67. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/simple_failover.py +0 -0
  68. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/statistics.py +0 -0
  69. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/streaming.py +0 -0
  70. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/com.voicemode.frontend.plist +0 -0
  71. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
  72. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/com.voicemode.livekit.plist +0 -0
  73. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
  74. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
  75. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
  76. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/templates/systemd/voicemode-frontend.service +0 -0
  77. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
  78. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/templates/systemd/voicemode-livekit.service +0 -0
  79. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
  80. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/__init__.py +0 -0
  81. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/configuration_management.py +0 -0
  82. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/converse.py +0 -0
  83. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/dependencies.py +0 -0
  84. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/devices.py +0 -0
  85. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/diagnostics.py +0 -0
  86. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/providers.py +0 -0
  87. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/service.py +0 -0
  88. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/kokoro/install.py +0 -0
  89. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
  90. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/list_versions.py +0 -0
  91. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/livekit/__init__.py +0 -0
  92. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/livekit/frontend.py +0 -0
  93. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/livekit/install.py +0 -0
  94. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/livekit/production_server.py +0 -0
  95. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/livekit/uninstall.py +0 -0
  96. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/version_info.py +0 -0
  97. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/__init__.py +0 -0
  98. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/list_models.py +0 -0
  99. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/model_active.py +0 -0
  100. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/model_benchmark.py +0 -0
  101. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/model_remove.py +0 -0
  102. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/uninstall.py +0 -0
  103. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/statistics.py +0 -0
  104. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/voice_registry.py +0 -0
  105. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/__init__.py +0 -0
  106. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/audio_diagnostics.py +0 -0
  107. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/event_logger.py +0 -0
  108. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/ffmpeg_check.py +0 -0
  109. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/format_migration.py +0 -0
  110. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/gpu_detection.py +0 -0
  111. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/migration_helpers.py +0 -0
  112. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/services/common.py +0 -0
  113. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/services/kokoro_helpers.py +0 -0
  114. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/services/livekit_helpers.py +0 -0
  115. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/services/whisper_version.py +0 -0
  116. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/utils/version_helpers.py +0 -0
  117. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/version.py +0 -0
  118. {voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/voice_preferences.py +0 -0
@@ -7,6 +7,40 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.33.0] - 2025-08-26
11
+
12
+ ### Fixed
13
+ - **CoreML acceleration improvements**
14
+ - Re-enabled CoreML acceleration in installer after fixing template loading issues
15
+ - Fixed CoreML conversion with dedicated Python environment to avoid dependency conflicts
16
+ - Improved CoreML setup to handle PyTorch dependency management properly
17
+ - Disabled misleading CoreML prompt temporarily while fixing PyTorch installation
18
+
19
+ - **Whisper service improvements**
20
+ - Implemented unified Whisper startup script for Mac and Linux
21
+ - Fixed Whisper service to respect VOICEMODE_WHISPER_MODEL setting properly
22
+ - Changed default Whisper model from large-v2 to base for faster initial setup
23
+
24
+ - **Installer script stability**
25
+ - Fixed script exit after Whisper installation when CoreML setup CLI check fails
26
+ - Properly handle check_voice_mode_cli failures in setup_coreml_acceleration
27
+ - Installer now continues with Kokoro and LiveKit even if CoreML setup encounters issues
28
+ - Fixed installer exit issue after Whisper when checking for voicemode CLI
29
+
30
+ - **Documentation corrections**
31
+ - Removed mention of response_duration from converse prompt to avoid confusion
32
+
33
+ ### Changed
34
+ - **Web documentation improvements**
35
+ - Updated Quick Start to use `curl -O && bash install.sh` for proper interactive prompts
36
+ - Clarified OpenAI API key is optional and serves as backup when local services unavailable
37
+ - Added comprehensive list of what the installer automatically configures
38
+ - Changed example to use `claude converse` instead of interactive prompt
39
+ - Updated README to use `/voicemode:converse` for consistent voice usage
40
+
41
+ - **Configuration updates**
42
+ - Added voicemode MCP to Claude Code configuration for easier integration
43
+
10
44
  ## [2.32.0] - 2025-08-25
11
45
 
12
46
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: voice-mode
3
- Version: 2.32.0
3
+ Version: 2.33.0
4
4
  Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
5
5
  Project-URL: Homepage, https://github.com/mbailey/voicemode
6
6
  Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -129,10 +129,10 @@ After installation, just run:
129
129
  ```bash
130
130
  # With OpenAI API (cloud-based, requires API key)
131
131
  export OPENAI_API_KEY=your-openai-key
132
- claude converse
132
+ claude /voicemode:converse
133
133
 
134
134
  # Or use free local services (Voice Mode will offer to install them)
135
- claude converse
135
+ claude /voicemode:converse
136
136
  ```
137
137
 
138
138
  ### Manual Installation
@@ -55,10 +55,10 @@ After installation, just run:
55
55
  ```bash
56
56
  # With OpenAI API (cloud-based, requires API key)
57
57
  export OPENAI_API_KEY=your-openai-key
58
- claude converse
58
+ claude /voicemode:converse
59
59
 
60
60
  # Or use free local services (Voice Mode will offer to install them)
61
- claude converse
61
+ claude /voicemode:converse
62
62
  ```
63
63
 
64
64
  ### Manual Installation
@@ -99,6 +99,13 @@ voicemode = "voice_mode.cli:voice_mode"
99
99
 
100
100
  [tool.hatch.build.targets.wheel]
101
101
  packages = ["voice_mode"]
102
+ include = [
103
+ "voice_mode/**/*.py",
104
+ "voice_mode/**/*.sh",
105
+ "voice_mode/**/*.plist",
106
+ "voice_mode/**/*.service",
107
+ "voice_mode/templates/**/*",
108
+ ]
102
109
  exclude = [
103
110
  "**/__pycache__",
104
111
  "**/*.pyc",
@@ -1,3 +1,3 @@
1
1
  # This file is automatically updated by 'make release'
2
2
  # Do not edit manually
3
- __version__ = "2.32.0"
3
+ __version__ = "2.33.0"
@@ -239,7 +239,7 @@ LIVEKIT_API_SECRET = os.getenv("LIVEKIT_API_SECRET", "secret")
239
239
  # ==================== WHISPER CONFIGURATION ====================
240
240
 
241
241
  # Whisper-specific configuration
242
- WHISPER_MODEL = os.getenv("VOICEMODE_WHISPER_MODEL", "large-v2")
242
+ WHISPER_MODEL = os.getenv("VOICEMODE_WHISPER_MODEL", "base")
243
243
  WHISPER_PORT = int(os.getenv("VOICEMODE_WHISPER_PORT", "2022"))
244
244
  WHISPER_LANGUAGE = os.getenv("VOICEMODE_WHISPER_LANGUAGE", "auto")
245
245
  WHISPER_MODEL_PATH = expand_path(os.getenv("VOICEMODE_WHISPER_MODEL_PATH", str(Path.home() / ".voicemode" / "services" / "whisper" / "models")))
@@ -10,7 +10,6 @@ def converse() -> str:
10
10
  "Using tools from voice-mode, have an ongoing two-way conversation",
11
11
  "End the chat when the user indicates they want to end it",
12
12
  "Keep your utterances brief unless a longer response is requested or necessary",
13
- "Listen for up to 120 seconds per response"
14
13
  ]
15
14
 
16
15
  return "\n".join(f"- {instruction}" for instruction in instructions)
@@ -0,0 +1 @@
1
+ # Templates package for Voice Mode
@@ -0,0 +1 @@
1
+ # Script templates for Voice Mode services
@@ -0,0 +1,80 @@
1
+ #!/bin/bash
2
+
3
+ # Whisper Service Startup Script
4
+ # This script is used by both macOS (launchd) and Linux (systemd) to start the whisper service
5
+ # It sources the voicemode.env file to get configuration, especially VOICEMODE_WHISPER_MODEL
6
+
7
+ # Determine whisper directory (script is in bin/, whisper root is parent)
8
+ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
9
+ WHISPER_DIR="$(dirname "$SCRIPT_DIR")"
10
+
11
+ # Voicemode configuration directory
12
+ VOICEMODE_DIR="$HOME/.voicemode"
13
+ LOG_DIR="$VOICEMODE_DIR/logs/whisper"
14
+
15
+ # Create log directory if it doesn't exist
16
+ mkdir -p "$LOG_DIR"
17
+
18
+ # Log file for this script (separate from whisper server logs)
19
+ STARTUP_LOG="$LOG_DIR/startup.log"
20
+
21
+ # Source voicemode configuration if it exists
22
+ if [ -f "$VOICEMODE_DIR/voicemode.env" ]; then
23
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Sourcing voicemode.env" >> "$STARTUP_LOG"
24
+ source "$VOICEMODE_DIR/voicemode.env"
25
+ else
26
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Warning: voicemode.env not found" >> "$STARTUP_LOG"
27
+ fi
28
+
29
+ # Model selection with environment variable support
30
+ MODEL_NAME="${VOICEMODE_WHISPER_MODEL:-base}"
31
+ MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"
32
+
33
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting whisper-server with model: $MODEL_NAME" >> "$STARTUP_LOG"
34
+
35
+ # Check if model exists
36
+ if [ ! -f "$MODEL_PATH" ]; then
37
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$STARTUP_LOG"
38
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Available models:" >> "$STARTUP_LOG"
39
+ ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\.bin$" >> "$STARTUP_LOG"
40
+
41
+ # Try to find any available model as fallback
42
+ FALLBACK_MODEL=$(ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\.bin$" | head -1)
43
+ if [ -n "$FALLBACK_MODEL" ]; then
44
+ MODEL_PATH="$WHISPER_DIR/models/$FALLBACK_MODEL"
45
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using fallback model: $FALLBACK_MODEL" >> "$STARTUP_LOG"
46
+ else
47
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Fatal: No whisper models found" >> "$STARTUP_LOG"
48
+ exit 1
49
+ fi
50
+ fi
51
+
52
+ # Port configuration (with environment variable support)
53
+ WHISPER_PORT="${VOICEMODE_WHISPER_PORT:-2022}"
54
+
55
+ # Determine server binary location
56
+ # Check new CMake build location first, then legacy location
57
+ if [ -f "$WHISPER_DIR/build/bin/whisper-server" ]; then
58
+ SERVER_BIN="$WHISPER_DIR/build/bin/whisper-server"
59
+ elif [ -f "$WHISPER_DIR/server" ]; then
60
+ SERVER_BIN="$WHISPER_DIR/server"
61
+ else
62
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: whisper-server binary not found" >> "$STARTUP_LOG"
63
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/build/bin/whisper-server" >> "$STARTUP_LOG"
64
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/server" >> "$STARTUP_LOG"
65
+ exit 1
66
+ fi
67
+
68
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using binary: $SERVER_BIN" >> "$STARTUP_LOG"
69
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Model path: $MODEL_PATH" >> "$STARTUP_LOG"
70
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Port: $WHISPER_PORT" >> "$STARTUP_LOG"
71
+
72
+ # Start whisper-server
73
+ # Using exec to replace this script process with whisper-server
74
+ cd "$WHISPER_DIR"
75
+ exec "$SERVER_BIN" \
76
+ --host 0.0.0.0 \
77
+ --port "$WHISPER_PORT" \
78
+ --model "$MODEL_PATH" \
79
+ --inference-path /v1/audio/transcriptions \
80
+ --threads 8
@@ -11,6 +11,11 @@ from pathlib import Path
11
11
  from typing import Dict, Any, Optional, Union
12
12
  import asyncio
13
13
  import aiohttp
14
+ try:
15
+ from importlib.resources import files
16
+ except ImportError:
17
+ # Python < 3.9 fallback
18
+ from importlib_resources import files
14
19
 
15
20
  from voice_mode.server import mcp
16
21
  from voice_mode.config import SERVICE_AUTO_ENABLE
@@ -28,7 +33,7 @@ logger = logging.getLogger("voice-mode")
28
33
  @mcp.tool()
29
34
  async def whisper_install(
30
35
  install_dir: Optional[str] = None,
31
- model: str = "large-v2",
36
+ model: str = "base",
32
37
  use_gpu: Optional[Union[bool, str]] = None,
33
38
  force_reinstall: Union[bool, str] = False,
34
39
  auto_enable: Optional[Union[bool, str]] = None,
@@ -42,7 +47,7 @@ async def whisper_install(
42
47
  Args:
43
48
  install_dir: Directory to install whisper.cpp (default: ~/.voicemode/whisper.cpp)
44
49
  model: Whisper model to download (tiny, base, small, medium, large-v2, large-v3, etc.)
45
- Default is large-v2 for best accuracy. Note: large models require ~3GB RAM.
50
+ Default is base for good balance of speed and accuracy (142MB).
46
51
  use_gpu: Enable GPU support if available (default: auto-detect)
47
52
  force_reinstall: Force reinstallation even if already installed
48
53
  auto_enable: Enable service after install. If None, uses VOICEMODE_SERVICE_AUTO_ENABLE config.
@@ -302,59 +307,117 @@ async def whisper_install(
302
307
  if 'original_dir' in locals():
303
308
  os.chdir(original_dir)
304
309
 
305
- # Create start script for whisper-server
306
- logger.info("Creating whisper-server start script...")
307
- start_script_content = f"""#!/bin/bash
310
+ # Copy template start script for whisper-server
311
+ logger.info("Installing whisper-server start script from template...")
312
+
313
+ # Create bin directory
314
+ bin_dir = os.path.join(install_dir, "bin")
315
+ os.makedirs(bin_dir, exist_ok=True)
316
+
317
+ # Copy template script
318
+ template_content = None
319
+
320
+ # First try to load from source if running in development
321
+ source_template = Path(__file__).parent.parent.parent.parent / "templates" / "scripts" / "start-whisper-server.sh"
322
+ if source_template.exists():
323
+ logger.info(f"Loading template from source: {source_template}")
324
+ template_content = source_template.read_text()
325
+ else:
326
+ # Try loading from package resources
327
+ try:
328
+ template_resource = files("voice_mode.templates.scripts").joinpath("start-whisper-server.sh")
329
+ template_content = template_resource.read_text()
330
+ logger.info("Loaded template from package resources")
331
+ except Exception as e:
332
+ logger.warning(f"Failed to load template script: {e}. Using fallback inline script.")
333
+
334
+ # Fallback to inline script if template not found
335
+ if template_content is None:
336
+ template_content = f"""#!/bin/bash
337
+
338
+ # Whisper Service Startup Script
339
+ # This script is used by both macOS (launchd) and Linux (systemd) to start the whisper service
340
+ # It sources the voicemode.env file to get configuration, especially VOICEMODE_WHISPER_MODEL
341
+
342
+ # Determine whisper directory (script is in bin/, whisper root is parent)
343
+ SCRIPT_DIR="$(cd "$(dirname "${{BASH_SOURCE[0]}}")" && pwd)"
344
+ WHISPER_DIR="$(dirname "$SCRIPT_DIR")"
345
+
346
+ # Voicemode configuration directory
347
+ VOICEMODE_DIR="$HOME/.voicemode"
348
+ LOG_DIR="$VOICEMODE_DIR/logs/whisper"
349
+
350
+ # Create log directory if it doesn't exist
351
+ mkdir -p "$LOG_DIR"
308
352
 
309
- # Configuration
310
- WHISPER_DIR="{install_dir}"
311
- LOG_FILE="{os.path.join(voicemode_dir, 'whisper-server.log')}"
353
+ # Log file for this script (separate from whisper server logs)
354
+ STARTUP_LOG="$LOG_DIR/startup.log"
312
355
 
313
356
  # Source voicemode configuration if it exists
314
- if [ -f "{voicemode_dir}/voicemode.env" ]; then
315
- source "{voicemode_dir}/voicemode.env"
357
+ if [ -f "$VOICEMODE_DIR/voicemode.env" ]; then
358
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Sourcing voicemode.env" >> "$STARTUP_LOG"
359
+ source "$VOICEMODE_DIR/voicemode.env"
360
+ else
361
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Warning: voicemode.env not found" >> "$STARTUP_LOG"
316
362
  fi
317
363
 
318
364
  # Model selection with environment variable support
319
- MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-{model}}}"
365
+ MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-base}}"
320
366
  MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"
321
367
 
368
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting whisper-server with model: $MODEL_NAME" >> "$STARTUP_LOG"
369
+
322
370
  # Check if model exists
323
371
  if [ ! -f "$MODEL_PATH" ]; then
324
- echo "Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$LOG_FILE"
325
- echo "Available models:" >> "$LOG_FILE"
326
- ls -1 "$WHISPER_DIR/models/" | grep "^ggml-.*\\.bin$" >> "$LOG_FILE"
327
- exit 1
372
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$STARTUP_LOG"
373
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Available models:" >> "$STARTUP_LOG"
374
+ ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\\.bin$" >> "$STARTUP_LOG"
375
+
376
+ # Try to find any available model as fallback
377
+ FALLBACK_MODEL=$(ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\\.bin$" | head -1)
378
+ if [ -n "$FALLBACK_MODEL" ]; then
379
+ MODEL_PATH="$WHISPER_DIR/models/$FALLBACK_MODEL"
380
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using fallback model: $FALLBACK_MODEL" >> "$STARTUP_LOG"
381
+ else
382
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Fatal: No whisper models found" >> "$STARTUP_LOG"
383
+ exit 1
384
+ fi
328
385
  fi
329
386
 
330
- echo "Starting whisper-server with model: $MODEL_NAME" >> "$LOG_FILE"
331
-
332
- # Note: whisper-server is now built as part of the main build target
387
+ # Port configuration (with environment variable support)
388
+ WHISPER_PORT="${{VOICEMODE_WHISPER_PORT:-2022}}"
333
389
 
334
390
  # Determine server binary location
391
+ # Check new CMake build location first, then legacy location
335
392
  if [ -f "$WHISPER_DIR/build/bin/whisper-server" ]; then
336
393
  SERVER_BIN="$WHISPER_DIR/build/bin/whisper-server"
337
394
  elif [ -f "$WHISPER_DIR/server" ]; then
338
395
  SERVER_BIN="$WHISPER_DIR/server"
339
396
  else
340
- echo "Error: whisper-server binary not found" >> "$LOG_FILE"
397
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: whisper-server binary not found" >> "$STARTUP_LOG"
398
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/build/bin/whisper-server" >> "$STARTUP_LOG"
399
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/server" >> "$STARTUP_LOG"
341
400
  exit 1
342
401
  fi
343
402
 
403
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using binary: $SERVER_BIN" >> "$STARTUP_LOG"
404
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Model path: $MODEL_PATH" >> "$STARTUP_LOG"
405
+ echo "[$(date '+%Y-%m-%d %H:%M:%S')] Port: $WHISPER_PORT" >> "$STARTUP_LOG"
406
+
344
407
  # Start whisper-server
408
+ # Using exec to replace this script process with whisper-server
345
409
  cd "$WHISPER_DIR"
346
410
  exec "$SERVER_BIN" \\
347
- --model "$MODEL_PATH" \\
348
411
  --host 0.0.0.0 \\
349
- --port 2022 \\
412
+ --port "$WHISPER_PORT" \\
413
+ --model "$MODEL_PATH" \\
350
414
  --inference-path /v1/audio/transcriptions \\
351
- --threads 8 \\
352
- >> "$LOG_FILE" 2>&1
415
+ --threads 8
353
416
  """
354
417
 
355
- start_script_path = os.path.join(install_dir, "start-whisper-server.sh")
418
+ start_script_path = os.path.join(bin_dir, "start-whisper-server.sh")
356
419
  with open(start_script_path, 'w') as f:
357
- f.write(start_script_content)
420
+ f.write(template_content)
358
421
  os.chmod(start_script_path, 0o755)
359
422
 
360
423
  # Install launchagent on macOS
@@ -471,7 +534,6 @@ WorkingDirectory={install_dir}
471
534
  StandardOutput=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.out.log')}
472
535
  StandardError=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.err.log')}
473
536
  Environment="PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/cuda/bin"
474
- Environment="VOICEMODE_WHISPER_MODEL={model}"
475
537
 
476
538
  [Install]
477
539
  WantedBy=default.target
@@ -127,7 +127,8 @@ async def whisper_model_install(
127
127
  result = await download_whisper_model(
128
128
  model_name,
129
129
  actual_models_dir,
130
- force_download=force_download
130
+ force_download=force_download,
131
+ skip_core_ml=skip_core_ml
131
132
  )
132
133
 
133
134
  # Build comprehensive result entry
@@ -242,58 +243,48 @@ async def _handle_coreml_dependencies(
242
243
  if skip_core_ml:
243
244
  return {"continue": True}
244
245
 
245
- # Check if torch is already installed
246
- try:
247
- import torch
248
- logger.info("PyTorch already installed for CoreML support")
249
- return {"continue": True}
250
- except ImportError:
251
- pass
246
+ # Check if the CoreML environment already exists
247
+ whisper_dir = Path.home() / ".voicemode" / "services" / "whisper"
248
+ venv_coreml = whisper_dir / "venv-coreml" / "bin" / "python"
249
+
250
+ if venv_coreml.exists():
251
+ # Test if it has the required packages
252
+ try:
253
+ result = subprocess.run(
254
+ [str(venv_coreml), "-c", "import torch, coremltools, whisper"],
255
+ capture_output=True,
256
+ timeout=5
257
+ )
258
+ if result.returncode == 0:
259
+ logger.info("CoreML environment already exists and is valid")
260
+ # Return with a flag indicating CoreML is ready
261
+ return {
262
+ "continue": True,
263
+ "coreml_ready": True,
264
+ "coreml_deps_note": "CoreML environment exists and is valid"
265
+ }
266
+ except:
267
+ pass
252
268
 
253
- # Check if user wants to install torch
269
+ # Check if user wants to create CoreML environment
254
270
  if not install_torch and not auto_confirm:
255
271
  return {
256
272
  "continue": False,
257
273
  "success": False,
258
274
  "requires_confirmation": True,
259
- "message": "CoreML requires PyTorch (~2.5GB). Rerun with install_torch=True to confirm.",
260
- "recommendation": "Set install_torch=True for CoreML acceleration (2-3x faster)"
275
+ "message": "CoreML conversion requires a dedicated Python environment with PyTorch. Setup may download up to 2.5GB if packages aren't cached.",
276
+ "recommendation": "💡 Set install_torch=True for CoreML acceleration (2-3x faster)"
261
277
  }
262
278
 
263
- # Install CoreML dependencies
264
- logger.info("Installing CoreML dependencies...")
279
+ # Note: We don't actually install CoreML dependencies in the voicemode environment anymore
280
+ # The CoreML conversion uses its own dedicated environment in ~/.voicemode/services/whisper/venv-coreml
281
+ # This is handled automatically by whisper_helpers.convert_to_coreml()
265
282
 
266
- try:
267
- # Detect environment and install appropriately
268
- packages = ["torch>=2.0.0", "coremltools>=7.0", "transformers", "ane-transformers"]
269
-
270
- # Try UV first (most common)
271
- if subprocess.run(["which", "uv"], capture_output=True).returncode == 0:
272
- cmd = ["uv", "pip", "install"] + packages
273
- logger.info("Installing via UV...")
274
- else:
275
- # Fallback to pip
276
- cmd = [sys.executable, "-m", "pip", "install"] + packages
277
- logger.info("Installing via pip...")
278
-
279
- # Run installation
280
- result = subprocess.run(cmd, capture_output=True, text=True)
281
-
282
- if result.returncode == 0:
283
- logger.info("CoreML dependencies installed successfully")
284
- return {"continue": True, "coreml_deps_installed": True}
285
- else:
286
- logger.warning(f"Failed to install CoreML dependencies: {result.stderr}")
287
- return {
288
- "continue": True,
289
- "coreml_deps_failed": True,
290
- "warning": "CoreML dependencies installation failed. Models will use Metal acceleration."
291
- }
292
-
293
- except Exception as e:
294
- logger.warning(f"Error installing CoreML dependencies: {e}")
295
- return {
296
- "continue": True,
297
- "coreml_deps_failed": True,
298
- "warning": f"CoreML setup error: {str(e)}. Models will use Metal acceleration."
299
- }
283
+ logger.info("CoreML dependencies will be handled by the conversion process")
284
+
285
+ # We still return success to continue with the model download
286
+ # The actual CoreML environment setup happens during conversion
287
+ return {
288
+ "continue": True,
289
+ "coreml_deps_note": "CoreML environment will be created during conversion if needed"
290
+ }
@@ -113,7 +113,7 @@ def get_active_model() -> str:
113
113
 
114
114
  # Validate it's a known model
115
115
  if model not in WHISPER_MODEL_REGISTRY:
116
- return "large-v2" # Default fallback
116
+ return "base" # Default fallback
117
117
 
118
118
  return model
119
119