voice-mode 2.32.0.tar.gz → 2.33.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. {voice_mode-2.32.0 → voice_mode-2.33.2}/CHANGELOG.md +52 -0
  2. {voice_mode-2.32.0 → voice_mode-2.33.2}/PKG-INFO +11 -12
  3. {voice_mode-2.32.0 → voice_mode-2.33.2}/README.md +10 -11
  4. {voice_mode-2.32.0 → voice_mode-2.33.2}/pyproject.toml +7 -0
  5. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/__version__.py +1 -1
  6. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/config.py +1 -1
  7. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/prompts/converse.py +0 -1
  8. voice_mode-2.33.2/voice_mode/templates/__init__.py +1 -0
  9. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/templates/launchd/com.voicemode.whisper.plist +7 -13
  10. voice_mode-2.33.2/voice_mode/templates/scripts/__init__.py +1 -0
  11. voice_mode-2.33.2/voice_mode/templates/scripts/start-whisper-server.sh +80 -0
  12. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/whisper/install.py +100 -132
  13. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/whisper/model_install.py +38 -47
  14. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/whisper/models.py +1 -1
  15. voice_mode-2.33.2/voice_mode/utils/services/coreml_setup.py +234 -0
  16. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/services/whisper_helpers.py +57 -32
  17. {voice_mode-2.32.0 → voice_mode-2.33.2}/.gitignore +0 -0
  18. {voice_mode-2.32.0 → voice_mode-2.33.2}/build_hooks.py +0 -0
  19. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/__init__.py +0 -0
  20. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/__main__.py +0 -0
  21. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/cli.py +0 -0
  22. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/cli_commands/__init__.py +0 -0
  23. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/cli_commands/exchanges.py +0 -0
  24. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/conversation_logger.py +0 -0
  25. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/core.py +0 -0
  26. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/data/versions.json +0 -0
  27. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/exchanges/__init__.py +0 -0
  28. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/exchanges/conversations.py +0 -0
  29. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/exchanges/filters.py +0 -0
  30. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/exchanges/formatters.py +0 -0
  31. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/exchanges/models.py +0 -0
  32. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/exchanges/reader.py +0 -0
  33. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/exchanges/stats.py +0 -0
  34. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/README.md +0 -0
  35. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/app/api/connection-details/route.ts +0 -0
  36. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/app/favicon.ico +0 -0
  37. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/app/globals.css +0 -0
  38. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/app/layout.tsx +0 -0
  39. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/app/page.tsx +0 -0
  40. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/components/CloseIcon.tsx +0 -0
  41. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/components/NoAgentNotification.tsx +0 -0
  42. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/components/TranscriptionView.tsx +0 -0
  43. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts +0 -0
  44. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/hooks/useLocalMicTrack.ts +0 -0
  45. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/next-env.d.ts +0 -0
  46. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/next.config.mjs +0 -0
  47. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/package-lock.json +0 -0
  48. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/package.json +0 -0
  49. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/pnpm-lock.yaml +0 -0
  50. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/postcss.config.mjs +0 -0
  51. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/tailwind.config.ts +0 -0
  52. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/frontend/tsconfig.json +0 -0
  53. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/prompts/README.md +0 -0
  54. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/prompts/__init__.py +0 -0
  55. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/prompts/release_notes.py +0 -0
  56. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/prompts/services.py +0 -0
  57. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/provider_discovery.py +0 -0
  58. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/providers.py +0 -0
  59. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/resources/__init__.py +0 -0
  60. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/resources/audio_files.py +0 -0
  61. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/resources/changelog.py +0 -0
  62. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/resources/configuration.py +0 -0
  63. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/resources/statistics.py +0 -0
  64. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/resources/version.py +0 -0
  65. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/resources/whisper_models.py +0 -0
  66. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/server.py +0 -0
  67. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/shared.py +0 -0
  68. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/simple_failover.py +0 -0
  69. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/statistics.py +0 -0
  70. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/streaming.py +0 -0
  71. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/templates/launchd/com.voicemode.frontend.plist +0 -0
  72. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
  73. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/templates/launchd/com.voicemode.livekit.plist +0 -0
  74. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
  75. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
  76. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/templates/systemd/voicemode-frontend.service +0 -0
  77. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
  78. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/templates/systemd/voicemode-livekit.service +0 -0
  79. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
  80. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/__init__.py +0 -0
  81. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/configuration_management.py +0 -0
  82. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/converse.py +0 -0
  83. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/dependencies.py +0 -0
  84. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/devices.py +0 -0
  85. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/diagnostics.py +0 -0
  86. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/providers.py +0 -0
  87. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/service.py +0 -0
  88. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/kokoro/install.py +0 -0
  89. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
  90. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/list_versions.py +0 -0
  91. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/livekit/__init__.py +0 -0
  92. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/livekit/frontend.py +0 -0
  93. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/livekit/install.py +0 -0
  94. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/livekit/production_server.py +0 -0
  95. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/livekit/uninstall.py +0 -0
  96. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/version_info.py +0 -0
  97. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/whisper/__init__.py +0 -0
  98. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/whisper/list_models.py +0 -0
  99. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/whisper/model_active.py +0 -0
  100. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/whisper/model_benchmark.py +0 -0
  101. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/whisper/model_remove.py +0 -0
  102. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/whisper/uninstall.py +0 -0
  103. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/statistics.py +0 -0
  104. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/voice_registry.py +0 -0
  105. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/__init__.py +0 -0
  106. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/audio_diagnostics.py +0 -0
  107. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/event_logger.py +0 -0
  108. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/ffmpeg_check.py +0 -0
  109. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/format_migration.py +0 -0
  110. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/gpu_detection.py +0 -0
  111. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/migration_helpers.py +0 -0
  112. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/services/common.py +0 -0
  113. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/services/kokoro_helpers.py +0 -0
  114. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/services/livekit_helpers.py +0 -0
  115. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/services/whisper_version.py +0 -0
  116. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/utils/version_helpers.py +0 -0
  117. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/version.py +0 -0
  118. {voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/voice_preferences.py +0 -0

{voice_mode-2.32.0 → voice_mode-2.33.2}/CHANGELOG.md
@@ -7,6 +7,58 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [2.33.2] - 2025-08-26
+
+## [2.33.1] - 2025-08-26
+
+### Fixed
+- **Whisper service LaunchAgent fixes**
+  - Fixed LaunchAgent plist to call start-whisper-server.sh script instead of binary directly
+  - Script provides dynamic model selection via VOICEMODE_WHISPER_MODEL environment variable
+  - Added proper command-line arguments (--inference-path, --threads) missing from direct binary call
+  - Resolved Signal 15 (SIGTERM) restart loop caused by missing parameters
+
+### Changed
+- **Service configuration templates**
+  - Refactored Whisper installer to use plist template file instead of inline generation
+  - Template approach improves maintainability and makes configuration easier to find
+  - Removed duplicate inline plist fallback to prevent template/code divergence
+  - Templates are packaged with distribution ensuring availability
+
+## [2.33.0] - 2025-08-26
+
+### Fixed
+- **CoreML acceleration improvements**
+  - Re-enabled CoreML acceleration in installer after fixing template loading issues
+  - Fixed CoreML conversion with dedicated Python environment to avoid dependency conflicts
+  - Improved CoreML setup to handle PyTorch dependency management properly
+  - Disabled misleading CoreML prompt temporarily while fixing PyTorch installation
+
+- **Whisper service improvements**
+  - Implemented unified Whisper startup script for Mac and Linux
+  - Fixed Whisper service to respect VOICEMODE_WHISPER_MODEL setting properly
+  - Changed default Whisper model from large-v2 to base for faster initial setup
+
+- **Installer script stability**
+  - Fixed script exit after Whisper installation when CoreML setup CLI check fails
+  - Properly handle check_voice_mode_cli failures in setup_coreml_acceleration
+  - Installer now continues with Kokoro and LiveKit even if CoreML setup encounters issues
+  - Fixed installer exit issue after Whisper when checking for voicemode CLI
+
+- **Documentation corrections**
+  - Removed mention of response_duration from converse prompt to avoid confusion
+
+### Changed
+- **Web documentation improvements**
+  - Updated Quick Start to use `curl -O && bash install.sh` for proper interactive prompts
+  - Clarified OpenAI API key is optional and serves as backup when local services unavailable
+  - Added comprehensive list of what the installer automatically configures
+  - Changed example to use `claude converse` instead of interactive prompt
+  - Updated README to use `/voicemode:converse` for consistent voice usage
+
+- **Configuration updates**
+  - Added voicemode MCP to Claude Code configuration for easier integration
+
 ## [2.32.0] - 2025-08-25
 
 ### Added
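
Editorial note (not part of the diff): the "Service configuration templates" entries above describe shipping the launchd plist as a packaged template with `{PLACEHOLDER}` tokens that the installer fills in at install time. A minimal sketch of that pattern, with made-up example paths — the real substitution appears in the whisper/install.py hunks further down:

```python
from pathlib import Path

# Illustrative sketch of the template-plus-placeholder approach described in
# the 2.33.x changelog entries above. The paths below are examples only,
# not the installer's actual configuration.
template = Path("com.voicemode.whisper.plist").read_text()
rendered = (
    template
    .replace("{START_SCRIPT_PATH}", "/Users/me/.voicemode/whisper.cpp/bin/start-whisper-server.sh")
    .replace("{LOG_DIR}", "/Users/me/.voicemode/logs")
    .replace("{INSTALL_DIR}", "/Users/me/.voicemode/whisper.cpp")
)
Path("com.voicemode.whisper.rendered.plist").write_text(rendered)
```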

{voice_mode-2.32.0 → voice_mode-2.33.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voice-mode
-Version: 2.32.0
+Version: 2.33.2
 Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
 Project-URL: Homepage, https://github.com/mbailey/voicemode
 Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -116,7 +116,15 @@ Natural voice conversations for AI assistants. Voice Mode brings human-like voic
 Install Claude Code with Voice Mode configured and ready to run on Linux, macOS, and Windows WSL:
 
 ```bash
+# Download and run the installer
 curl -O https://getvoicemode.com/install.sh && bash install.sh
+
+# While local voice services can be installed automatically, we recommend
+# providing an OpenAI API key as a fallback in case local services are unavailable
+export OPENAI_API_KEY=your-openai-key # Optional but recommended
+
+# Start a voice conversation
+claude /voicemode:converse
 ```
 
 This installer will:
@@ -124,16 +132,7 @@ This installer will:
 - Install Claude Code if not already installed
 - Configure Voice Mode as an MCP server
 - Set up your system for voice conversations
-
-After installation, just run:
-```bash
-# With OpenAI API (cloud-based, requires API key)
-export OPENAI_API_KEY=your-openai-key
-claude converse
-
-# Or use free local services (Voice Mode will offer to install them)
-claude converse
-```
+- Offer to install free local STT/TTS services if no API key is provided
 
 ### Manual Installation
 
@@ -693,7 +692,7 @@ To save all audio files (both TTS output and STT input):
 export VOICEMODE_SAVE_AUDIO=true
 ```
 
-Audio files are saved to: `~/voicemode_audio/` with timestamps in the filename.
+Audio files are saved to: `~/.voicemode/audio/YYYY/MM/` with timestamps in the filename.
 
 ## Documentation
 

{voice_mode-2.32.0 → voice_mode-2.33.2}/README.md
@@ -42,7 +42,15 @@ Natural voice conversations for AI assistants. Voice Mode brings human-like voic
 Install Claude Code with Voice Mode configured and ready to run on Linux, macOS, and Windows WSL:
 
 ```bash
+# Download and run the installer
 curl -O https://getvoicemode.com/install.sh && bash install.sh
+
+# While local voice services can be installed automatically, we recommend
+# providing an OpenAI API key as a fallback in case local services are unavailable
+export OPENAI_API_KEY=your-openai-key # Optional but recommended
+
+# Start a voice conversation
+claude /voicemode:converse
 ```
 
 This installer will:
@@ -50,16 +58,7 @@ This installer will:
 - Install Claude Code if not already installed
 - Configure Voice Mode as an MCP server
 - Set up your system for voice conversations
-
-After installation, just run:
-```bash
-# With OpenAI API (cloud-based, requires API key)
-export OPENAI_API_KEY=your-openai-key
-claude converse
-
-# Or use free local services (Voice Mode will offer to install them)
-claude converse
-```
+- Offer to install free local STT/TTS services if no API key is provided
 
 ### Manual Installation
 
@@ -619,7 +618,7 @@ To save all audio files (both TTS output and STT input):
 export VOICEMODE_SAVE_AUDIO=true
 ```
 
-Audio files are saved to: `~/voicemode_audio/` with timestamps in the filename.
+Audio files are saved to: `~/.voicemode/audio/YYYY/MM/` with timestamps in the filename.
 
 ## Documentation
 

{voice_mode-2.32.0 → voice_mode-2.33.2}/pyproject.toml
@@ -99,6 +99,13 @@ voicemode = "voice_mode.cli:voice_mode"
 
 [tool.hatch.build.targets.wheel]
 packages = ["voice_mode"]
+include = [
+    "voice_mode/**/*.py",
+    "voice_mode/**/*.sh",
+    "voice_mode/**/*.plist",
+    "voice_mode/**/*.service",
+    "voice_mode/templates/**/*",
+]
 exclude = [
     "**/__pycache__",
     "**/*.pyc",

{voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/__version__.py
@@ -1,3 +1,3 @@
 # This file is automatically updated by 'make release'
 # Do not edit manually
-__version__ = "2.32.0"
+__version__ = "2.33.2"

{voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/config.py
@@ -239,7 +239,7 @@ LIVEKIT_API_SECRET = os.getenv("LIVEKIT_API_SECRET", "secret")
 # ==================== WHISPER CONFIGURATION ====================
 
 # Whisper-specific configuration
-WHISPER_MODEL = os.getenv("VOICEMODE_WHISPER_MODEL", "large-v2")
+WHISPER_MODEL = os.getenv("VOICEMODE_WHISPER_MODEL", "base")
 WHISPER_PORT = int(os.getenv("VOICEMODE_WHISPER_PORT", "2022"))
 WHISPER_LANGUAGE = os.getenv("VOICEMODE_WHISPER_LANGUAGE", "auto")
 WHISPER_MODEL_PATH = expand_path(os.getenv("VOICEMODE_WHISPER_MODEL_PATH", str(Path.home() / ".voicemode" / "services" / "whisper" / "models")))
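
Editorial note (not part of the diff): with this change an install that never sets `VOICEMODE_WHISPER_MODEL` now resolves to the small `base` model instead of `large-v2`; an explicit environment variable still takes precedence, as a quick illustration of the lookup shows:

```python
import os

# The default changed from "large-v2" to "base"; an explicit
# VOICEMODE_WHISPER_MODEL still wins when set.
print(os.getenv("VOICEMODE_WHISPER_MODEL", "base"))   # -> "base" when unset
os.environ["VOICEMODE_WHISPER_MODEL"] = "large-v3"
print(os.getenv("VOICEMODE_WHISPER_MODEL", "base"))   # -> "large-v3"
```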

{voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/prompts/converse.py
@@ -10,7 +10,6 @@ def converse() -> str:
         "Using tools from voice-mode, have an ongoing two-way conversation",
         "End the chat when the user indicates they want to end it",
         "Keep your utterances brief unless a longer response is requested or necessary",
-        "Listen for up to 120 seconds per response"
     ]
 
     return "\n".join(f"- {instruction}" for instruction in instructions)

voice_mode-2.33.2/voice_mode/templates/__init__.py
@@ -0,0 +1 @@
+# Templates package for Voice Mode

{voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/templates/launchd/com.voicemode.whisper.plist
@@ -1,32 +1,26 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<!-- com.voicemode.whisper.plist v1.0.0 -->
-<!-- Last updated: 2025-01-25 -->
-<!-- Compatible with: whisper.cpp v1.5.0+ -->
+<!-- com.voicemode.whisper.plist v1.1.0 -->
+<!-- Last updated: 2025-08-25 -->
+<!-- Uses unified startup script for dynamic model selection -->
 <plist version="1.0">
 <dict>
     <key>Label</key>
     <string>com.voicemode.whisper</string>
     <key>ProgramArguments</key>
     <array>
-        <string>{WHISPER_BIN}</string>
-        <string>--host</string>
-        <string>0.0.0.0</string>
-        <string>--port</string>
-        <string>{WHISPER_PORT}</string>
-        <string>--model</string>
-        <string>{MODEL_FILE}</string>
+        <string>{START_SCRIPT_PATH}</string>
     </array>
     <key>RunAtLoad</key>
     <true/>
     <key>KeepAlive</key>
     <true/>
     <key>StandardOutPath</key>
-    <string>{LOG_DIR}/whisper.out.log</string>
+    <string>{LOG_DIR}/whisper/whisper.out.log</string>
     <key>StandardErrorPath</key>
-    <string>{LOG_DIR}/whisper.err.log</string>
+    <string>{LOG_DIR}/whisper/whisper.err.log</string>
     <key>WorkingDirectory</key>
-    <string>{WORKING_DIR}</string>
+    <string>{INSTALL_DIR}</string>
     <key>EnvironmentVariables</key>
     <dict>
         <key>PATH</key>

voice_mode-2.33.2/voice_mode/templates/scripts/__init__.py
@@ -0,0 +1 @@
+# Script templates for Voice Mode services

voice_mode-2.33.2/voice_mode/templates/scripts/start-whisper-server.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# Whisper Service Startup Script
+# This script is used by both macOS (launchd) and Linux (systemd) to start the whisper service
+# It sources the voicemode.env file to get configuration, especially VOICEMODE_WHISPER_MODEL
+
+# Determine whisper directory (script is in bin/, whisper root is parent)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+WHISPER_DIR="$(dirname "$SCRIPT_DIR")"
+
+# Voicemode configuration directory
+VOICEMODE_DIR="$HOME/.voicemode"
+LOG_DIR="$VOICEMODE_DIR/logs/whisper"
+
+# Create log directory if it doesn't exist
+mkdir -p "$LOG_DIR"
+
+# Log file for this script (separate from whisper server logs)
+STARTUP_LOG="$LOG_DIR/startup.log"
+
+# Source voicemode configuration if it exists
+if [ -f "$VOICEMODE_DIR/voicemode.env" ]; then
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Sourcing voicemode.env" >> "$STARTUP_LOG"
+    source "$VOICEMODE_DIR/voicemode.env"
+else
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Warning: voicemode.env not found" >> "$STARTUP_LOG"
+fi
+
+# Model selection with environment variable support
+MODEL_NAME="${VOICEMODE_WHISPER_MODEL:-base}"
+MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"
+
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting whisper-server with model: $MODEL_NAME" >> "$STARTUP_LOG"
+
+# Check if model exists
+if [ ! -f "$MODEL_PATH" ]; then
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Available models:" >> "$STARTUP_LOG"
+    ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\.bin$" >> "$STARTUP_LOG"
+
+    # Try to find any available model as fallback
+    FALLBACK_MODEL=$(ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\.bin$" | head -1)
+    if [ -n "$FALLBACK_MODEL" ]; then
+        MODEL_PATH="$WHISPER_DIR/models/$FALLBACK_MODEL"
+        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using fallback model: $FALLBACK_MODEL" >> "$STARTUP_LOG"
+    else
+        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Fatal: No whisper models found" >> "$STARTUP_LOG"
+        exit 1
+    fi
+fi
+
+# Port configuration (with environment variable support)
+WHISPER_PORT="${VOICEMODE_WHISPER_PORT:-2022}"
+
+# Determine server binary location
+# Check new CMake build location first, then legacy location
+if [ -f "$WHISPER_DIR/build/bin/whisper-server" ]; then
+    SERVER_BIN="$WHISPER_DIR/build/bin/whisper-server"
+elif [ -f "$WHISPER_DIR/server" ]; then
+    SERVER_BIN="$WHISPER_DIR/server"
+else
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: whisper-server binary not found" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/build/bin/whisper-server" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/server" >> "$STARTUP_LOG"
+    exit 1
+fi
+
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using binary: $SERVER_BIN" >> "$STARTUP_LOG"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Model path: $MODEL_PATH" >> "$STARTUP_LOG"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Port: $WHISPER_PORT" >> "$STARTUP_LOG"
+
+# Start whisper-server
+# Using exec to replace this script process with whisper-server
+cd "$WHISPER_DIR"
+exec "$SERVER_BIN" \
+    --host 0.0.0.0 \
+    --port "$WHISPER_PORT" \
+    --model "$MODEL_PATH" \
+    --inference-path /v1/audio/transcriptions \
+    --threads 8
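
Editorial note (not part of the diff): the model selection in the script above prefers `ggml-$VOICEMODE_WHISPER_MODEL.bin`, falls back to the first `ggml-*.bin` present, and fails if none exist. A Python paraphrase of that shell logic, for readability only — the shell above is what actually ships:

```python
import os
from pathlib import Path
from typing import Optional

def pick_model(models_dir: Path) -> Path:
    """Illustrative paraphrase of the startup script's model fallback."""
    name = os.getenv("VOICEMODE_WHISPER_MODEL", "base")
    preferred = models_dir / f"ggml-{name}.bin"
    if preferred.exists():
        return preferred
    # Mirror `ls -1 ... | grep '^ggml-.*\.bin$' | head -1`
    candidates: list[Path] = sorted(models_dir.glob("ggml-*.bin"))
    if candidates:
        return candidates[0]
    raise FileNotFoundError("No whisper models found")
```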

{voice_mode-2.32.0 → voice_mode-2.33.2}/voice_mode/tools/services/whisper/install.py
@@ -11,6 +11,11 @@ from pathlib import Path
 from typing import Dict, Any, Optional, Union
 import asyncio
 import aiohttp
+try:
+    from importlib.resources import files
+except ImportError:
+    # Python < 3.9 fallback
+    from importlib_resources import files
 
 from voice_mode.server import mcp
 from voice_mode.config import SERVICE_AUTO_ENABLE
@@ -28,7 +33,7 @@ logger = logging.getLogger("voice-mode")
 @mcp.tool()
 async def whisper_install(
     install_dir: Optional[str] = None,
-    model: str = "large-v2",
+    model: str = "base",
     use_gpu: Optional[Union[bool, str]] = None,
     force_reinstall: Union[bool, str] = False,
     auto_enable: Optional[Union[bool, str]] = None,
@@ -42,7 +47,7 @@
     Args:
         install_dir: Directory to install whisper.cpp (default: ~/.voicemode/whisper.cpp)
        model: Whisper model to download (tiny, base, small, medium, large-v2, large-v3, etc.)
-               Default is large-v2 for best accuracy. Note: large models require ~3GB RAM.
+               Default is base for good balance of speed and accuracy (142MB).
        use_gpu: Enable GPU support if available (default: auto-detect)
        force_reinstall: Force reinstallation even if already installed
        auto_enable: Enable service after install. If None, uses VOICEMODE_SERVICE_AUTO_ENABLE config.
@@ -214,7 +219,8 @@
         if is_macos:
             # On macOS, always enable Metal
             cmake_flags.append("-DGGML_METAL=ON")
-            # On Apple Silicon, also enable Core ML for better performance
+            # On Apple Silicon, also enable Core ML support with fallback
+            # This allows using CoreML models if available, but falls back to Metal if not
             if platform.machine() == "arm64":
                 cmake_flags.append("-DWHISPER_COREML=ON")
                 cmake_flags.append("-DWHISPER_COREML_ALLOW_FALLBACK=ON")
@@ -302,59 +308,34 @@
         if 'original_dir' in locals():
             os.chdir(original_dir)
 
-        # Create start script for whisper-server
-        logger.info("Creating whisper-server start script...")
-        start_script_content = f"""#!/bin/bash
-
-# Configuration
-WHISPER_DIR="{install_dir}"
-LOG_FILE="{os.path.join(voicemode_dir, 'whisper-server.log')}"
-
-# Source voicemode configuration if it exists
-if [ -f "{voicemode_dir}/voicemode.env" ]; then
-    source "{voicemode_dir}/voicemode.env"
-fi
-
-# Model selection with environment variable support
-MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-{model}}}"
-MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"
-
-# Check if model exists
-if [ ! -f "$MODEL_PATH" ]; then
-    echo "Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$LOG_FILE"
-    echo "Available models:" >> "$LOG_FILE"
-    ls -1 "$WHISPER_DIR/models/" | grep "^ggml-.*\\.bin$" >> "$LOG_FILE"
-    exit 1
-fi
-
-echo "Starting whisper-server with model: $MODEL_NAME" >> "$LOG_FILE"
-
-# Note: whisper-server is now built as part of the main build target
-
-# Determine server binary location
-if [ -f "$WHISPER_DIR/build/bin/whisper-server" ]; then
-    SERVER_BIN="$WHISPER_DIR/build/bin/whisper-server"
-elif [ -f "$WHISPER_DIR/server" ]; then
-    SERVER_BIN="$WHISPER_DIR/server"
-else
-    echo "Error: whisper-server binary not found" >> "$LOG_FILE"
-    exit 1
-fi
-
-# Start whisper-server
-cd "$WHISPER_DIR"
-exec "$SERVER_BIN" \\
-    --model "$MODEL_PATH" \\
-    --host 0.0.0.0 \\
-    --port 2022 \\
-    --inference-path /v1/audio/transcriptions \\
-    --threads 8 \\
-    >> "$LOG_FILE" 2>&1
-"""
-
-        start_script_path = os.path.join(install_dir, "start-whisper-server.sh")
+        # Copy template start script for whisper-server
+        logger.info("Installing whisper-server start script from template...")
+
+        # Create bin directory
+        bin_dir = os.path.join(install_dir, "bin")
+        os.makedirs(bin_dir, exist_ok=True)
+
+        # Copy template script
+        template_content = None
+
+        # First try to load from source if running in development
+        source_template = Path(__file__).parent.parent.parent / "templates" / "scripts" / "start-whisper-server.sh"
+        if source_template.exists():
+            logger.info(f"Loading template from source: {source_template}")
+            template_content = source_template.read_text()
+        else:
+            # Try loading from package resources
+            try:
+                template_resource = files("voice_mode.templates.scripts").joinpath("start-whisper-server.sh")
+                template_content = template_resource.read_text()
+                logger.info("Loaded template from package resources")
+            except Exception as e:
+                logger.warning(f"Failed to load template script: {e}. Using fallback inline script.")
+
+        # Create the start script (whether template was loaded from file or created inline)
+        start_script_path = os.path.join(bin_dir, "start-whisper-server.sh")
         with open(start_script_path, 'w') as f:
-            f.write(start_script_content)
+            f.write(template_content)
         os.chmod(start_script_path, 0o755)
 
         # Install launchagent on macOS
@@ -370,33 +351,22 @@ exec "$SERVER_BIN" \\
             plist_name = "com.voicemode.whisper.plist"
             plist_path = os.path.join(launchagents_dir, plist_name)
 
-            plist_content = f"""<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-    <key>Label</key>
-    <string>com.voicemode.whisper</string>
-    <key>ProgramArguments</key>
-    <array>
-        <string>{start_script_path}</string>
-    </array>
-    <key>WorkingDirectory</key>
-    <string>{install_dir}</string>
-    <key>RunAtLoad</key>
-    <true/>
-    <key>KeepAlive</key>
-    <true/>
-    <key>StandardOutPath</key>
-    <string>{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.out.log')}</string>
-    <key>StandardErrorPath</key>
-    <string>{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.err.log')}</string>
-    <key>EnvironmentVariables</key>
-    <dict>
-        <key>PATH</key>
-        <string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/homebrew/bin</string>
-    </dict>
-</dict>
-</plist>"""
+            # Load plist template
+            # First try to load from source if running in development
+            source_template = Path(__file__).parent.parent.parent / "templates" / "launchd" / "com.voicemode.whisper.plist"
+            if source_template.exists():
+                logger.info(f"Loading plist template from source: {source_template}")
+                plist_content = source_template.read_text()
+            else:
+                # Load from package resources
+                template_resource = files("voice_mode.templates.launchd").joinpath("com.voicemode.whisper.plist")
+                plist_content = template_resource.read_text()
+                logger.info("Loaded plist template from package resources")
+
+            # Replace placeholders
+            plist_content = plist_content.replace("{START_SCRIPT_PATH}", start_script_path)
+            plist_content = plist_content.replace("{LOG_DIR}", os.path.join(voicemode_dir, 'logs'))
+            plist_content = plist_content.replace("{INSTALL_DIR}", install_dir)
 
             with open(plist_path, 'w') as f:
                 f.write(plist_content)
@@ -444,9 +414,8 @@ exec "$SERVER_BIN" \\
                 "start_script": start_script_path,
                 "message": f"Successfully installed whisper.cpp {current_version} with {gpu_type} support and whisper-server on port 2022{enable_message}{' (' + migration_msg + ')' if migration_msg else ''}"
             }
-
-        # Install systemd service on Linux
         elif system == "Linux":
+            # Install systemd service on Linux
             logger.info("Installing systemd user service for whisper-server...")
             systemd_user_dir = os.path.expanduser("~/.config/systemd/user")
             os.makedirs(systemd_user_dir, exist_ok=True)
@@ -459,23 +428,22 @@ exec "$SERVER_BIN" \\
             service_path = os.path.join(systemd_user_dir, service_name)
 
             service_content = f"""[Unit]
-Description=Whisper.cpp Speech Recognition Server
-After=network.target
+Description=Whisper.cpp Speech Recognition Server
+After=network.target
 
-[Service]
-Type=simple
-ExecStart={start_script_path}
-Restart=on-failure
-RestartSec=10
-WorkingDirectory={install_dir}
-StandardOutput=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.out.log')}
-StandardError=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.err.log')}
-Environment="PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/cuda/bin"
-Environment="VOICEMODE_WHISPER_MODEL={model}"
+[Service]
+Type=simple
+ExecStart={start_script_path}
+Restart=on-failure
+RestartSec=10
+WorkingDirectory={install_dir}
+StandardOutput=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.out.log')}
+StandardError=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.err.log')}
+Environment="PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/cuda/bin"
 
-[Install]
-WantedBy=default.target
-"""
+[Install]
+WantedBy=default.target
+"""
 
             with open(service_path, 'w') as f:
                 f.write(service_content)
@@ -510,49 +478,49 @@ WantedBy=default.target
             current_version = get_current_version(Path(install_dir))
             return {
                 "success": True,
-                "install_path": install_dir,
-                "model_path": model_path,
-                "gpu_enabled": use_gpu,
-                "gpu_type": gpu_type,
-                "version": current_version,
-                "performance_info": {
-                    "system": system,
-                    "gpu_acceleration": gpu_type,
-                    "model": model,
-                    "binary_path": main_path if 'main_path' in locals() else os.path.join(install_dir, "main"),
-                    "server_port": 2022,
-                    "server_url": "http://localhost:2022"
-                },
-                "systemd_service": service_path,
-                "systemd_enabled": systemd_enabled,
-                "start_script": start_script_path,
-                "message": f"Successfully installed whisper.cpp {current_version} with {gpu_type} support. {systemd_message}{enable_message}{' (' + migration_msg + ')' if migration_msg else ''}"
+                "install_path": install_dir,
+                "model_path": model_path,
+                "gpu_enabled": use_gpu,
+                "gpu_type": gpu_type,
+                "version": current_version,
+                "performance_info": {
+                    "system": system,
+                    "gpu_acceleration": gpu_type,
+                    "model": model,
+                    "binary_path": main_path if 'main_path' in locals() else os.path.join(install_dir, "main"),
+                    "server_port": 2022,
+                    "server_url": "http://localhost:2022"
+                },
+                "systemd_service": service_path,
+                "systemd_enabled": systemd_enabled,
+                "start_script": start_script_path,
+                "message": f"Successfully installed whisper.cpp {current_version} with {gpu_type} support. {systemd_message}{enable_message}{' (' + migration_msg + ')' if migration_msg else ''}"
             }
-
+
         else:
             # Handle auto_enable for other systems (if we add Windows support later)
             enable_message = ""
             if auto_enable is None:
-                auto_enable = SERVICE_AUTO_ENABLE
+                auto_enable = SERVICE_AUTO_ENABLE
 
             if auto_enable:
-                logger.info("Auto-enable not supported on this platform")
+                logger.info("Auto-enable not supported on this platform")
 
             current_version = get_current_version(Path(install_dir))
             return {
                 "success": True,
-                "install_path": install_dir,
-                "model_path": model_path,
-                "gpu_enabled": use_gpu,
-                "gpu_type": gpu_type,
-                "version": current_version,
-                "performance_info": {
-                    "system": system,
-                    "gpu_acceleration": gpu_type,
-                    "model": model,
-                    "binary_path": main_path if 'main_path' in locals() else os.path.join(install_dir, "main")
-                },
-                "message": f"Successfully installed whisper.cpp {current_version} with {gpu_type} support{enable_message}{' (' + migration_msg + ')' if migration_msg else ''}"
+                "install_path": install_dir,
+                "model_path": model_path,
+                "gpu_enabled": use_gpu,
+                "gpu_type": gpu_type,
+                "version": current_version,
+                "performance_info": {
+                    "system": system,
+                    "gpu_acceleration": gpu_type,
+                    "model": model,
+                    "binary_path": main_path if 'main_path' in locals() else os.path.join(install_dir, "main")
+                },
+                "message": f"Successfully installed whisper.cpp {current_version} with {gpu_type} support{enable_message}{' (' + migration_msg + ')' if migration_msg else ''}"
             }
 
         except subprocess.CalledProcessError as e:
@@ -569,4 +537,4 @@ WantedBy=default.target
         return {
             "success": False,
             "error": str(e)
-        }
+        }