voice-mode 2.31.0.tar.gz → 2.33.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {voice_mode-2.31.0 → voice_mode-2.33.0}/CHANGELOG.md +42 -1
- {voice_mode-2.31.0 → voice_mode-2.33.0}/PKG-INFO +3 -3
- {voice_mode-2.31.0 → voice_mode-2.33.0}/README.md +2 -2
- {voice_mode-2.31.0 → voice_mode-2.33.0}/pyproject.toml +7 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/__version__.py +1 -1
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/cli.py +1 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/config.py +1 -1
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/prompts/converse.py +0 -1
- voice_mode-2.33.0/voice_mode/templates/__init__.py +1 -0
- voice_mode-2.33.0/voice_mode/templates/scripts/__init__.py +1 -0
- voice_mode-2.33.0/voice_mode/templates/scripts/start-whisper-server.sh +80 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/install.py +88 -26
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/model_install.py +38 -47
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/models.py +1 -1
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/services/common.py +1 -1
- voice_mode-2.33.0/voice_mode/utils/services/coreml_setup.py +234 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/services/whisper_helpers.py +57 -32
- {voice_mode-2.31.0 → voice_mode-2.33.0}/.gitignore +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/build_hooks.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/__init__.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/__main__.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/cli_commands/__init__.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/cli_commands/exchanges.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/conversation_logger.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/core.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/data/versions.json +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/exchanges/__init__.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/exchanges/conversations.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/exchanges/filters.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/exchanges/formatters.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/exchanges/models.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/exchanges/reader.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/exchanges/stats.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/README.md +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/app/api/connection-details/route.ts +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/app/favicon.ico +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/app/globals.css +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/app/layout.tsx +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/app/page.tsx +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/components/CloseIcon.tsx +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/components/NoAgentNotification.tsx +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/components/TranscriptionView.tsx +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/hooks/useLocalMicTrack.ts +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/next-env.d.ts +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/next.config.mjs +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/package-lock.json +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/package.json +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/pnpm-lock.yaml +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/postcss.config.mjs +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/tailwind.config.ts +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/frontend/tsconfig.json +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/prompts/README.md +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/prompts/__init__.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/prompts/release_notes.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/prompts/services.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/provider_discovery.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/providers.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/resources/__init__.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/resources/audio_files.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/resources/changelog.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/resources/configuration.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/resources/statistics.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/resources/version.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/resources/whisper_models.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/server.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/shared.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/simple_failover.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/statistics.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/streaming.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/com.voicemode.frontend.plist +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/com.voicemode.livekit.plist +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/templates/systemd/voicemode-frontend.service +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/templates/systemd/voicemode-livekit.service +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/__init__.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/configuration_management.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/converse.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/dependencies.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/devices.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/diagnostics.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/providers.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/service.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/kokoro/install.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/list_versions.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/livekit/__init__.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/livekit/frontend.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/livekit/install.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/livekit/production_server.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/livekit/uninstall.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/version_info.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/__init__.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/list_models.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/model_active.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/model_benchmark.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/model_remove.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/uninstall.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/statistics.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/tools/voice_registry.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/__init__.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/audio_diagnostics.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/event_logger.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/ffmpeg_check.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/format_migration.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/gpu_detection.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/migration_helpers.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/services/kokoro_helpers.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/services/livekit_helpers.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/services/whisper_version.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/utils/version_helpers.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/version.py +0 -0
- {voice_mode-2.31.0 → voice_mode-2.33.0}/voice_mode/voice_preferences.py +0 -0
--- voice_mode-2.31.0/CHANGELOG.md
+++ voice_mode-2.33.0/CHANGELOG.md
@@ -7,7 +7,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
-## [2.
+## [2.33.0] - 2025-08-26
+
+### Fixed
+- **CoreML acceleration improvements**
+  - Re-enabled CoreML acceleration in installer after fixing template loading issues
+  - Fixed CoreML conversion with dedicated Python environment to avoid dependency conflicts
+  - Improved CoreML setup to handle PyTorch dependency management properly
+  - Disabled misleading CoreML prompt temporarily while fixing PyTorch installation
+
+- **Whisper service improvements**
+  - Implemented unified Whisper startup script for Mac and Linux
+  - Fixed Whisper service to respect VOICEMODE_WHISPER_MODEL setting properly
+  - Changed default Whisper model from large-v2 to base for faster initial setup
+
+- **Installer script stability**
+  - Fixed script exit after Whisper installation when CoreML setup CLI check fails
+  - Properly handle check_voice_mode_cli failures in setup_coreml_acceleration
+  - Installer now continues with Kokoro and LiveKit even if CoreML setup encounters issues
+  - Fixed installer exit issue after Whisper when checking for voicemode CLI
+
+- **Documentation corrections**
+  - Removed mention of response_duration from converse prompt to avoid confusion
+
+### Changed
+- **Web documentation improvements**
+  - Updated Quick Start to use `curl -O && bash install.sh` for proper interactive prompts
+  - Clarified OpenAI API key is optional and serves as backup when local services unavailable
+  - Added comprehensive list of what the installer automatically configures
+  - Changed example to use `claude converse` instead of interactive prompt
+  - Updated README to use `/voicemode:converse` for consistent voice usage
+
+- **Configuration updates**
+  - Added voicemode MCP to Claude Code configuration for easier integration
+
+## [2.32.0] - 2025-08-25
 
 ### Added
 - **Safe shell completions in installer**
@@ -33,6 +67,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - **Installer script reliability**
   - Fixed false positive failure detection when Whisper shows "Make clean" warning
   - Improved service installation success detection
+  - Fixed incorrect `whisper model-install` command (should be `whisper model install`)
+  - Removed non-existent `--auto-confirm` flag from CoreML installation
+
+- **Clean CLI output**
+  - Replaced deprecated `proc.connections()` with `proc.net_connections()` to eliminate warnings
+  - Suppressed httpx INFO logging in CLI commands for cleaner output
+  - All warnings and debug info still available with `--debug` flag
 
 ## [2.30.0] - 2025-08-25
 
--- voice_mode-2.31.0/PKG-INFO
+++ voice_mode-2.33.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voice-mode
-Version: 2.31.0
+Version: 2.33.0
 Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
 Project-URL: Homepage, https://github.com/mbailey/voicemode
 Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -129,10 +129,10 @@ After installation, just run:
 ```bash
 # With OpenAI API (cloud-based, requires API key)
 export OPENAI_API_KEY=your-openai-key
-claude converse
+claude /voicemode:converse
 
 # Or use free local services (Voice Mode will offer to install them)
-claude converse
+claude /voicemode:converse
 ```
 
 ### Manual Installation
--- voice_mode-2.31.0/README.md
+++ voice_mode-2.33.0/README.md
@@ -55,10 +55,10 @@ After installation, just run:
 ```bash
 # With OpenAI API (cloud-based, requires API key)
 export OPENAI_API_KEY=your-openai-key
-claude converse
+claude /voicemode:converse
 
 # Or use free local services (Voice Mode will offer to install them)
-claude converse
+claude /voicemode:converse
 ```
 
 ### Manual Installation
--- voice_mode-2.31.0/pyproject.toml
+++ voice_mode-2.33.0/pyproject.toml
@@ -99,6 +99,13 @@ voicemode = "voice_mode.cli:voice_mode"
 
 [tool.hatch.build.targets.wheel]
 packages = ["voice_mode"]
+include = [
+    "voice_mode/**/*.py",
+    "voice_mode/**/*.sh",
+    "voice_mode/**/*.plist",
+    "voice_mode/**/*.service",
+    "voice_mode/templates/**/*",
+]
 exclude = [
     "**/__pycache__",
     "**/*.pyc",
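The new `include` list matters because the startup script and the launchd/systemd templates are not `.py` files, so Hatch only packages them into the wheel when told to. A minimal sketch for confirming the template actually landed in an installed build, resolving it the same way `install.py` does (it assumes the wheel is installed in the current environment):

```python
# Minimal check that the packaged template is present after installing the wheel.
from importlib.resources import files

template = files("voice_mode.templates.scripts").joinpath("start-whisper-server.sh")
print(template.is_file())                     # True when the include rules picked the script up
print(template.read_text().splitlines()[0])   # "#!/bin/bash"
```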
--- voice_mode-2.31.0/voice_mode/cli.py
+++ voice_mode-2.33.0/voice_mode/cli.py
@@ -22,6 +22,7 @@ if not os.environ.get('VOICEMODE_DEBUG', '').lower() in ('true', '1', 'yes'):
     # Also suppress INFO logging for CLI commands (but not for MCP server)
     import logging
     logging.getLogger("voice-mode").setLevel(logging.WARNING)
+    logging.getLogger("httpx").setLevel(logging.WARNING)
 
 
 # Service management CLI - runs MCP server by default, subcommands override
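The suppression sits inside the `VOICEMODE_DEBUG` check visible in the hunk header, which is presumably what the `--debug` flag mentioned in the changelog toggles. A condensed sketch of the pattern, with the surrounding CLI wiring omitted:

```python
# Condensed sketch of the cli.py pattern: quiet INFO noise from the app logger and
# from httpx unless the user has opted into debug output.
import logging
import os

if os.environ.get("VOICEMODE_DEBUG", "").lower() not in ("true", "1", "yes"):
    logging.getLogger("voice-mode").setLevel(logging.WARNING)
    logging.getLogger("httpx").setLevel(logging.WARNING)  # hides per-request "HTTP Request: ..." lines
```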
--- voice_mode-2.31.0/voice_mode/config.py
+++ voice_mode-2.33.0/voice_mode/config.py
@@ -239,7 +239,7 @@ LIVEKIT_API_SECRET = os.getenv("LIVEKIT_API_SECRET", "secret")
 # ==================== WHISPER CONFIGURATION ====================
 
 # Whisper-specific configuration
-WHISPER_MODEL = os.getenv("VOICEMODE_WHISPER_MODEL", "large-v2")
+WHISPER_MODEL = os.getenv("VOICEMODE_WHISPER_MODEL", "base")
 WHISPER_PORT = int(os.getenv("VOICEMODE_WHISPER_PORT", "2022"))
 WHISPER_LANGUAGE = os.getenv("VOICEMODE_WHISPER_LANGUAGE", "auto")
 WHISPER_MODEL_PATH = expand_path(os.getenv("VOICEMODE_WHISPER_MODEL_PATH", str(Path.home() / ".voicemode" / "services" / "whisper" / "models")))
--- voice_mode-2.31.0/voice_mode/prompts/converse.py
+++ voice_mode-2.33.0/voice_mode/prompts/converse.py
@@ -10,7 +10,6 @@ def converse() -> str:
         "Using tools from voice-mode, have an ongoing two-way conversation",
         "End the chat when the user indicates they want to end it",
         "Keep your utterances brief unless a longer response is requested or necessary",
-        "Listen for up to 120 seconds per response"
     ]
 
     return "\n".join(f"- {instruction}" for instruction in instructions)
--- /dev/null
+++ voice_mode-2.33.0/voice_mode/templates/__init__.py
@@ -0,0 +1 @@
+# Templates package for Voice Mode

--- /dev/null
+++ voice_mode-2.33.0/voice_mode/templates/scripts/__init__.py
@@ -0,0 +1 @@
+# Script templates for Voice Mode services
--- /dev/null
+++ voice_mode-2.33.0/voice_mode/templates/scripts/start-whisper-server.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# Whisper Service Startup Script
+# This script is used by both macOS (launchd) and Linux (systemd) to start the whisper service
+# It sources the voicemode.env file to get configuration, especially VOICEMODE_WHISPER_MODEL
+
+# Determine whisper directory (script is in bin/, whisper root is parent)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+WHISPER_DIR="$(dirname "$SCRIPT_DIR")"
+
+# Voicemode configuration directory
+VOICEMODE_DIR="$HOME/.voicemode"
+LOG_DIR="$VOICEMODE_DIR/logs/whisper"
+
+# Create log directory if it doesn't exist
+mkdir -p "$LOG_DIR"
+
+# Log file for this script (separate from whisper server logs)
+STARTUP_LOG="$LOG_DIR/startup.log"
+
+# Source voicemode configuration if it exists
+if [ -f "$VOICEMODE_DIR/voicemode.env" ]; then
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Sourcing voicemode.env" >> "$STARTUP_LOG"
+    source "$VOICEMODE_DIR/voicemode.env"
+else
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Warning: voicemode.env not found" >> "$STARTUP_LOG"
+fi
+
+# Model selection with environment variable support
+MODEL_NAME="${VOICEMODE_WHISPER_MODEL:-base}"
+MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"
+
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting whisper-server with model: $MODEL_NAME" >> "$STARTUP_LOG"
+
+# Check if model exists
+if [ ! -f "$MODEL_PATH" ]; then
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Available models:" >> "$STARTUP_LOG"
+    ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\.bin$" >> "$STARTUP_LOG"
+
+    # Try to find any available model as fallback
+    FALLBACK_MODEL=$(ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\.bin$" | head -1)
+    if [ -n "$FALLBACK_MODEL" ]; then
+        MODEL_PATH="$WHISPER_DIR/models/$FALLBACK_MODEL"
+        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using fallback model: $FALLBACK_MODEL" >> "$STARTUP_LOG"
+    else
+        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Fatal: No whisper models found" >> "$STARTUP_LOG"
+        exit 1
+    fi
+fi
+
+# Port configuration (with environment variable support)
+WHISPER_PORT="${VOICEMODE_WHISPER_PORT:-2022}"
+
+# Determine server binary location
+# Check new CMake build location first, then legacy location
+if [ -f "$WHISPER_DIR/build/bin/whisper-server" ]; then
+    SERVER_BIN="$WHISPER_DIR/build/bin/whisper-server"
+elif [ -f "$WHISPER_DIR/server" ]; then
+    SERVER_BIN="$WHISPER_DIR/server"
+else
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: whisper-server binary not found" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/build/bin/whisper-server" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/server" >> "$STARTUP_LOG"
+    exit 1
+fi
+
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using binary: $SERVER_BIN" >> "$STARTUP_LOG"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Model path: $MODEL_PATH" >> "$STARTUP_LOG"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Port: $WHISPER_PORT" >> "$STARTUP_LOG"
+
+# Start whisper-server
+# Using exec to replace this script process with whisper-server
+cd "$WHISPER_DIR"
+exec "$SERVER_BIN" \
+    --host 0.0.0.0 \
+    --port "$WHISPER_PORT" \
+    --model "$MODEL_PATH" \
+    --inference-path /v1/audio/transcriptions \
+    --threads 8
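Once launchd or systemd runs this script, whisper-server should be listening on the configured port with the OpenAI-compatible `/v1/audio/transcriptions` inference path. A minimal sketch for sanity-checking that from Python, assuming whisper-server was started locally with the defaults above:

```python
# Minimal sketch: confirm something is listening on the whisper port configured above.
import os
import socket

port = int(os.getenv("VOICEMODE_WHISPER_PORT", "2022"))
with socket.socket() as sock:
    sock.settimeout(2)
    listening = sock.connect_ex(("127.0.0.1", port)) == 0
print(f"whisper-server on 127.0.0.1:{port}: {'up' if listening else 'down'}")
```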
--- voice_mode-2.31.0/voice_mode/tools/services/whisper/install.py
+++ voice_mode-2.33.0/voice_mode/tools/services/whisper/install.py
@@ -11,6 +11,11 @@ from pathlib import Path
 from typing import Dict, Any, Optional, Union
 import asyncio
 import aiohttp
+try:
+    from importlib.resources import files
+except ImportError:
+    # Python < 3.9 fallback
+    from importlib_resources import files
 
 from voice_mode.server import mcp
 from voice_mode.config import SERVICE_AUTO_ENABLE
@@ -28,7 +33,7 @@ logger = logging.getLogger("voice-mode")
 @mcp.tool()
 async def whisper_install(
     install_dir: Optional[str] = None,
-    model: str = "large-v2",
+    model: str = "base",
     use_gpu: Optional[Union[bool, str]] = None,
     force_reinstall: Union[bool, str] = False,
     auto_enable: Optional[Union[bool, str]] = None,
@@ -42,7 +47,7 @@ async def whisper_install(
     Args:
         install_dir: Directory to install whisper.cpp (default: ~/.voicemode/whisper.cpp)
         model: Whisper model to download (tiny, base, small, medium, large-v2, large-v3, etc.)
-               Default is
+               Default is base for good balance of speed and accuracy (142MB).
         use_gpu: Enable GPU support if available (default: auto-detect)
         force_reinstall: Force reinstallation even if already installed
         auto_enable: Enable service after install. If None, uses VOICEMODE_SERVICE_AUTO_ENABLE config.
@@ -302,59 +307,117 @@
         if 'original_dir' in locals():
             os.chdir(original_dir)
 
-        #
-        logger.info("
-
+        # Copy template start script for whisper-server
+        logger.info("Installing whisper-server start script from template...")
+
+        # Create bin directory
+        bin_dir = os.path.join(install_dir, "bin")
+        os.makedirs(bin_dir, exist_ok=True)
+
+        # Copy template script
+        template_content = None
+
+        # First try to load from source if running in development
+        source_template = Path(__file__).parent.parent.parent.parent / "templates" / "scripts" / "start-whisper-server.sh"
+        if source_template.exists():
+            logger.info(f"Loading template from source: {source_template}")
+            template_content = source_template.read_text()
+        else:
+            # Try loading from package resources
+            try:
+                template_resource = files("voice_mode.templates.scripts").joinpath("start-whisper-server.sh")
+                template_content = template_resource.read_text()
+                logger.info("Loaded template from package resources")
+            except Exception as e:
+                logger.warning(f"Failed to load template script: {e}. Using fallback inline script.")
+
+        # Fallback to inline script if template not found
+        if template_content is None:
+            template_content = f"""#!/bin/bash
+
+# Whisper Service Startup Script
+# This script is used by both macOS (launchd) and Linux (systemd) to start the whisper service
+# It sources the voicemode.env file to get configuration, especially VOICEMODE_WHISPER_MODEL
+
+# Determine whisper directory (script is in bin/, whisper root is parent)
+SCRIPT_DIR="$(cd "$(dirname "${{BASH_SOURCE[0]}}")" && pwd)"
+WHISPER_DIR="$(dirname "$SCRIPT_DIR")"
+
+# Voicemode configuration directory
+VOICEMODE_DIR="$HOME/.voicemode"
+LOG_DIR="$VOICEMODE_DIR/logs/whisper"
+
+# Create log directory if it doesn't exist
+mkdir -p "$LOG_DIR"
 
-#
-
-LOG_FILE="{os.path.join(voicemode_dir, 'whisper-server.log')}"
+# Log file for this script (separate from whisper server logs)
+STARTUP_LOG="$LOG_DIR/startup.log"
 
 # Source voicemode configuration if it exists
-if [ -f "
-
+if [ -f "$VOICEMODE_DIR/voicemode.env" ]; then
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Sourcing voicemode.env" >> "$STARTUP_LOG"
+    source "$VOICEMODE_DIR/voicemode.env"
+else
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Warning: voicemode.env not found" >> "$STARTUP_LOG"
 fi
 
 # Model selection with environment variable support
-MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-
+MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-base}}"
 MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"
 
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting whisper-server with model: $MODEL_NAME" >> "$STARTUP_LOG"
+
 # Check if model exists
 if [ ! -f "$MODEL_PATH" ]; then
-    echo "Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$LOG_FILE"
-    echo "Available models:" >> "$LOG_FILE"
-    ls -1 "$WHISPER_DIR/models/" | grep "^ggml-.*\\.bin$" >> "$LOG_FILE"
-
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Available models:" >> "$STARTUP_LOG"
+    ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\\.bin$" >> "$STARTUP_LOG"
+
+    # Try to find any available model as fallback
+    FALLBACK_MODEL=$(ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\\.bin$" | head -1)
+    if [ -n "$FALLBACK_MODEL" ]; then
+        MODEL_PATH="$WHISPER_DIR/models/$FALLBACK_MODEL"
+        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using fallback model: $FALLBACK_MODEL" >> "$STARTUP_LOG"
+    else
+        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Fatal: No whisper models found" >> "$STARTUP_LOG"
+        exit 1
+    fi
 fi
 
-
-
-# Note: whisper-server is now built as part of the main build target
+# Port configuration (with environment variable support)
+WHISPER_PORT="${{VOICEMODE_WHISPER_PORT:-2022}}"
 
 # Determine server binary location
+# Check new CMake build location first, then legacy location
 if [ -f "$WHISPER_DIR/build/bin/whisper-server" ]; then
     SERVER_BIN="$WHISPER_DIR/build/bin/whisper-server"
 elif [ -f "$WHISPER_DIR/server" ]; then
     SERVER_BIN="$WHISPER_DIR/server"
 else
-    echo "Error: whisper-server binary not found" >> "$LOG_FILE"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: whisper-server binary not found" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/build/bin/whisper-server" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/server" >> "$STARTUP_LOG"
     exit 1
 fi
 
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using binary: $SERVER_BIN" >> "$STARTUP_LOG"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Model path: $MODEL_PATH" >> "$STARTUP_LOG"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Port: $WHISPER_PORT" >> "$STARTUP_LOG"
+
 # Start whisper-server
+# Using exec to replace this script process with whisper-server
 cd "$WHISPER_DIR"
 exec "$SERVER_BIN" \\
-    --model "$MODEL_PATH" \\
     --host 0.0.0.0 \\
-    --port
+    --port "$WHISPER_PORT" \\
+    --model "$MODEL_PATH" \\
     --inference-path /v1/audio/transcriptions \\
-    --threads 8
-    >> "$LOG_FILE" 2>&1
+    --threads 8
 """
 
-        start_script_path = os.path.join(
+        start_script_path = os.path.join(bin_dir, "start-whisper-server.sh")
         with open(start_script_path, 'w') as f:
-            f.write(
+            f.write(template_content)
         os.chmod(start_script_path, 0o755)
 
         # Install launchagent on macOS
@@ -471,7 +534,6 @@ WorkingDirectory={install_dir}
 StandardOutput=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.out.log')}
 StandardError=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.err.log')}
 Environment="PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/cuda/bin"
-Environment="VOICEMODE_WHISPER_MODEL={model}"
 
 [Install]
 WantedBy=default.target
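The template-loading portion of the install hunk boils down to a three-step lookup: the source checkout first, then the packaged resource enabled by the pyproject change, then the inline fallback script. A condensed sketch of that order, where `load_start_script`, `repo_root`, and `FALLBACK_SCRIPT` are illustrative stand-ins for the path arithmetic and the embedded f-string in the real code:

```python
# Condensed sketch of install.py's lookup order for the startup script template.
from importlib.resources import files
from pathlib import Path

FALLBACK_SCRIPT = "#!/bin/bash\n# ...inline fallback elided...\n"  # stand-in for the embedded f-string

def load_start_script(repo_root: Path) -> str:
    source = repo_root / "templates" / "scripts" / "start-whisper-server.sh"
    if source.exists():
        # Development checkout: read straight from the source tree
        return source.read_text()
    try:
        # Installed wheel: read the packaged resource
        return files("voice_mode.templates.scripts").joinpath("start-whisper-server.sh").read_text()
    except Exception:
        # Last resort: the inline copy
        return FALLBACK_SCRIPT
```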
--- voice_mode-2.31.0/voice_mode/tools/services/whisper/model_install.py
+++ voice_mode-2.33.0/voice_mode/tools/services/whisper/model_install.py
@@ -127,7 +127,8 @@ async def whisper_model_install(
         result = await download_whisper_model(
             model_name,
             actual_models_dir,
-            force_download=force_download
+            force_download=force_download,
+            skip_core_ml=skip_core_ml
         )
 
         # Build comprehensive result entry
@@ -242,58 +243,48 @@ async def _handle_coreml_dependencies(
     if skip_core_ml:
         return {"continue": True}
 
-    # Check if
-
-
-
-
-
-
+    # Check if the CoreML environment already exists
+    whisper_dir = Path.home() / ".voicemode" / "services" / "whisper"
+    venv_coreml = whisper_dir / "venv-coreml" / "bin" / "python"
+
+    if venv_coreml.exists():
+        # Test if it has the required packages
+        try:
+            result = subprocess.run(
+                [str(venv_coreml), "-c", "import torch, coremltools, whisper"],
+                capture_output=True,
+                timeout=5
+            )
+            if result.returncode == 0:
+                logger.info("CoreML environment already exists and is valid")
+                # Return with a flag indicating CoreML is ready
+                return {
+                    "continue": True,
+                    "coreml_ready": True,
+                    "coreml_deps_note": "CoreML environment exists and is valid"
+                }
+        except:
+            pass
 
-    # Check if user wants to
+    # Check if user wants to create CoreML environment
     if not install_torch and not auto_confirm:
         return {
             "continue": False,
             "success": False,
             "requires_confirmation": True,
-            "message": "CoreML requires PyTorch
-            "recommendation": "Set install_torch=True for CoreML acceleration (2-3x faster)"
+            "message": "CoreML conversion requires a dedicated Python environment with PyTorch. Setup may download up to 2.5GB if packages aren't cached.",
+            "recommendation": "💡 Set install_torch=True for CoreML acceleration (2-3x faster)"
         }
 
-    #
-
+    # Note: We don't actually install CoreML dependencies in the voicemode environment anymore
+    # The CoreML conversion uses its own dedicated environment in ~/.voicemode/services/whisper/venv-coreml
+    # This is handled automatically by whisper_helpers.convert_to_coreml()
 
-
-
-
-
-
-
-
-
-        else:
-            # Fallback to pip
-            cmd = [sys.executable, "-m", "pip", "install"] + packages
-            logger.info("Installing via pip...")
-
-        # Run installation
-        result = subprocess.run(cmd, capture_output=True, text=True)
-
-        if result.returncode == 0:
-            logger.info("CoreML dependencies installed successfully")
-            return {"continue": True, "coreml_deps_installed": True}
-        else:
-            logger.warning(f"Failed to install CoreML dependencies: {result.stderr}")
-            return {
-                "continue": True,
-                "coreml_deps_failed": True,
-                "warning": "CoreML dependencies installation failed. Models will use Metal acceleration."
-            }
-
-    except Exception as e:
-        logger.warning(f"Error installing CoreML dependencies: {e}")
-        return {
-            "continue": True,
-            "coreml_deps_failed": True,
-            "warning": f"CoreML setup error: {str(e)}. Models will use Metal acceleration."
-        }
+    logger.info("CoreML dependencies will be handled by the conversion process")
+
+    # We still return success to continue with the model download
+    # The actual CoreML environment setup happens during conversion
+    return {
+        "continue": True,
+        "coreml_deps_note": "CoreML environment will be created during conversion if needed"
+    }
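The CoreML pre-check added here is self-contained enough to restate as a small helper: it only reports whether the dedicated `venv-coreml` interpreter exists and can import the conversion stack; creating and populating that environment is left to `whisper_helpers.convert_to_coreml()`, which is not shown in this diff. A minimal sketch (the function name is illustrative, not part of the package):

```python
# Minimal standalone version of the venv check performed in _handle_coreml_dependencies.
import subprocess
from pathlib import Path

def coreml_env_ready() -> bool:
    python = Path.home() / ".voicemode" / "services" / "whisper" / "venv-coreml" / "bin" / "python"
    if not python.exists():
        return False
    try:
        proc = subprocess.run(
            [str(python), "-c", "import torch, coremltools, whisper"],
            capture_output=True,
            timeout=5,
        )
    except (subprocess.TimeoutExpired, OSError):
        return False
    return proc.returncode == 0
```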
--- voice_mode-2.31.0/voice_mode/utils/services/common.py
+++ voice_mode-2.33.0/voice_mode/utils/services/common.py
@@ -25,7 +25,7 @@ def find_process_by_port(port: int) -> Optional[psutil.Process]:
             if proc_name in ['ssh', 'sshd']:
                 continue
 
-            for conn in proc.connections():
+            for conn in proc.net_connections():
                 if conn.laddr.port == port and conn.status == 'LISTEN':
                     # Verify this is a real local process
                     try: