PyPI - voice-mode - Versions diffs - 2.32.0__tar.gz → 2.33.0__tar.gz - Mend

voice-mode 2.32.0__tar.gz → 2.33.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118)

{voice_mode-2.32.0 → voice_mode-2.33.0}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,40 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [2.33.0] - 2025-08-26
+### Fixed
+- **CoreML acceleration improvements**
+  - Re-enabled CoreML acceleration in installer after fixing template loading issues
+  - Fixed CoreML conversion with dedicated Python environment to avoid dependency conflicts
+  - Improved CoreML setup to handle PyTorch dependency management properly
+  - Disabled misleading CoreML prompt temporarily while fixing PyTorch installation
+- **Whisper service improvements**
+  - Implemented unified Whisper startup script for Mac and Linux
+  - Fixed Whisper service to respect VOICEMODE_WHISPER_MODEL setting properly
+  - Changed default Whisper model from large-v2 to base for faster initial setup
+- **Installer script stability**
+  - Fixed script exit after Whisper installation when CoreML setup CLI check fails
+  - Properly handle check_voice_mode_cli failures in setup_coreml_acceleration
+  - Installer now continues with Kokoro and LiveKit even if CoreML setup encounters issues
+  - Fixed installer exit issue after Whisper when checking for voicemode CLI
+- **Documentation corrections**
+  - Removed mention of response_duration from converse prompt to avoid confusion
+### Changed
+- **Web documentation improvements**
+  - Updated Quick Start to use `curl -O && bash install.sh` for proper interactive prompts
+  - Clarified OpenAI API key is optional and serves as backup when local services unavailable
+  - Added comprehensive list of what the installer automatically configures
+  - Changed example to use `claude converse` instead of interactive prompt
+  - Updated README to use `/voicemode:converse` for consistent voice usage
+- **Configuration updates**
+  - Added voicemode MCP to Claude Code configuration for easier integration
 ## [2.32.0] - 2025-08-25
 ### Added

{voice_mode-2.32.0 → voice_mode-2.33.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voice-mode
-Version: 2.32.0
+Version: 2.33.0
 Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
 Project-URL: Homepage, https://github.com/mbailey/voicemode
 Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -129,10 +129,10 @@ After installation, just run:
 ```bash
 # With OpenAI API (cloud-based, requires API key)
 export OPENAI_API_KEY=your-openai-key
-claude converse
+claude /voicemode:converse
 # Or use free local services (Voice Mode will offer to install them)
-claude converse
+claude /voicemode:converse
 ```
 ### Manual Installation

{voice_mode-2.32.0 → voice_mode-2.33.0}/README.md RENAMED Viewed

@@ -55,10 +55,10 @@ After installation, just run:
 ```bash
 # With OpenAI API (cloud-based, requires API key)
 export OPENAI_API_KEY=your-openai-key
-claude converse
+claude /voicemode:converse
 # Or use free local services (Voice Mode will offer to install them)
-claude converse
+claude /voicemode:converse
 ```
 ### Manual Installation

{voice_mode-2.32.0 → voice_mode-2.33.0}/pyproject.toml RENAMED Viewed

@@ -99,6 +99,13 @@ voicemode = "voice_mode.cli:voice_mode"
 [tool.hatch.build.targets.wheel]
 packages = ["voice_mode"]
+include = [
+  "voice_mode/**/*.py",
+  "voice_mode/**/*.sh",
+  "voice_mode/**/*.plist",
+  "voice_mode/**/*.service",
+  "voice_mode/templates/**/*",
+]
 exclude = [
   "**/__pycache__",
   "**/*.pyc",

{voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/__version__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 # This file is automatically updated by 'make release'
 # Do not edit manually
-__version__ = "2.32.0"
+__version__ = "2.33.0"

{voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/config.py RENAMED Viewed

@@ -239,7 +239,7 @@ LIVEKIT_API_SECRET = os.getenv("LIVEKIT_API_SECRET", "secret")
 # ==================== WHISPER CONFIGURATION ====================
 # Whisper-specific configuration
-WHISPER_MODEL = os.getenv("VOICEMODE_WHISPER_MODEL", "large-v2")
+WHISPER_MODEL = os.getenv("VOICEMODE_WHISPER_MODEL", "base")
 WHISPER_PORT = int(os.getenv("VOICEMODE_WHISPER_PORT", "2022"))
 WHISPER_LANGUAGE = os.getenv("VOICEMODE_WHISPER_LANGUAGE", "auto")
 WHISPER_MODEL_PATH = expand_path(os.getenv("VOICEMODE_WHISPER_MODEL_PATH", str(Path.home() / ".voicemode" / "services" / "whisper" / "models")))

{voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/prompts/converse.py RENAMED Viewed

@@ -10,7 +10,6 @@ def converse() -> str:
         "Using tools from voice-mode, have an ongoing two-way conversation",
         "End the chat when the user indicates they want to end it",
         "Keep your utterances brief unless a longer response is requested or necessary",
-        "Listen for up to 120 seconds per response"
     ]
     return "\n".join(f"- {instruction}" for instruction in instructions)

voice_mode-2.33.0/voice_mode/templates/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+# Templates package for Voice Mode

voice_mode-2.33.0/voice_mode/templates/scripts/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+# Script templates for Voice Mode services

voice_mode-2.33.0/voice_mode/templates/scripts/start-whisper-server.sh ADDED Viewed

@@ -0,0 +1,80 @@
+#!/bin/bash
+# Whisper Service Startup Script
+# This script is used by both macOS (launchd) and Linux (systemd) to start the whisper service
+# It sources the voicemode.env file to get configuration, especially VOICEMODE_WHISPER_MODEL
+# Determine whisper directory (script is in bin/, whisper root is parent)
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+WHISPER_DIR="$(dirname "$SCRIPT_DIR")"
+# Voicemode configuration directory
+VOICEMODE_DIR="$HOME/.voicemode"
+LOG_DIR="$VOICEMODE_DIR/logs/whisper"
+# Create log directory if it doesn't exist
+mkdir -p "$LOG_DIR"
+# Log file for this script (separate from whisper server logs)
+STARTUP_LOG="$LOG_DIR/startup.log"
+# Source voicemode configuration if it exists
+if [ -f "$VOICEMODE_DIR/voicemode.env" ]; then
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Sourcing voicemode.env" >> "$STARTUP_LOG"
+    source "$VOICEMODE_DIR/voicemode.env"
+else
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Warning: voicemode.env not found" >> "$STARTUP_LOG"
+fi
+# Model selection with environment variable support
+MODEL_NAME="${VOICEMODE_WHISPER_MODEL:-base}"
+MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting whisper-server with model: $MODEL_NAME" >> "$STARTUP_LOG"
+# Check if model exists
+if [ ! -f "$MODEL_PATH" ]; then
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Available models:" >> "$STARTUP_LOG"
+    ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\.bin$" >> "$STARTUP_LOG"
+    # Try to find any available model as fallback
+    FALLBACK_MODEL=$(ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\.bin$" | head -1)
+    if [ -n "$FALLBACK_MODEL" ]; then
+        MODEL_PATH="$WHISPER_DIR/models/$FALLBACK_MODEL"
+        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using fallback model: $FALLBACK_MODEL" >> "$STARTUP_LOG"
+    else
+        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Fatal: No whisper models found" >> "$STARTUP_LOG"
+        exit 1
+    fi
+fi
+# Port configuration (with environment variable support)
+WHISPER_PORT="${VOICEMODE_WHISPER_PORT:-2022}"
+# Determine server binary location
+# Check new CMake build location first, then legacy location
+if [ -f "$WHISPER_DIR/build/bin/whisper-server" ]; then
+    SERVER_BIN="$WHISPER_DIR/build/bin/whisper-server"
+elif [ -f "$WHISPER_DIR/server" ]; then
+    SERVER_BIN="$WHISPER_DIR/server"
+else
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: whisper-server binary not found" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/build/bin/whisper-server" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/server" >> "$STARTUP_LOG"
+    exit 1
+fi
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using binary: $SERVER_BIN" >> "$STARTUP_LOG"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Model path: $MODEL_PATH" >> "$STARTUP_LOG"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Port: $WHISPER_PORT" >> "$STARTUP_LOG"
+# Start whisper-server
+# Using exec to replace this script process with whisper-server
+cd "$WHISPER_DIR"
+exec "$SERVER_BIN" \
+    --host 0.0.0.0 \
+    --port "$WHISPER_PORT" \
+    --model "$MODEL_PATH" \
+    --inference-path /v1/audio/transcriptions \
+    --threads 8

{voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/install.py RENAMED Viewed

@@ -11,6 +11,11 @@ from pathlib import Path
 from typing import Dict, Any, Optional, Union
 import asyncio
 import aiohttp
+try:
+    from importlib.resources import files
+except ImportError:
+    # Python < 3.9 fallback
+    from importlib_resources import files
 from voice_mode.server import mcp
 from voice_mode.config import SERVICE_AUTO_ENABLE
@@ -28,7 +33,7 @@ logger = logging.getLogger("voice-mode")
 @mcp.tool()
 async def whisper_install(
     install_dir: Optional[str] = None,
-    model: str = "large-v2",
+    model: str = "base",
     use_gpu: Optional[Union[bool, str]] = None,
     force_reinstall: Union[bool, str] = False,
     auto_enable: Optional[Union[bool, str]] = None,
@@ -42,7 +47,7 @@ async def whisper_install(
     Args:
         install_dir: Directory to install whisper.cpp (default: ~/.voicemode/whisper.cpp)
         model: Whisper model to download (tiny, base, small, medium, large-v2, large-v3, etc.)
-               Default is large-v2 for best accuracy. Note: large models require ~3GB RAM.
+               Default is base for good balance of speed and accuracy (142MB).
         use_gpu: Enable GPU support if available (default: auto-detect)
         force_reinstall: Force reinstallation even if already installed
         auto_enable: Enable service after install. If None, uses VOICEMODE_SERVICE_AUTO_ENABLE config.
@@ -302,59 +307,117 @@ async def whisper_install(
         if 'original_dir' in locals():
             os.chdir(original_dir)
-        # Create start script for whisper-server
-        logger.info("Creating whisper-server start script...")
-        start_script_content = f"""#!/bin/bash
+        # Copy template start script for whisper-server
+        logger.info("Installing whisper-server start script from template...")
+        # Create bin directory
+        bin_dir = os.path.join(install_dir, "bin")
+        os.makedirs(bin_dir, exist_ok=True)
+        # Copy template script
+        template_content = None
+        # First try to load from source if running in development
+        source_template = Path(__file__).parent.parent.parent.parent / "templates" / "scripts" / "start-whisper-server.sh"
+        if source_template.exists():
+            logger.info(f"Loading template from source: {source_template}")
+            template_content = source_template.read_text()
+        else:
+            # Try loading from package resources
+            try:
+                template_resource = files("voice_mode.templates.scripts").joinpath("start-whisper-server.sh")
+                template_content = template_resource.read_text()
+                logger.info("Loaded template from package resources")
+            except Exception as e:
+                logger.warning(f"Failed to load template script: {e}. Using fallback inline script.")
+        # Fallback to inline script if template not found
+        if template_content is None:
+            template_content = f"""#!/bin/bash
+# Whisper Service Startup Script
+# This script is used by both macOS (launchd) and Linux (systemd) to start the whisper service
+# It sources the voicemode.env file to get configuration, especially VOICEMODE_WHISPER_MODEL
+# Determine whisper directory (script is in bin/, whisper root is parent)
+SCRIPT_DIR="$(cd "$(dirname "${{BASH_SOURCE[0]}}")" && pwd)"
+WHISPER_DIR="$(dirname "$SCRIPT_DIR")"
+# Voicemode configuration directory
+VOICEMODE_DIR="$HOME/.voicemode"
+LOG_DIR="$VOICEMODE_DIR/logs/whisper"
+# Create log directory if it doesn't exist
+mkdir -p "$LOG_DIR"
-# Configuration
-WHISPER_DIR="{install_dir}"
-LOG_FILE="{os.path.join(voicemode_dir, 'whisper-server.log')}"
+# Log file for this script (separate from whisper server logs)
+STARTUP_LOG="$LOG_DIR/startup.log"
 # Source voicemode configuration if it exists
-if [ -f "{voicemode_dir}/voicemode.env" ]; then
-    source "{voicemode_dir}/voicemode.env"
+if [ -f "$VOICEMODE_DIR/voicemode.env" ]; then
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Sourcing voicemode.env" >> "$STARTUP_LOG"
+    source "$VOICEMODE_DIR/voicemode.env"
+else
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Warning: voicemode.env not found" >> "$STARTUP_LOG"
 fi
 # Model selection with environment variable support
-MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-{model}}}"
+MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-base}}"
 MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Starting whisper-server with model: $MODEL_NAME" >> "$STARTUP_LOG"
 # Check if model exists
 if [ ! -f "$MODEL_PATH" ]; then
-    echo "Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$LOG_FILE"
-    echo "Available models:" >> "$LOG_FILE"
-    ls -1 "$WHISPER_DIR/models/" | grep "^ggml-.*\\.bin$" >> "$LOG_FILE"
-    exit 1
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: Model $MODEL_NAME not found at $MODEL_PATH" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Available models:" >> "$STARTUP_LOG"
+    ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\\.bin$" >> "$STARTUP_LOG"
+    # Try to find any available model as fallback
+    FALLBACK_MODEL=$(ls -1 "$WHISPER_DIR/models/" 2>/dev/null | grep "^ggml-.*\\.bin$" | head -1)
+    if [ -n "$FALLBACK_MODEL" ]; then
+        MODEL_PATH="$WHISPER_DIR/models/$FALLBACK_MODEL"
+        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using fallback model: $FALLBACK_MODEL" >> "$STARTUP_LOG"
+    else
+        echo "[$(date '+%Y-%m-%d %H:%M:%S')] Fatal: No whisper models found" >> "$STARTUP_LOG"
+        exit 1
+    fi
 fi
-echo "Starting whisper-server with model: $MODEL_NAME" >> "$LOG_FILE"
-# Note: whisper-server is now built as part of the main build target
+# Port configuration (with environment variable support)
+WHISPER_PORT="${{VOICEMODE_WHISPER_PORT:-2022}}"
 # Determine server binary location
+# Check new CMake build location first, then legacy location
 if [ -f "$WHISPER_DIR/build/bin/whisper-server" ]; then
     SERVER_BIN="$WHISPER_DIR/build/bin/whisper-server"
 elif [ -f "$WHISPER_DIR/server" ]; then
     SERVER_BIN="$WHISPER_DIR/server"
 else
-    echo "Error: whisper-server binary not found" >> "$LOG_FILE"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Error: whisper-server binary not found" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/build/bin/whisper-server" >> "$STARTUP_LOG"
+    echo "[$(date '+%Y-%m-%d %H:%M:%S')] Checked: $WHISPER_DIR/server" >> "$STARTUP_LOG"
     exit 1
 fi
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Using binary: $SERVER_BIN" >> "$STARTUP_LOG"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Model path: $MODEL_PATH" >> "$STARTUP_LOG"
+echo "[$(date '+%Y-%m-%d %H:%M:%S')] Port: $WHISPER_PORT" >> "$STARTUP_LOG"
 # Start whisper-server
+# Using exec to replace this script process with whisper-server
 cd "$WHISPER_DIR"
 exec "$SERVER_BIN" \\
-    --model "$MODEL_PATH" \\
     --host 0.0.0.0 \\
-    --port 2022 \\
+    --port "$WHISPER_PORT" \\
+    --model "$MODEL_PATH" \\
     --inference-path /v1/audio/transcriptions \\
-    --threads 8 \\
-    >> "$LOG_FILE" 2>&1
+    --threads 8
 """
-        start_script_path = os.path.join(install_dir, "start-whisper-server.sh")
+        start_script_path = os.path.join(bin_dir, "start-whisper-server.sh")
         with open(start_script_path, 'w') as f:
-            f.write(start_script_content)
+            f.write(template_content)
         os.chmod(start_script_path, 0o755)
         # Install launchagent on macOS
@@ -471,7 +534,6 @@ WorkingDirectory={install_dir}
 StandardOutput=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.out.log')}
 StandardError=append:{os.path.join(voicemode_dir, 'logs', 'whisper', 'whisper.err.log')}
 Environment="PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/cuda/bin"
-Environment="VOICEMODE_WHISPER_MODEL={model}"
 [Install]
 WantedBy=default.target

{voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/model_install.py RENAMED Viewed

@@ -127,7 +127,8 @@ async def whisper_model_install(
             result = await download_whisper_model(
                 model_name,
                 actual_models_dir,
-                force_download=force_download
+                force_download=force_download,
+                skip_core_ml=skip_core_ml
             )
             # Build comprehensive result entry
@@ -242,58 +243,48 @@ async def _handle_coreml_dependencies(
     if skip_core_ml:
         return {"continue": True}
-    # Check if torch is already installed
-    try:
-        import torch
-        logger.info("PyTorch already installed for CoreML support")
-        return {"continue": True}
-    except ImportError:
-        pass
+    # Check if the CoreML environment already exists
+    whisper_dir = Path.home() / ".voicemode" / "services" / "whisper"
+    venv_coreml = whisper_dir / "venv-coreml" / "bin" / "python"
+    if venv_coreml.exists():
+        # Test if it has the required packages
+        try:
+            result = subprocess.run(
+                [str(venv_coreml), "-c", "import torch, coremltools, whisper"],
+                capture_output=True,
+                timeout=5
+            )
+            if result.returncode == 0:
+                logger.info("CoreML environment already exists and is valid")
+                # Return with a flag indicating CoreML is ready
+                return {
+                    "continue": True,
+                    "coreml_ready": True,
+                    "coreml_deps_note": "CoreML environment exists and is valid"
+                }
+        except:
+            pass
-    # Check if user wants to install torch
+    # Check if user wants to create CoreML environment
     if not install_torch and not auto_confirm:
         return {
             "continue": False,
             "success": False,
             "requires_confirmation": True,
-            "message": "CoreML requires PyTorch (~2.5GB). Rerun with install_torch=True to confirm.",
-            "recommendation": "Set install_torch=True for CoreML acceleration (2-3x faster)"
+            "message": "CoreML conversion requires a dedicated Python environment with PyTorch. Setup may download up to 2.5GB if packages aren't cached.",
+            "recommendation": "💡 Set install_torch=True for CoreML acceleration (2-3x faster)"
         }
-    # Install CoreML dependencies
-    logger.info("Installing CoreML dependencies...")
+    # Note: We don't actually install CoreML dependencies in the voicemode environment anymore
+    # The CoreML conversion uses its own dedicated environment in ~/.voicemode/services/whisper/venv-coreml
+    # This is handled automatically by whisper_helpers.convert_to_coreml()
-    try:
-        # Detect environment and install appropriately
-        packages = ["torch>=2.0.0", "coremltools>=7.0", "transformers", "ane-transformers"]
-        # Try UV first (most common)
-        if subprocess.run(["which", "uv"], capture_output=True).returncode == 0:
-            cmd = ["uv", "pip", "install"] + packages
-            logger.info("Installing via UV...")
-        else:
-            # Fallback to pip
-            cmd = [sys.executable, "-m", "pip", "install"] + packages
-            logger.info("Installing via pip...")
-        # Run installation
-        result = subprocess.run(cmd, capture_output=True, text=True)
-        if result.returncode == 0:
-            logger.info("CoreML dependencies installed successfully")
-            return {"continue": True, "coreml_deps_installed": True}
-        else:
-            logger.warning(f"Failed to install CoreML dependencies: {result.stderr}")
-            return {
-                "continue": True,
-                "coreml_deps_failed": True,
-                "warning": "CoreML dependencies installation failed. Models will use Metal acceleration."
-            }
-    except Exception as e:
-        logger.warning(f"Error installing CoreML dependencies: {e}")
-        return {
-            "continue": True,
-            "coreml_deps_failed": True,
-            "warning": f"CoreML setup error: {str(e)}. Models will use Metal acceleration."
-        }
+    logger.info("CoreML dependencies will be handled by the conversion process")
+    # We still return success to continue with the model download
+    # The actual CoreML environment setup happens during conversion
+    return {
+        "continue": True,
+        "coreml_deps_note": "CoreML environment will be created during conversion if needed"
+    }

{voice_mode-2.32.0 → voice_mode-2.33.0}/voice_mode/tools/services/whisper/models.py RENAMED Viewed

@@ -113,7 +113,7 @@ def get_active_model() -> str:
     # Validate it's a known model
     if model not in WHISPER_MODEL_REGISTRY:
-        return "large-v2"  # Default fallback
+        return "base"  # Default fallback
     return model

voice-mode 2.32.0__tar.gz → 2.33.0__tar.gz

voice-mode 2.32.0__tar.gz → 2.33.0__tar.gz