voice-mode 2.27.0-py3-none-any.whl → 2.28.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voice_mode/__version__.py +1 -1
- voice_mode/cli.py +152 -37
- voice_mode/cli_commands/exchanges.py +6 -0
- voice_mode/frontend/.next/BUILD_ID +1 -1
- voice_mode/frontend/.next/app-build-manifest.json +5 -5
- voice_mode/frontend/.next/build-manifest.json +3 -3
- voice_mode/frontend/.next/next-minimal-server.js.nft.json +1 -1
- voice_mode/frontend/.next/next-server.js.nft.json +1 -1
- voice_mode/frontend/.next/prerender-manifest.json +1 -1
- voice_mode/frontend/.next/required-server-files.json +1 -1
- voice_mode/frontend/.next/server/app/_not-found/page.js +1 -1
- voice_mode/frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/server/app/_not-found.html +1 -1
- voice_mode/frontend/.next/server/app/_not-found.rsc +1 -1
- voice_mode/frontend/.next/server/app/api/connection-details/route.js +2 -2
- voice_mode/frontend/.next/server/app/favicon.ico/route.js +2 -2
- voice_mode/frontend/.next/server/app/index.html +1 -1
- voice_mode/frontend/.next/server/app/index.rsc +2 -2
- voice_mode/frontend/.next/server/app/page.js +2 -2
- voice_mode/frontend/.next/server/app/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/server/chunks/994.js +1 -1
- voice_mode/frontend/.next/server/middleware-build-manifest.js +1 -1
- voice_mode/frontend/.next/server/next-font-manifest.js +1 -1
- voice_mode/frontend/.next/server/next-font-manifest.json +1 -1
- voice_mode/frontend/.next/server/pages/404.html +1 -1
- voice_mode/frontend/.next/server/pages/500.html +1 -1
- voice_mode/frontend/.next/server/server-reference-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/.next/BUILD_ID +1 -1
- voice_mode/frontend/.next/standalone/.next/app-build-manifest.json +5 -5
- voice_mode/frontend/.next/standalone/.next/build-manifest.json +3 -3
- voice_mode/frontend/.next/standalone/.next/prerender-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/.next/required-server-files.json +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found.rsc +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/index.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/index.rsc +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/page.js +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/chunks/994.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/middleware-build-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/.next/server/pages/404.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/pages/500.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/server.js +1 -1
- voice_mode/frontend/.next/static/chunks/app/{layout-08be62ed6e344292.js → layout-2a1721553cbe58e4.js} +1 -1
- voice_mode/frontend/.next/static/chunks/app/page-fe35d9da20297c85.js +1 -0
- voice_mode/frontend/.next/static/chunks/{main-app-413f77c1f2c53e3f.js → main-app-c17195caa4e269d6.js} +1 -1
- voice_mode/frontend/.next/trace +43 -43
- voice_mode/frontend/.next/types/app/api/connection-details/route.ts +1 -1
- voice_mode/frontend/.next/types/app/layout.ts +1 -1
- voice_mode/frontend/.next/types/app/page.ts +1 -1
- voice_mode/frontend/package-lock.json +6 -6
- voice_mode/tools/converse.py +44 -24
- voice_mode/tools/service.py +30 -3
- voice_mode/tools/services/kokoro/install.py +1 -1
- voice_mode/tools/services/whisper/__init__.py +15 -5
- voice_mode/tools/services/whisper/install.py +41 -9
- voice_mode/tools/services/whisper/list_models.py +14 -14
- voice_mode/tools/services/whisper/model_active.py +54 -0
- voice_mode/tools/services/whisper/model_benchmark.py +159 -0
- voice_mode/tools/services/whisper/{download_model.py → model_install.py} +72 -11
- voice_mode/tools/services/whisper/model_remove.py +36 -0
- voice_mode/tools/services/whisper/models.py +225 -26
- voice_mode/utils/services/whisper_helpers.py +206 -19
- voice_mode/utils/services/whisper_version.py +138 -0
- {voice_mode-2.27.0.dist-info → voice_mode-2.28.1.dist-info}/METADATA +5 -1
- {voice_mode-2.27.0.dist-info → voice_mode-2.28.1.dist-info}/RECORD +77 -74
- voice_mode/frontend/.next/static/chunks/app/page-80fc72669f25298f.js +0 -1
- voice_mode/tools/services/whisper/list_models_tool.py +0 -65
- /voice_mode/frontend/.next/static/{wQ5pxzPmwjlzdUfJwSjMg → LhJalgfazyY_l3L_v0_Kw}/_buildManifest.js +0 -0
- /voice_mode/frontend/.next/static/{wQ5pxzPmwjlzdUfJwSjMg → LhJalgfazyY_l3L_v0_Kw}/_ssgManifest.js +0 -0
- {voice_mode-2.27.0.dist-info → voice_mode-2.28.1.dist-info}/WHEEL +0 -0
- {voice_mode-2.27.0.dist-info → voice_mode-2.28.1.dist-info}/entry_points.txt +0 -0
voice_mode/utils/services/whisper_helpers.py

@@ -1,9 +1,11 @@
 """Helper functions for whisper service management."""
 
 import os
+import re
 import subprocess
 import platform
 import logging
+import shutil
 from pathlib import Path
 from typing import Optional, List, Dict, Union
 
@@ -108,7 +110,6 @@ async def download_whisper_model(
                 break
 
         if original_script:
-            import shutil
             shutil.copy2(original_script, download_script)
             os.chmod(download_script, 0o755)
         else:
@@ -116,7 +117,6 @@ async def download_whisper_model(
             # (happens during install when models_dir is install_dir/models)
             parent_script = models_dir.parent / "models" / "download-ggml-model.sh"
             if parent_script.exists() and parent_script != download_script:
-                import shutil
                 shutil.copy2(parent_script, download_script)
                 os.chmod(download_script, 0o755)
             else:
@@ -146,16 +146,52 @@ async def download_whisper_model(
 
         # Check for Core ML support on Apple Silicon
         if platform.system() == "Darwin" and platform.machine() == "arm64":
+            # Check if Core ML dependencies are needed
+            requirements_file = Path(models_dir) / "requirements-coreml.txt"
+            if requirements_file.exists() and shutil.which("uv"):
+                # Try to check if torch is available
+                try:
+                    subprocess.run(
+                        ["uv", "run", "python", "-c", "import torch"],
+                        capture_output=True,
+                        check=True,
+                        timeout=5
+                    )
+                    torch_available = True
+                except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
+                    torch_available = False
+
+                if not torch_available:
+                    logger.info("Installing Core ML dependencies for optimal performance...")
+                    try:
+                        subprocess.run(
+                            ["uv", "pip", "install", "-r", str(requirements_file)],
+                            capture_output=True,
+                            check=True,
+                            timeout=120
+                        )
+                        logger.info("Core ML dependencies installed successfully")
+                    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
+                        logger.info("Could not install Core ML dependencies automatically. Whisper will still work with Metal acceleration.")
+
             core_ml_result = await convert_to_coreml(model, models_dir)
             if core_ml_result["success"]:
                 logger.info(f"Core ML conversion completed for {model}")
             else:
-
+                # Log appropriate level based on error category
+                error_category = core_ml_result.get('error_category', 'unknown')
+                if error_category in ['missing_pytorch', 'missing_coremltools', 'missing_whisper', 'missing_ane_transformers', 'missing_module']:
+                    logger.info(f"Core ML conversion skipped - {core_ml_result.get('error', 'Missing dependencies')}. Whisper will use Metal acceleration.")
+                else:
+                    logger.warning(f"Core ML conversion failed ({error_category}): {core_ml_result.get('error', 'Unknown error')}")
 
+        # Always include Core ML status in response
         return {
             "success": True,
             "path": str(model_path),
-            "message": f"Model {model} downloaded successfully"
+            "message": f"Model {model} downloaded successfully",
+            "core_ml_status": core_ml_result,
+            "acceleration": "coreml" if core_ml_result.get("success") else "metal"
         }
 
     except subprocess.CalledProcessError as e:
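
The torch probe added in this hunk is a general pattern: ask uv to import the module in a throwaway subprocess and treat a non-zero exit (or a hang) as "not installed". A minimal standalone sketch of that pattern, with an illustrative helper name that is not part of the package:

import shutil
import subprocess

def module_available(module: str, timeout: int = 5) -> bool:
    """Return True if `module` imports cleanly in the uv-managed environment."""
    if not shutil.which("uv"):
        return False  # uv not on PATH; caller should fall back
    try:
        subprocess.run(
            ["uv", "run", "python", "-c", f"import {module}"],
            capture_output=True,
            check=True,      # non-zero exit (e.g. ImportError) raises CalledProcessError
            timeout=timeout,
        )
        return True
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
        return False

# e.g. module_available("torch") == False is what triggers the
# requirements-coreml.txt install in the hunk above
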
@@ -200,26 +236,82 @@ async def convert_to_coreml(
         }
 
     # Find the Core ML conversion script
-
-
+    # Try new location first, then fall back to old location
+    whisper_dir = Path.home() / ".voicemode" / "services" / "whisper"
+    if not whisper_dir.exists():
+        whisper_dir = Path.home() / ".voicemode" / "whisper.cpp"
+
+    # Use the uv wrapper script if it exists, otherwise fallback to original
+    convert_script = whisper_dir / "models" / "generate-coreml-model-uv.sh"
+    if not convert_script.exists():
+        convert_script = whisper_dir / "models" / "generate-coreml-model.sh"
 
     if not convert_script.exists():
         return {
             "success": False,
-            "error": "Core ML conversion script not found"
+            "error": f"Core ML conversion script not found at {convert_script}"
         }
 
     logger.info(f"Converting {model} to Core ML format...")
 
     try:
-        #
-
-
-
-
-
-
-
+        # Check if we should use uv for Python dependencies
+        # Try to find the voicemode project root for uv
+        voicemode_root = None
+        current = Path(__file__).parent
+        while current != current.parent:
+            if (current / "pyproject.toml").exists():
+                with open(current / "pyproject.toml") as f:
+                    content = f.read()
+                    if 'name = "voice-mode"' in content or 'name = "voicemode"' in content:
+                        voicemode_root = current
+                        break
+            current = current.parent
+
+        # If we found voicemode root and uv is available, use it
+        if voicemode_root and shutil.which("uv"):
+            # Run the Python script directly with uv instead of using the bash wrapper
+            logger.info("Using uv for Core ML conversion with Python dependencies")
+            # Run from the whisper models directory
+            script_path = whisper_dir / "models" / "convert-whisper-to-coreml.py"
+            result = subprocess.run(
+                ["uv", "run", "--project", str(voicemode_root), "python",
+                 str(script_path),
+                 "--model", model, "--encoder-only", "True", "--optimize-ane", "True"],
+                cwd=str(whisper_dir / "models"),
+                capture_output=True,
+                text=True,
+                check=True
+            )
+
+            # Now compile the mlpackage to mlmodelc using coremlc
+            mlpackage_path = models_dir / f"coreml-encoder-{model}.mlpackage"
+            if mlpackage_path.exists():
+                logger.info(f"Compiling Core ML model with coremlc...")
+                compile_result = subprocess.run(
+                    ["xcrun", "coremlc", "compile", str(mlpackage_path), str(models_dir)],
+                    capture_output=True,
+                    text=True,
+                    check=True
+                )
+
+                # Move the compiled model to the correct name
+                compiled_path = models_dir / f"coreml-encoder-{model}.mlmodelc"
+                if compiled_path.exists():
+                    shutil.rmtree(coreml_path, ignore_errors=True)
+                    shutil.move(str(compiled_path), str(coreml_path))
+        else:
+            # Fallback to original bash script
+            logger.info("Using standard Python for Core ML conversion")
+            # Run from the whisper models directory where the script is located
+            script_dir = convert_script.parent
+            result = subprocess.run(
+                ["bash", str(convert_script), model],
+                cwd=str(script_dir),
+                capture_output=True,
+                text=True,
+                check=True
+            )
 
         if coreml_path.exists():
             return {
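
The new uv path first locates the voice-mode project root by walking up from the module file until it finds a pyproject.toml that names the package, then runs the conversion script under `uv run --project` so the heavyweight Python dependencies resolve. The root-finding loop can be read in isolation as a sketch like this (helper name illustrative, not in the package):

from pathlib import Path
from typing import Optional

def find_project_root(start: Path, *names: str) -> Optional[Path]:
    """Walk up from `start` until a pyproject.toml declaring one of `names` is found."""
    current = start
    while current != current.parent:  # current.parent == current only at the filesystem root
        pyproject = current / "pyproject.toml"
        if pyproject.exists():
            content = pyproject.read_text()
            if any(f'name = "{name}"' in content for name in names):
                return current
        current = current.parent
    return None

# Mirrors the hunk above:
# find_project_root(Path(__file__).parent, "voice-mode", "voicemode")
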
@@ -234,16 +326,111 @@ async def convert_to_coreml(
         }
 
     except subprocess.CalledProcessError as e:
-
+        error_text = e.stderr if e.stderr else ""
+        stdout_text = e.stdout if e.stdout else ""
+        # Combine both for error detection since Python errors can appear in either
+        combined_output = error_text + stdout_text
+
+        # Enhanced error detection with specific categories
+        error_details = {
+            "success": False,
+            "error_type": "subprocess_error",
+            "return_code": e.returncode,
+            "command": " ".join(e.cmd) if hasattr(e, 'cmd') else "conversion script",
+        }
+
+        # Detect specific missing dependencies
+        if "ModuleNotFoundError" in combined_output:
+            if "torch" in combined_output:
+                error_details.update({
+                    "error_category": "missing_pytorch",
+                    "error": "PyTorch not installed - required for Core ML conversion",
+                    "install_command": "uv pip install torch",
+                    "manual_install": "pip install torch",
+                    "package_size": "~2.5GB"
+                })
+            elif "coremltools" in combined_output:
+                error_details.update({
+                    "error_category": "missing_coremltools",
+                    "error": "CoreMLTools not installed",
+                    "install_command": "uv pip install coremltools",
+                    "manual_install": "pip install coremltools",
+                    "package_size": "~50MB"
+                })
+            elif "whisper" in combined_output:
+                error_details.update({
+                    "error_category": "missing_whisper",
+                    "error": "OpenAI Whisper package not installed",
+                    "install_command": "uv pip install openai-whisper",
+                    "manual_install": "pip install openai-whisper",
+                    "package_size": "~100MB"
+                })
+            elif "ane_transformers" in combined_output:
+                error_details.update({
+                    "error_category": "missing_ane_transformers",
+                    "error": "ANE Transformers not installed for Apple Neural Engine optimization",
+                    "install_command": "uv pip install ane_transformers",
+                    "manual_install": "pip install ane_transformers",
+                    "package_size": "~10MB"
+                })
+            else:
+                # Generic module not found
+                module_match = re.search(r"No module named '([^']+)'", combined_output)
+                module_name = module_match.group(1) if module_match else "unknown"
+                error_details.update({
+                    "error_category": "missing_module",
+                    "error": f"Python module '{module_name}' not installed",
+                    "install_command": f"uv pip install {module_name}",
+                    "manual_install": f"pip install {module_name}"
+                })
+        elif "xcrun: error" in combined_output and "coremlc" in combined_output:
+            error_details.update({
+                "error_category": "missing_coremlc",
+                "error": "Core ML compiler (coremlc) not found - requires full Xcode installation",
+                "install_command": "Install Xcode from Mac App Store",
+                "note": "Command Line Tools alone are insufficient. Full Xcode provides coremlc for Core ML compilation.",
+                "alternative": "Models will work with Metal acceleration without Core ML compilation"
+            })
+        elif "xcrun: error" in combined_output:
+            error_details.update({
+                "error_category": "missing_xcode_tools",
+                "error": "Xcode Command Line Tools not installed or xcrun not available",
+                "install_command": "xcode-select --install",
+                "note": "Requires Xcode Command Line Tools"
+            })
+        elif "timeout" in combined_output.lower():
+            error_details.update({
+                "error_category": "conversion_timeout",
+                "error": "Core ML conversion timed out",
+                "suggestion": "Try with a smaller model or increase timeout"
+            })
+        else:
+            # Generic conversion failure
+            error_details.update({
+                "error_category": "conversion_failure",
+                "error": f"Core ML conversion failed",
+                "stderr": error_text[:500] if error_text else None,  # Truncate long errors
+                "stdout": stdout_text[:500] if stdout_text else None
+            })
+
+        logger.error(f"Core ML conversion failed - Category: {error_details.get('error_category', 'unknown')}, Error: {error_text[:200]}")
+        return error_details
+
+    except subprocess.TimeoutExpired as e:
+        logger.error(f"Core ML conversion timed out after {e.timeout} seconds")
         return {
             "success": False,
-            "
+            "error_category": "timeout",
+            "error": f"Core ML conversion timed out after {e.timeout} seconds",
+            "suggestion": "Model conversion is taking too long. Try again or use a smaller model."
         }
     except Exception as e:
-        logger.error(f"
+        logger.error(f"Unexpected error during Core ML conversion: {e}")
         return {
             "success": False,
-            "
+            "error_category": "unexpected_error",
+            "error": str(e),
+            "error_type": type(e).__name__
         }
 
 
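
These structured error dicts are what let `download_whisper_model` route dependency problems to `logger.info` (Core ML skipped, Metal still works) and real failures to `logger.warning`. A hypothetical consumer, not in the package, could flatten one of these dicts into a user-facing hint:

def summarize_coreml_failure(result: dict) -> str:
    """Condense a convert_to_coreml() error dict into a one-line hint (illustrative)."""
    category = result.get("error_category", "unknown")
    # Missing-dependency categories carry an install_command the user can run
    if category.startswith("missing_") and "install_command" in result:
        return f"{result.get('error', 'Core ML conversion failed')} - try: {result['install_command']}"
    # Everything else degrades gracefully: whisper.cpp still runs with Metal
    return f"Core ML unavailable ({category}); continuing with Metal acceleration"
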
voice_mode/utils/services/whisper_version.py (new file)

@@ -0,0 +1,138 @@
+"""Helper functions to get whisper.cpp version and capabilities."""
+
+import subprocess
+import re
+from pathlib import Path
+from typing import Dict, Any, Optional
+
+
+def get_whisper_version_info() -> Dict[str, Any]:
+    """Get version and capability information for whisper.cpp.
+
+    Returns:
+        Dict containing version, commit hash, Core ML support status, etc.
+    """
+    info = {
+        "version": None,
+        "commit": None,
+        "coreml_supported": False,
+        "metal_supported": False,
+        "cuda_supported": False,
+        "build_type": None
+    }
+
+    # Find whisper-cli binary
+    whisper_dir = Path.home() / ".voicemode" / "services" / "whisper"
+    whisper_cli = whisper_dir / "build" / "bin" / "whisper-cli"
+
+    # Fallback to legacy location
+    if not whisper_cli.exists():
+        whisper_cli = whisper_dir / "main"
+
+    if not whisper_cli.exists():
+        return info
+
+    try:
+        # Get version from git if available
+        if (whisper_dir / ".git").exists():
+            result = subprocess.run(
+                ["git", "describe", "--tags", "--always"],
+                cwd=whisper_dir,
+                capture_output=True,
+                text=True,
+                timeout=5
+            )
+            if result.returncode == 0:
+                info["version"] = result.stdout.strip()
+
+            # Get commit hash
+            result = subprocess.run(
+                ["git", "rev-parse", "--short", "HEAD"],
+                cwd=whisper_dir,
+                capture_output=True,
+                text=True,
+                timeout=5
+            )
+            if result.returncode == 0:
+                info["commit"] = result.stdout.strip()
+
+        # Run whisper-cli to check capabilities
+        # Use a non-existent file to make it fail quickly but still show system info
+        result = subprocess.run(
+            [str(whisper_cli), "-h"],
+            capture_output=True,
+            text=True,
+            timeout=5
+        )
+
+        if result.returncode == 0:
+            output = result.stdout + result.stderr
+
+            # Check for Core ML support in help text or by running with dummy input
+            # Try running with minimal command to get system info
+            test_result = subprocess.run(
+                [str(whisper_cli), "--help"],
+                capture_output=True,
+                text=True,
+                timeout=5
+            )
+
+            # The system_info line shows what's compiled in
+            # We need to actually run it to see the capabilities
+            # Let's try with a non-existent model to fail fast but show system info
+            test_result = subprocess.run(
+                [str(whisper_cli), "-m", "nonexistent.bin"],
+                capture_output=True,
+                text=True,
+                timeout=5
+            )
+
+            test_output = test_result.stdout + test_result.stderr
+
+            # Parse system_info line for capabilities
+            if "COREML = 1" in test_output:
+                info["coreml_supported"] = True
+            elif "COREML = 0" in test_output:
+                info["coreml_supported"] = False
+
+            if "Metal" in test_output:
+                info["metal_supported"] = True
+
+            if "CUDA = 1" in test_output or "CUBLAS = 1" in test_output:
+                info["cuda_supported"] = True
+
+            # Check if this is a CMake or Make build
+            if (whisper_dir / "build" / "CMakeCache.txt").exists():
+                info["build_type"] = "CMake"
+
+                # Parse CMake cache for feature flags
+                with open(whisper_dir / "build" / "CMakeCache.txt") as f:
+                    cmake_content = f.read()
+                    if "WHISPER_COREML:BOOL=ON" in cmake_content:
+                        info["coreml_supported"] = True
+                    if "GGML_METAL:BOOL=ON" in cmake_content or "WHISPER_METAL:BOOL=ON" in cmake_content:
+                        info["metal_supported"] = True
+                    if "GGML_CUDA:BOOL=ON" in cmake_content or "WHISPER_CUDA:BOOL=ON" in cmake_content:
+                        info["cuda_supported"] = True
+            else:
+                info["build_type"] = "Make"
+
+    except Exception as e:
+        # Silently handle errors
+        pass
+
+    return info
+
+
+def check_coreml_model_exists(model_name: str) -> bool:
+    """Check if a Core ML model exists for the given whisper model.
+
+    Args:
+        model_name: Name of the whisper model (e.g., "large-v3-turbo")
+
+    Returns:
+        True if Core ML model exists, False otherwise
+    """
+    whisper_dir = Path.home() / ".voicemode" / "services" / "whisper"
+    coreml_model = whisper_dir / "models" / f"ggml-{model_name}-encoder.mlmodelc"
+    return coreml_model.exists()
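
Taken together, the new module exposes two read-only probes. A usage sketch, assuming voice-mode is installed and whisper.cpp lives in the default ~/.voicemode/services/whisper location:

from voice_mode.utils.services.whisper_version import (
    get_whisper_version_info,
    check_coreml_model_exists,
)

info = get_whisper_version_info()
print(f"whisper.cpp {info['version']} ({info['commit']}), built with {info['build_type']}")

# Core ML is only useful if the binary was built with it AND the
# compiled encoder (ggml-<model>-encoder.mlmodelc) exists on disk
if info["coreml_supported"] and check_coreml_model_exists("large-v3-turbo"):
    print("Core ML acceleration active for large-v3-turbo")
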
{voice_mode-2.27.0.dist-info → voice_mode-2.28.1.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voice-mode
-Version: 2.27.0
+Version: 2.28.1
 Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
 Project-URL: Homepage, https://github.com/mbailey/voicemode
 Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -98,6 +98,10 @@ Natural voice conversations for AI assistants. Voice Mode brings human-like voic
 1. **🎤 Computer with microphone and speakers** OR **☁️ LiveKit server** ([LiveKit Cloud](https://docs.livekit.io/home/cloud/) or [self-hosted](https://github.com/livekit/livekit))
 2. **🔑 OpenAI API Key** (optional) - Voice Mode can install free, open-source transcription and text-to-speech services locally
 
+**Optional for enhanced performance:**
+
+- **🍎 Xcode** (macOS only) - Required for Core ML acceleration of Whisper models (2-3x faster inference). Install from [Mac App Store](https://apps.apple.com/app/xcode/id497799835) then run `sudo xcode-select -s /Applications/Xcode.app/Contents/Developer`
+
 ## Quick Start
 
 > 📖 **Using a different tool?** See our [Integration Guides](docs/integrations/README.md) for Cursor, VS Code, Gemini CLI, and more!
|