PyPI - voice-mode - Versions diffs - 2.27.0__py3-none-any.whl → 2.28.0__py3-none-any.whl - Mend

voice-mode 2.27.0__py3-none-any.whl → 2.28.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

voice_mode/frontend/.next/types/app/api/connection-details/route.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-// File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/api/connection-details/route.ts
+// File: /tmp/build-via-sdist-_mb6zwu1/voice_mode-2.28.0/voice_mode/frontend/app/api/connection-details/route.ts
 import * as entry from '../../../../../app/api/connection-details/route.js'
 import type { NextRequest } from 'next/server.js'

voice_mode/frontend/.next/types/app/layout.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-// File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/layout.tsx
+// File: /tmp/build-via-sdist-_mb6zwu1/voice_mode-2.28.0/voice_mode/frontend/app/layout.tsx
 import * as entry from '../../../app/layout.js'
 import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'

voice_mode/frontend/.next/types/app/page.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-// File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/page.tsx
+// File: /tmp/build-via-sdist-_mb6zwu1/voice_mode-2.28.0/voice_mode/frontend/app/page.tsx
 import * as entry from '../../../app/page.js'
 import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'

voice_mode/frontend/package-lock.json CHANGED Viewed

@@ -1489,9 +1489,9 @@
       }
     },
     "node_modules/caniuse-lite": {
-      "version": "1.0.30001735",
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001735.tgz",
-      "integrity": "sha512-EV/laoX7Wq2J9TQlyIXRxTJqIw4sxfXS4OYgudGxBYRuTv0q7AM6yMEpU/Vo1I94thg9U6EZ2NfZx9GJq83u7w==",
+      "version": "1.0.30001737",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001737.tgz",
+      "integrity": "sha512-BiloLiXtQNrY5UyF0+1nSJLXUENuhka2pzy2Fx5pGxqavdrxSCW4U6Pn/PoG3Efspi2frRbHpBV2XsrPE6EDlw==",
       "dev": true,
       "funding": [
         {
@@ -1774,9 +1774,9 @@
       "license": "MIT"
     },
     "node_modules/electron-to-chromium": {
-      "version": "1.5.207",
-      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.207.tgz",
-      "integrity": "sha512-mryFrrL/GXDTmAtIVMVf+eIXM09BBPlO5IQ7lUyKmK8d+A4VpRGG+M3ofoVef6qyF8s60rJei8ymlJxjUA8Faw==",
+      "version": "1.5.208",
+      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.208.tgz",
+      "integrity": "sha512-ozZyibehoe7tOhNaf16lKmljVf+3npZcJIEbJRVftVsmAg5TeA1mGS9dVCZzOwr2xT7xK15V0p7+GZqSPgkuPg==",
       "dev": true,
       "license": "ISC"
     },

voice_mode/tools/converse.py CHANGED Viewed

@@ -613,31 +613,51 @@ async def _speech_to_text_internal(
         if stt_config.get('base_url') and ("127.0.0.1" in stt_config['base_url'] or "localhost" in stt_config['base_url']):
             provider = "whisper-local"
-        # Validate format for provider
-        export_format = validate_audio_format(STT_AUDIO_FORMAT, provider, "stt")
-        # Convert WAV to target format for upload
-        logger.debug(f"Converting WAV to {export_format.upper()} for upload...")
-        try:
-            audio = AudioSegment.from_wav(wav_file)
-            logger.debug(f"Audio loaded - Duration: {len(audio)}ms, Channels: {audio.channels}, Frame rate: {audio.frame_rate}")
-            # Get export parameters for the format
-            export_params = get_format_export_params(export_format)
+        # Check if we can skip conversion for local whisper
+        skip_conversion = False
+        if provider == "whisper-local":
+            # Check if whisper is truly local (not SSH-forwarded)
+            from voice_mode.utils.services.common import check_service_status
+            from voice_mode.config import WHISPER_PORT
+            status, _ = check_service_status(WHISPER_PORT)
+            if status == "local":
+                skip_conversion = True
+                logger.info("Detected truly local whisper - skipping audio conversion, using WAV directly")
+        if skip_conversion:
+            # Use WAV directly for local whisper
+            upload_file = wav_file
+            export_format = "wav"
+            logger.debug("Using WAV file directly for local whisper upload")
+        else:
+            # Validate format for provider
+            export_format = validate_audio_format(STT_AUDIO_FORMAT, provider, "stt")
-            with tempfile.NamedTemporaryFile(suffix=f'.{export_format}', delete=False) as export_file_obj:
-                export_file = export_file_obj.name
-                audio.export(export_file, **export_params)
-                upload_file = export_file
-                logger.debug(f"{export_format.upper()} created for STT upload: {upload_file}")
-        except Exception as e:
-            if "ffmpeg" in str(e).lower() or "avconv" in str(e).lower():
-                logger.error(f"Audio conversion failed - FFmpeg may not be installed: {e}")
-                from voice_mode.utils.ffmpeg_check import get_install_instructions
-                logger.error(f"\n{get_install_instructions()}")
-                raise RuntimeError("FFmpeg is required but not found. Please install FFmpeg and try again.") from e
-            else:
-                raise
+            # Convert WAV to target format for upload
+            logger.debug(f"Converting WAV to {export_format.upper()} for upload...")
+            conversion_start = time.perf_counter()
+            try:
+                audio = AudioSegment.from_wav(wav_file)
+                logger.debug(f"Audio loaded - Duration: {len(audio)}ms, Channels: {audio.channels}, Frame rate: {audio.frame_rate}")
+                # Get export parameters for the format
+                export_params = get_format_export_params(export_format)
+                with tempfile.NamedTemporaryFile(suffix=f'.{export_format}', delete=False) as export_file_obj:
+                    export_file = export_file_obj.name
+                    audio.export(export_file, **export_params)
+                    upload_file = export_file
+                    conversion_time = time.perf_counter() - conversion_start
+                    logger.info(f"Audio conversion: WAV → {export_format.upper()} took {conversion_time:.3f}s")
+                    logger.debug(f"{export_format.upper()} created for STT upload: {upload_file}")
+            except Exception as e:
+                if "ffmpeg" in str(e).lower() or "avconv" in str(e).lower():
+                    logger.error(f"Audio conversion failed - FFmpeg may not be installed: {e}")
+                    from voice_mode.utils.ffmpeg_check import get_install_instructions
+                    logger.error(f"\n{get_install_instructions()}")
+                    raise RuntimeError("FFmpeg is required but not found. Please install FFmpeg and try again.") from e
+                else:
+                    raise
         # Save debug file for upload version
         if DEBUG:

voice_mode/tools/service.py CHANGED Viewed

@@ -233,18 +233,45 @@ async def status_service(service_name: str) -> str:
         if service_name == "whisper":
             # Get model info
             model = "unknown"
+            model_name = None
             for i, arg in enumerate(cmdline):
                 if arg == "--model" and i + 1 < len(cmdline):
                     model = Path(cmdline[i + 1]).name
+                    # Extract model name from filename (e.g., ggml-large-v3-turbo.bin -> large-v3-turbo)
+                    if model.startswith("ggml-") and model.endswith(".bin"):
+                        model_name = model[5:-4]
                     break
             extra_info_parts.append(f"Model: {model}")
-            # Try to get version info
+            # Get version and capability info
             try:
-                from voice_mode.tools.services.version_info import get_whisper_version
-                version_info = get_whisper_version()
+                from voice_mode.utils.services.whisper_version import get_whisper_version_info, check_coreml_model_exists
+                version_info = get_whisper_version_info()
                 if version_info.get("version"):
                     extra_info_parts.append(f"Version: {version_info['version']}")
+                elif version_info.get("commit"):
+                    extra_info_parts.append(f"Commit: {version_info['commit']}")
+                # Show Core ML status on Apple Silicon
+                if platform.machine() == "arm64" and platform.system() == "Darwin":
+                    if version_info.get("coreml_supported"):
+                        # Check if the current model has Core ML
+                        if model_name and check_coreml_model_exists(model_name):
+                            extra_info_parts.append("Core ML: ✓ Enabled & Active")
+                        else:
+                            extra_info_parts.append("Core ML: ✓ Supported (model not converted)")
+                    else:
+                        extra_info_parts.append("Core ML: ✗ Not compiled in")
+                # Show GPU support
+                gpu_support = []
+                if version_info.get("metal_supported"):
+                    gpu_support.append("Metal")
+                if version_info.get("cuda_supported"):
+                    gpu_support.append("CUDA")
+                if gpu_support:
+                    extra_info_parts.append(f"GPU: {', '.join(gpu_support)}")
             except:
                 pass

voice_mode/tools/services/kokoro/install.py CHANGED Viewed

@@ -243,7 +243,7 @@ async def kokoro_install(
     <key>EnvironmentVariables</key>
     <dict>
         <key>PATH</key>
-        <string>/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/homebrew/bin</string>
+        <string>{os.path.expanduser("~/.local/bin")}:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/homebrew/bin</string>
     </dict>
 </dict>
 </plist>"""

voice_mode/tools/services/whisper/__init__.py CHANGED Viewed

@@ -2,12 +2,22 @@
 from voice_mode.tools.services.whisper.install import whisper_install
 from voice_mode.tools.services.whisper.uninstall import whisper_uninstall
-from voice_mode.tools.services.whisper.download_model import download_model
-from voice_mode.tools.services.whisper.list_models_tool import whisper_list_models
+from voice_mode.tools.services.whisper.model_install import whisper_model_install
+from voice_mode.tools.services.whisper.list_models import whisper_models
+from voice_mode.tools.services.whisper.model_active import whisper_model_active
+from voice_mode.tools.services.whisper.model_remove import whisper_model_remove
+from voice_mode.tools.services.whisper.model_benchmark import whisper_model_benchmark
 __all__ = [
     'whisper_install',
     'whisper_uninstall',
-    'download_model',
-    'whisper_list_models'
-]
+    'whisper_model_install',
+    'whisper_models',
+    'whisper_model_active',
+    'whisper_model_remove',
+    'whisper_model_benchmark'
+]
+# Backwards compatibility aliases
+download_model = whisper_model_install  # Deprecated alias
+whisper_list_models = whisper_models    # Deprecated alias

voice_mode/tools/services/whisper/install.py CHANGED Viewed

@@ -206,13 +206,20 @@ async def whisper_install(
             except subprocess.CalledProcessError:
                 logger.warning("Make clean failed, continuing anyway...")
-        # Build with appropriate flags
+        # Build with CMake for better control and Core ML support
         build_env = os.environ.copy()
+        cmake_flags = []
-        if is_macos and use_gpu:
-            build_env["WHISPER_METAL"] = "1"
+        # Enable GPU support based on platform
+        if is_macos:
+            # On macOS, always enable Metal
+            cmake_flags.append("-DGGML_METAL=ON")
+            # On Apple Silicon, also enable Core ML for better performance
+            if platform.machine() == "arm64":
+                cmake_flags.append("-DWHISPER_COREML=ON")
+                logger.info("Enabling Core ML support for Apple Silicon")
         elif is_linux and use_gpu:
-            build_env["WHISPER_CUDA"] = "1"
+            cmake_flags.append("-DGGML_CUDA=ON")
         # Get number of CPU cores for parallel build
         cpu_count = os.cpu_count() or 4
@@ -220,13 +227,31 @@ async def whisper_install(
         # Determine if we should show build output
         debug_mode = os.environ.get("VOICEMODE_DEBUG", "").lower() in ("true", "1", "yes")
+        # Configure with CMake
+        logger.info("Configuring whisper.cpp build...")
+        cmake_cmd = ["cmake", "-B", "build"] + cmake_flags
         if debug_mode:
-            subprocess.run(["make", f"-j{cpu_count}"], env=build_env, check=True)
+            subprocess.run(cmake_cmd, env=build_env, check=True)
         else:
-            # Suppress output unless there's an error
-            logger.info("Building whisper.cpp (this may take a few minutes)...")
             try:
-                result = subprocess.run(["make", f"-j{cpu_count}"], env=build_env,
+                result = subprocess.run(cmake_cmd, env=build_env,
+                                      capture_output=True, text=True, check=True)
+            except subprocess.CalledProcessError as e:
+                logger.error(f"Configuration failed: {e}")
+                if e.stderr:
+                    logger.error(f"Configuration errors:\n{e.stderr}")
+                raise
+        # Build with CMake
+        logger.info("Building whisper.cpp (this may take a few minutes)...")
+        build_cmd = ["cmake", "--build", "build", "-j", str(cpu_count), "--config", "Release"]
+        if debug_mode:
+            subprocess.run(build_cmd, env=build_env, check=True)
+        else:
+            try:
+                result = subprocess.run(build_cmd, env=build_env,
                                       capture_output=True, text=True, check=True)
                 logger.info("Build completed successfully")
             except subprocess.CalledProcessError as e:
@@ -258,7 +283,8 @@ async def whisper_install(
         model_path = download_result["path"]
         # Test whisper with sample if available
-        main_path = os.path.join(install_dir, "main")
+        # With CMake build, binaries are in build/bin/
+        main_path = os.path.join(install_dir, "build", "bin", "whisper-cli")
         sample_path = os.path.join(install_dir, "samples", "jfk.wav")
         if os.path.exists(sample_path) and os.path.exists(main_path):
             try:
@@ -283,6 +309,11 @@ async def whisper_install(
 WHISPER_DIR="{install_dir}"
 LOG_FILE="{os.path.join(voicemode_dir, 'whisper-server.log')}"
+# Source voicemode configuration if it exists
+if [ -f "{voicemode_dir}/voicemode.env" ]; then
+    source "{voicemode_dir}/voicemode.env"
+fi
 # Model selection with environment variable support
 MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-{model}}}"
 MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"

voice_mode/tools/services/whisper/list_models.py CHANGED Viewed

@@ -2,18 +2,18 @@
 from typing import Dict, Any
 from voice_mode.tools.services.whisper.models import (
-    WHISPER_MODELS,
+    WHISPER_MODEL_REGISTRY,
     get_model_directory,
-    get_current_model,
-    is_model_installed,
-    get_installed_models,
+    get_active_model,
+    is_whisper_model_installed,
+    get_installed_whisper_models,
     format_size,
-    has_coreml_model,
+    has_whisper_coreml_model,
     is_apple_silicon
 )
-async def list_whisper_models() -> Dict[str, Any]:
+async def whisper_models() -> Dict[str, Any]:
     """List available Whisper models and their installation status.
     Returns:
@@ -21,32 +21,32 @@ async def list_whisper_models() -> Dict[str, Any]:
     """
     try:
         model_dir = get_model_directory()
-        current_model = get_current_model()
-        installed_models = get_installed_models()
+        current_model = get_active_model()
+        installed_models = get_installed_whisper_models()
         # Build models list with status
         models = []
         show_coreml = is_apple_silicon()  # Only show Core ML on Apple Silicon
-        for model_name, info in WHISPER_MODELS.items():
+        for model_name, info in WHISPER_MODEL_REGISTRY.items():
             model_status = {
                 "name": model_name,
                 "size_mb": info["size_mb"],
                 "size": format_size(info["size_mb"]),
                 "languages": info["languages"],
                 "description": info["description"],
-                "installed": is_model_installed(model_name),
+                "installed": is_whisper_model_installed(model_name),
                 "current": model_name == current_model,
-                "has_coreml": has_coreml_model(model_name) if show_coreml else False
+                "has_coreml": has_whisper_coreml_model(model_name) if show_coreml else False
             }
             models.append(model_status)
         # Calculate totals
         total_installed_size = sum(
-            WHISPER_MODELS[m]["size_mb"] for m in installed_models
+            WHISPER_MODEL_REGISTRY[m]["size_mb"] for m in installed_models
         )
         total_available_size = sum(
-            m["size_mb"] for m in WHISPER_MODELS.values()
+            m["size_mb"] for m in WHISPER_MODEL_REGISTRY.values()
         )
         return {
@@ -55,7 +55,7 @@ async def list_whisper_models() -> Dict[str, Any]:
             "current_model": current_model,
             "model_directory": str(model_dir),
             "installed_count": len(installed_models),
-            "total_count": len(WHISPER_MODELS),
+            "total_count": len(WHISPER_MODEL_REGISTRY),
             "installed_size_mb": total_installed_size,
             "installed_size": format_size(total_installed_size),
             "available_size_mb": total_available_size,

voice_mode/tools/services/whisper/model_active.py ADDED Viewed

@@ -0,0 +1,54 @@
+"""MCP tool for showing/setting active Whisper model."""
+from typing import Optional, Dict, Any
+from voice_mode.tools.services.whisper.models import (
+    get_active_model,
+    set_active_model,
+    is_whisper_model_installed,
+    WHISPER_MODEL_REGISTRY
+)
+async def whisper_model_active(model_name: Optional[str] = None) -> Dict[str, Any]:
+    """Show or set the active Whisper model.
+    Args:
+        model_name: Model to set as active (None to just show current)
+    Returns:
+        Dict with current/new active model info
+    """
+    if model_name is None:
+        # Just show current
+        current = get_active_model()
+        return {
+            "success": True,
+            "active_model": current,
+            "installed": is_whisper_model_installed(current),
+            "message": f"Current active model: {current}"
+        }
+    # Validate model exists in registry
+    if model_name not in WHISPER_MODEL_REGISTRY:
+        return {
+            "success": False,
+            "error": f"Model {model_name} is not a valid Whisper model",
+            "available_models": list(WHISPER_MODEL_REGISTRY.keys())
+        }
+    # Check if model is installed
+    if not is_whisper_model_installed(model_name):
+        return {
+            "success": False,
+            "error": f"Model {model_name} is not installed. Install it first with whisper_model_install()",
+            "model": model_name
+        }
+    # Set new active model
+    set_active_model(model_name)
+    return {
+        "success": True,
+        "active_model": model_name,
+        "message": f"Active model set to {model_name}. Restart whisper service for changes to take effect."
+    }

voice_mode/tools/services/whisper/model_benchmark.py ADDED Viewed

@@ -0,0 +1,159 @@
+"""MCP tool for benchmarking Whisper models."""
+from typing import Union, List, Dict, Any, Optional
+from voice_mode.tools.services.whisper.models import (
+    get_installed_whisper_models,
+    benchmark_whisper_model,
+    is_whisper_model_installed,
+    WHISPER_MODEL_REGISTRY
+)
+async def whisper_model_benchmark(
+    models: Union[str, List[str]] = "installed",
+    sample_file: Optional[str] = None,
+    runs: int = 1
+) -> Dict[str, Any]:
+    """Benchmark Whisper model performance.
+    Args:
+        models: 'installed' (default), 'all', specific model name, or list of models
+        sample_file: Optional audio file for testing (uses default JFK sample if None)
+        runs: Number of benchmark runs per model (default: 1)
+    Returns:
+        Dict with benchmark results and recommendations
+    """
+    # Determine which models to benchmark
+    if models == "installed":
+        model_list = get_installed_whisper_models()
+        if not model_list:
+            return {
+                "success": False,
+                "error": "No Whisper models are installed. Install models first with whisper_model_install()"
+            }
+    elif models == "all":
+        # Only benchmark installed models from the full list
+        all_models = list(WHISPER_MODEL_REGISTRY.keys())
+        model_list = [m for m in all_models if is_whisper_model_installed(m)]
+        if not model_list:
+            return {
+                "success": False,
+                "error": "No Whisper models are installed"
+            }
+    elif isinstance(models, str):
+        # Single model specified
+        if not is_whisper_model_installed(models):
+            return {
+                "success": False,
+                "error": f"Model {models} is not installed"
+            }
+        model_list = [models]
+    elif isinstance(models, list):
+        # List of models specified
+        model_list = []
+        for model in models:
+            if is_whisper_model_installed(model):
+                model_list.append(model)
+            else:
+                # Model not installed, skip silently or could use logger.warning
+                pass
+        if not model_list:
+            return {
+                "success": False,
+                "error": "None of the specified models are installed"
+            }
+    else:
+        return {
+            "success": False,
+            "error": f"Invalid models parameter: {models}"
+        }
+    # Run benchmarks
+    results = []
+    failed = []
+    for model in model_list:
+        best_result = None
+        for run_num in range(runs):
+            result = benchmark_whisper_model(model, sample_file)
+            if result.get("success"):
+                # Keep the best (fastest) result from multiple runs
+                if best_result is None or result["total_time_ms"] < best_result["total_time_ms"]:
+                    best_result = result
+            else:
+                # If any run fails, record the failure
+                if model not in failed:
+                    failed.append(model)
+                    results.append({
+                        "model": model,
+                        "success": False,
+                        "error": result.get("error", "Benchmark failed")
+                    })
+                break
+        if best_result:
+            results.append(best_result)
+    if not results:
+        return {
+            "success": False,
+            "error": "No benchmarks completed successfully"
+        }
+    # Find successful results for analysis
+    successful_results = [r for r in results if r.get("success")]
+    if successful_results:
+        # Find fastest model
+        fastest = min(successful_results, key=lambda x: x["total_time_ms"])
+        # Generate recommendations based on results
+        recommendations = []
+        # Categorize by speed
+        for result in successful_results:
+            rtf = result.get("real_time_factor", 0)
+            if rtf > 20:
+                category = "Ultra-fast (good for real-time)"
+            elif rtf > 5:
+                category = "Fast (good for interactive use)"
+            elif rtf > 1:
+                category = "Moderate (good balance)"
+            else:
+                category = "Slow (best accuracy)"
+            result["category"] = category
+        # Generate specific recommendations
+        if fastest["real_time_factor"] > 10:
+            recommendations.append(f"Use {fastest['model']} for real-time applications")
+        # Find best balance (medium or base if available)
+        balance_models = [r for r in successful_results if r["model"] in ["base", "medium"]]
+        if balance_models:
+            best_balance = min(balance_models, key=lambda x: x["total_time_ms"])
+            recommendations.append(f"Use {best_balance['model']} for balanced speed/accuracy")
+        # Recommend large models for accuracy
+        large_models = [r for r in successful_results if "large" in r["model"]]
+        if large_models:
+            best_large = min(large_models, key=lambda x: x["total_time_ms"])
+            recommendations.append(f"Use {best_large['model']} for best accuracy")
+    else:
+        fastest = None
+        recommendations = ["Unable to generate recommendations - no successful benchmarks"]
+    return {
+        "success": True,
+        "benchmarks": results,
+        "models_tested": len(model_list),
+        "models_failed": len(failed),
+        "fastest_model": fastest["model"] if fastest else None,
+        "fastest_time_ms": fastest["total_time_ms"] if fastest else None,
+        "recommendations": recommendations,
+        "sample_file": sample_file or "default JFK sample",
+        "runs_per_model": runs
+    }

voice-mode 2.27.0__py3-none-any.whl → 2.28.0__py3-none-any.whl

voice-mode 2.27.0__py3-none-any.whl → 2.28.0__py3-none-any.whl