PyPI - voice-mode - Versions diffs - 4.4.0__tar.gz → 4.5.0__tar.gz - Mend

voice-mode 4.4.0__tar.gz → 4.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (136) hide show

{voice_mode-4.4.0 → voice_mode-4.5.0}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,39 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [4.5.0] - 2025-09-18
+### Added
+- **Enhanced STT Logging**
+  - Add comprehensive logging for speech-to-text operations
+  - Log provider selection and fallback attempts
+  - Include transcription details and provider info in logs
+- **Configuration Management**
+  - Add `voicemode config edit` command for easy configuration file editing
+  - Support custom editor selection via --editor flag
+  - Automatically open configuration file in default editor
+- **Tool Environment Variables**
+  - Replace VOICEMODE_TOOLS with VOICEMODE_TOOLS_ENABLED and VOICEMODE_TOOLS_DISABLED
+  - Allow fine-grained control over tool availability
+  - Support comma-separated lists for enabling/disabling specific tools
+### Changed
+- **Provider Selection Architecture**
+  - Consolidate dual provider selection systems into single simple failover approach
+  - Remove SIMPLE_FAILOVER configuration - simple failover is now the only mode
+  - Simplify get_tts_config and get_stt_config to use direct configuration
+  - Eliminate ~400 lines of unused provider registry selection logic
+  - Provider registry now only stores endpoint info without complex selection
+### Fixed
+- Disable OpenAI client retries for local endpoints to avoid delays
+- Fix logger name consistency (voicemode vs voice-mode) for STT logging
+- Prevent test_installers from killing running voice services during tests
+- Update tests to work with refactored provider system
+- Resolve test failures related to new environment variables
 ## [4.4.0] - 2025-09-10
 ### Added

{voice_mode-4.4.0 → voice_mode-4.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voice-mode
-Version: 4.4.0
+Version: 4.5.0
 Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
 Project-URL: Homepage, https://github.com/mbailey/voicemode
 Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -261,9 +261,12 @@ claude mcp add --scope user voice-mode uvx voice-mode
 # Using Claude Code with Nix (NixOS)
 claude mcp add voice-mode nix run github:mbailey/voicemode
-# Using UV
+# Using UV (recommended)
 uvx voice-mode
+# For cleanest experience with UV (no deprecation warnings):
+UV_PYTHON=python3.13 uvx voice-mode
 # Using pip
 pip install voice-mode

{voice_mode-4.4.0 → voice_mode-4.5.0}/README.md RENAMED Viewed

@@ -183,9 +183,12 @@ claude mcp add --scope user voice-mode uvx voice-mode
 # Using Claude Code with Nix (NixOS)
 claude mcp add voice-mode nix run github:mbailey/voicemode
-# Using UV
+# Using UV (recommended)
 uvx voice-mode
+# For cleanest experience with UV (no deprecation warnings):
+UV_PYTHON=python3.13 uvx voice-mode
 # Using pip
 pip install voice-mode

{voice_mode-4.4.0 → voice_mode-4.5.0}/pyproject.toml RENAMED Viewed

@@ -210,3 +210,11 @@ directory = "htmlcov"
 [tool.coverage.xml]
 output = "coverage.xml"
+[dependency-groups]
+dev = [
+    "pytest>=8.4.2",
+    "pytest-asyncio>=1.2.0",
+    "pytest-cov>=7.0.0",
+    "pytest-mock>=3.15.0",
+]

{voice_mode-4.4.0 → voice_mode-4.5.0}/voice_mode/__version__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 # This file is automatically updated by 'make release'
 # Do not edit manually
-__version__ = "4.4.0"
+__version__ = "4.5.0"

{voice_mode-4.4.0 → voice_mode-4.5.0}/voice_mode/cli.py RENAMED Viewed

@@ -5,6 +5,8 @@ import asyncio
 import sys
 import os
 import warnings
+import subprocess
+import shutil
 import click
@@ -30,10 +32,12 @@ if not os.environ.get('VOICEMODE_DEBUG', '').lower() in ('true', '1', 'yes'):
 @click.version_option()
 @click.help_option('-h', '--help', help='Show this message and exit')
 @click.option('--debug', is_flag=True, help='Enable debug mode and show all warnings')
+@click.option('--tools-enabled', help='Comma-separated list of tools to enable (whitelist)')
+@click.option('--tools-disabled', help='Comma-separated list of tools to disable (blacklist)')
 @click.pass_context
-def voice_mode_main_cli(ctx, debug):
+def voice_mode_main_cli(ctx, debug, tools_enabled, tools_disabled):
     """Voice Mode - MCP server and service management.
     Without arguments, starts the MCP server.
     With subcommands, executes service management operations.
     """
@@ -44,7 +48,13 @@ def voice_mode_main_cli(ctx, debug):
         # Re-enable INFO logging
         import logging
         logging.getLogger("voice-mode").setLevel(logging.INFO)
+    # Set environment variables from CLI args
+    if tools_enabled:
+        os.environ['VOICEMODE_TOOLS_ENABLED'] = tools_enabled
+    if tools_disabled:
+        os.environ['VOICEMODE_TOOLS_DISABLED'] = tools_disabled
     if ctx.invoked_subcommand is None:
         # No subcommand - run MCP server
         # Note: warnings are already suppressed at module level unless debug is enabled
@@ -1277,6 +1287,72 @@ def config_set(key, value):
     click.echo(result)
+@config.command("edit")
+@click.help_option('-h', '--help')
+@click.option('--editor', help='Editor to use (overrides $EDITOR)')
+def config_edit(editor):
+    """Open the configuration file in your default editor.
+    Opens ~/.voicemode/voicemode.env in your configured editor.
+    Uses $EDITOR environment variable by default, or you can specify with --editor.
+    Examples:
+        voicemode config edit           # Use $EDITOR
+        voicemode config edit --editor vim
+        voicemode config edit --editor "code --wait"
+    """
+    from pathlib import Path
+    # Find the config file
+    config_path = Path.home() / ".voicemode" / "voicemode.env"
+    # Create default config if it doesn't exist
+    if not config_path.exists():
+        config_path.parent.mkdir(parents=True, exist_ok=True)
+        from voice_mode.config import load_voicemode_env
+        # This will create the default config
+        load_voicemode_env()
+    # Determine which editor to use
+    if editor:
+        editor_cmd = editor
+    else:
+        # Try environment variables in order of preference
+        editor_cmd = (
+            os.environ.get('EDITOR') or
+            os.environ.get('VISUAL') or
+            shutil.which('nano') or
+            shutil.which('vim') or
+            shutil.which('vi')
+        )
+    if not editor_cmd:
+        click.echo("❌ No editor found. Please set $EDITOR or use --editor")
+        click.echo("   Example: export EDITOR=vim")
+        click.echo("   Or use: voicemode config edit --editor vim")
+        return
+    # Handle complex editor commands (e.g., "code --wait")
+    if ' ' in editor_cmd:
+        import shlex
+        cmd_parts = shlex.split(editor_cmd)
+        cmd = cmd_parts + [str(config_path)]
+    else:
+        cmd = [editor_cmd, str(config_path)]
+    # Open the editor
+    try:
+        click.echo(f"Opening {config_path} in {editor_cmd}...")
+        subprocess.run(cmd, check=True)
+        click.echo("✅ Configuration file edited successfully")
+        click.echo("\nChanges will take effect when voicemode is restarted.")
+    except subprocess.CalledProcessError:
+        click.echo(f"❌ Editor exited with an error")
+    except FileNotFoundError:
+        click.echo(f"❌ Editor not found: {editor_cmd}")
+        click.echo("   Please check that the editor is installed and in your PATH")
 # Diagnostics group
 @voice_mode_main_cli.group()
 @click.help_option('-h', '--help', help='Show this message and exit')

{voice_mode-4.4.0 → voice_mode-4.5.0}/voice_mode/cli_commands/transcribe.py RENAMED Viewed

@@ -6,12 +6,6 @@ import asyncio
 from pathlib import Path
 from typing import Optional
-from voice_mode.tools.transcription import (
-    transcribe_audio,
-    TranscriptionBackend,
-    OutputFormat
-)
 @click.group()
 def transcribe():
@@ -61,6 +55,13 @@ def audio_command(
         voice-mode transcribe audio spanish.mp3 --language es --backend whisperx
     """
     async def run():
+        # Import here to avoid loading tools at module level
+        from voice_mode.tools.transcription import (
+            transcribe_audio,
+            TranscriptionBackend,
+            OutputFormat
+        )
         # Perform transcription
         result = await transcribe_audio(
             audio_file=audio_file,

{voice_mode-4.4.0 → voice_mode-4.5.0}/voice_mode/config.py RENAMED Viewed

@@ -253,7 +253,7 @@ PREFER_LOCAL = os.getenv("VOICEMODE_PREFER_LOCAL", "true").lower() in ("true", "
 ALWAYS_TRY_LOCAL = os.getenv("VOICEMODE_ALWAYS_TRY_LOCAL", "true").lower() in ("true", "1", "yes", "on")
 # Use simple failover without health checks
-SIMPLE_FAILOVER = os.getenv("VOICEMODE_SIMPLE_FAILOVER", "true").lower() in ("true", "1", "yes", "on")
+# Simple failover is now the only mode - configuration removed
 # Auto-start configuration
 AUTO_START_KOKORO = os.getenv("VOICEMODE_AUTO_START_KOKORO", "").lower() in ("true", "1", "yes", "on")

{voice_mode-4.4.0 → voice_mode-4.5.0}/voice_mode/conversation_logger.py RENAMED Viewed

@@ -189,6 +189,9 @@ class ConversationLogger:
             "timing": kwargs.get("timing"),
             "silence_detection": kwargs.get("silence_detection"),
             "error": kwargs.get("error"),
+            # Fallback information
+            "is_fallback": kwargs.get("is_fallback"),
+            "fallback_reason": kwargs.get("fallback_reason"),
             # Timing metrics
             "transcription_time": kwargs.get("transcription_time"),
             "total_turnaround_time": kwargs.get("total_turnaround_time"),
@@ -205,6 +208,9 @@ class ConversationLogger:
             "provider": kwargs.get("provider"),
             "provider_url": kwargs.get("provider_url"),
             "provider_type": kwargs.get("provider_type"),
+            # Fallback information
+            "is_fallback": kwargs.get("is_fallback"),
+            "fallback_reason": kwargs.get("fallback_reason"),
             "audio_format": kwargs.get("audio_format"),
             "timing": kwargs.get("timing"),
             "transport": kwargs.get("transport"),

{voice_mode-4.4.0 → voice_mode-4.5.0}/voice_mode/core.py RENAMED Viewed

@@ -18,6 +18,7 @@ from typing import Optional
 import numpy as np
 from pydub import AudioSegment
 from openai import AsyncOpenAI
+from .provider_discovery import is_local_provider
 import httpx
 from .config import SAMPLE_RATE
@@ -135,16 +136,22 @@ def get_openai_clients(api_key: str, stt_base_url: Optional[str] = None, tts_bas
         'limits': httpx.Limits(max_keepalive_connections=5, max_connections=10),
     }
+    # Disable retries for local endpoints - they either work or don't
+    stt_max_retries = 0 if is_local_provider(stt_base_url) else 2
+    tts_max_retries = 0 if is_local_provider(tts_base_url) else 2
     return {
         'stt': AsyncOpenAI(
             api_key=api_key,
             base_url=stt_base_url,
-            http_client=httpx.AsyncClient(**http_client_config)
+            http_client=httpx.AsyncClient(**http_client_config),
+            max_retries=stt_max_retries
         ),
         'tts': AsyncOpenAI(
             api_key=api_key,
             base_url=tts_base_url,
-            http_client=httpx.AsyncClient(**http_client_config)
+            http_client=httpx.AsyncClient(**http_client_config),
+            max_retries=tts_max_retries
         )
     }

{voice_mode-4.4.0 → voice_mode-4.5.0}/voice_mode/provider_discovery.py RENAMED Viewed

@@ -26,6 +26,8 @@ logger = logging.getLogger("voice-mode")
 def detect_provider_type(base_url: str) -> str:
     """Detect provider type from base URL."""
+    if not base_url:
+        return "unknown"
     if "openai.com" in base_url:
         return "openai"
     elif ":8880" in base_url:
@@ -47,6 +49,8 @@ def detect_provider_type(base_url: str) -> str:
 def is_local_provider(base_url: str) -> bool:
     """Check if a provider URL is for a local service."""
+    if not base_url:
+        return False
     provider_type = detect_provider_type(base_url)
     return provider_type in ["kokoro", "whisper", "local"] or \
            "127.0.0.1" in base_url or \
@@ -57,13 +61,11 @@ def is_local_provider(base_url: str) -> bool:
 class EndpointInfo:
     """Information about a discovered endpoint."""
     base_url: str
-    healthy: bool
     models: List[str]
     voices: List[str]  # Only for TTS
-    last_health_check: str  # ISO format timestamp
-    response_time_ms: Optional[float] = None
-    error: Optional[str] = None
     provider_type: Optional[str] = None  # e.g., "openai", "kokoro", "whisper"
+    last_check: Optional[str] = None  # ISO format timestamp of last attempt
+    last_error: Optional[str] = None  # Last error if any
 class ProviderRegistry:
@@ -78,44 +80,38 @@ class ProviderRegistry:
         self._initialized = False
     async def initialize(self):
-        """Initialize the registry by assuming all configured endpoints are healthy."""
+        """Initialize the registry with configured endpoints."""
         if self._initialized:
             return
         async with self._discovery_lock:
             if self._initialized:  # Double-check after acquiring lock
                 return
-            logger.info("Initializing provider registry (optimistic mode)...")
-            # Initialize TTS endpoints as healthy
+            logger.info("Initializing provider registry...")
+            # Initialize TTS endpoints
             for url in TTS_BASE_URLS:
                 provider_type = detect_provider_type(url)
                 self.registry["tts"][url] = EndpointInfo(
                     base_url=url,
-                    healthy=True,
                     models=["gpt4o-mini-tts", "tts-1", "tts-1-hd"] if provider_type == "openai" else ["tts-1"],
                     voices=["alloy", "echo", "fable", "nova", "onyx", "shimmer"] if provider_type == "openai" else ["af_alloy", "af_aoede", "af_bella", "af_heart", "af_jadzia", "af_jessica", "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky", "af_v0", "af_v0bella", "af_v0irulan", "af_v0nicole", "af_v0sarah", "af_v0sky", "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam", "am_michael", "am_onyx", "am_puck", "am_santa", "am_v0adam", "am_v0gurney", "am_v0michael", "bf_alice", "bf_emma", "bf_lily", "bf_v0emma", "bf_v0isabella", "bm_daniel", "bm_fable", "bm_george", "bm_lewis", "bm_v0george", "bm_v0lewis", "ef_dora", "em_alex", "em_santa", "ff_siwis", "hf_alpha", "hf_beta", "hm_omega", "hm_psi", "if_sara", "im_nicola", "jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo", "pf_dora", "pm_alex", "pm_santa", "zf_xiaobei", "zf_xiaoni", "zf_xiaoxiao", "zf_xiaoyi", "zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang"],
-                    last_health_check=datetime.now(timezone.utc).isoformat(),
-                    response_time_ms=None,
                     provider_type=provider_type
                 )
-            # Initialize STT endpoints as healthy
+            # Initialize STT endpoints
             for url in STT_BASE_URLS:
                 provider_type = detect_provider_type(url)
                 self.registry["stt"][url] = EndpointInfo(
                     base_url=url,
-                    healthy=True,
                     models=["whisper-1"],
-                    voices=[],
-                    last_health_check=datetime.now(timezone.utc).isoformat(),
-                    response_time_ms=None,
+                    voices=[],  # STT doesn't have voices
                     provider_type=provider_type
                 )
             self._initialized = True
-            logger.info(f"Provider registry initialized with {len(self.registry['tts'])} TTS and {len(self.registry['stt'])} STT endpoints (all assumed healthy)")
+            logger.info(f"Provider registry initialized with {len(self.registry['tts'])} TTS and {len(self.registry['stt'])} STT endpoints")
     async def _discover_endpoints(self, service_type: str, base_urls: List[str]):
         """Discover all endpoints for a service type."""
@@ -131,12 +127,11 @@ class ProviderRegistry:
                     logger.error(f"Failed to discover {service_type} endpoint {url}: {result}")
                     self.registry[service_type][url] = EndpointInfo(
                         base_url=url,
-                        healthy=False,
                         models=[],
                         voices=[],
-                        last_health_check=datetime.now(timezone.utc).isoformat(),
-                        error=str(result),
-                        provider_type=detect_provider_type(url)
+                        provider_type=detect_provider_type(url),
+                        last_check=datetime.now(timezone.utc).isoformat(),
+                        last_error=str(result)
                     )
     async def _discover_endpoint(self, service_type: str, base_url: str) -> None:
@@ -201,12 +196,11 @@ class ProviderRegistry:
             # Store endpoint info
             self.registry[service_type][base_url] = EndpointInfo(
                 base_url=base_url,
-                healthy=True,
                 models=models,
                 voices=voices,
-                last_health_check=datetime.now(timezone.utc).isoformat(),
-                response_time_ms=response_time,
-                provider_type=detect_provider_type(base_url)
+                provider_type=detect_provider_type(base_url),
+                last_check=datetime.now(timezone.utc).isoformat(),
+                last_error=None
             )
             logger.info(f"Successfully discovered {service_type} endpoint {base_url} with {len(models)} models and {len(voices)} voices")
@@ -215,12 +209,11 @@ class ProviderRegistry:
             logger.warning(f"Endpoint {base_url} discovery failed: {e}")
             self.registry[service_type][base_url] = EndpointInfo(
                 base_url=base_url,
-                healthy=False,
                 models=[],
                 voices=[],
-                last_health_check=datetime.now(timezone.utc).isoformat(),
-                error=str(e),
-                provider_type=detect_provider_type(base_url)
+                provider_type=detect_provider_type(base_url),
+                last_check=datetime.now(timezone.utc).isoformat(),
+                last_error=str(e)
             )
     async def _discover_voices(self, base_url: str, client: AsyncOpenAI) -> List[str]:
@@ -247,41 +240,35 @@ class ProviderRegistry:
         # The system will use configured defaults instead
         return []
-    async def check_health(self, service_type: str, base_url: str) -> bool:
-        """Check the health of a specific endpoint and update registry."""
-        logger.debug(f"Health check for {service_type} endpoint: {base_url}")
-        # Re-discover the endpoint
-        await self._discover_endpoint(service_type, base_url)
-        # Return health status
-        endpoint_info = self.registry[service_type].get(base_url)
-        return endpoint_info.healthy if endpoint_info else False
-    def get_healthy_endpoints(self, service_type: str) -> List[EndpointInfo]:
-        """Get all healthy endpoints for a service type."""
+    def get_endpoints(self, service_type: str) -> List[EndpointInfo]:
+        """Get all endpoints for a service type in priority order."""
         endpoints = []
         # Return endpoints in the order they were configured
         base_urls = TTS_BASE_URLS if service_type == "tts" else STT_BASE_URLS
         for url in base_urls:
             info = self.registry[service_type].get(url)
-            if info and info.healthy:
+            if info:
                 endpoints.append(info)
         return endpoints
+    def get_healthy_endpoints(self, service_type: str) -> List[EndpointInfo]:
+        """Deprecated: Use get_endpoints instead. Returns all endpoints."""
+        return self.get_endpoints(service_type)
     def find_endpoint_with_voice(self, voice: str) -> Optional[EndpointInfo]:
-        """Find the first healthy TTS endpoint that supports a specific voice."""
-        for endpoint in self.get_healthy_endpoints("tts"):
+        """Find the first TTS endpoint that supports a specific voice."""
+        for endpoint in self.get_endpoints("tts"):
             if voice in endpoint.voices:
                 return endpoint
         return None
     def find_endpoint_with_model(self, service_type: str, model: str) -> Optional[EndpointInfo]:
-        """Find the first healthy endpoint that supports a specific model."""
-        for endpoint in self.get_healthy_endpoints(service_type):
+        """Find the first endpoint that supports a specific model."""
+        for endpoint in self.get_endpoints(service_type):
             if model in endpoint.models:
                 return endpoint
         return None
@@ -291,47 +278,36 @@ class ProviderRegistry:
         return {
             "tts": {
                 url: {
-                    "healthy": info.healthy,
                     "models": info.models,
                     "voices": info.voices,
-                    "response_time_ms": info.response_time_ms,
-                    "last_check": info.last_health_check,
-                    "error": info.error
+                    "provider_type": info.provider_type,
+                    "last_check": info.last_check,
+                    "last_error": info.last_error
                 }
                 for url, info in self.registry["tts"].items()
             },
             "stt": {
                 url: {
-                    "healthy": info.healthy,
                     "models": info.models,
-                    "response_time_ms": info.response_time_ms,
-                    "last_check": info.last_health_check,
-                    "error": info.error
+                    "provider_type": info.provider_type,
+                    "last_check": info.last_check,
+                    "last_error": info.last_error
                 }
                 for url, info in self.registry["stt"].items()
             }
         }
-    async def mark_unhealthy(self, service_type: str, base_url: str, error: str):
-        """Mark an endpoint as unhealthy after a failure.
-        If ALWAYS_TRY_LOCAL is enabled and the provider is local, it will not be
-        permanently marked as unhealthy - it will be retried on next request.
+    async def mark_failed(self, service_type: str, base_url: str, error: str):
+        """Record that an endpoint failed.
+        This updates the last_error and last_check fields for diagnostics,
+        but doesn't prevent the endpoint from being tried again.
         """
         if base_url in self.registry[service_type]:
-            # Check if we should skip marking local providers as unhealthy
-            if config.ALWAYS_TRY_LOCAL and is_local_provider(base_url):
-                # Log the error but don't mark as unhealthy
-                logger.info(f"Local {service_type} endpoint {base_url} failed ({error}) but will be retried (ALWAYS_TRY_LOCAL enabled)")
-                # Update error and last check time for diagnostics, but keep healthy=True
-                self.registry[service_type][base_url].error = f"{error} (will retry)"
-                self.registry[service_type][base_url].last_health_check = datetime.now(timezone.utc).isoformat()
-            else:
-                # Normal behavior - mark as unhealthy
-                self.registry[service_type][base_url].healthy = False
-                self.registry[service_type][base_url].error = error
-                self.registry[service_type][base_url].last_health_check = datetime.now(timezone.utc).isoformat()
-                logger.warning(f"Marked {service_type} endpoint {base_url} as unhealthy: {error}")
+            # Update error and last check time for diagnostics
+            self.registry[service_type][base_url].last_error = error
+            self.registry[service_type][base_url].last_check = datetime.now(timezone.utc).isoformat()
+            logger.info(f"{service_type} endpoint {base_url} failed: {error}")
 # Global registry instance

voice-mode 4.4.0__tar.gz → 4.5.0__tar.gz

voice-mode 4.4.0__tar.gz → 4.5.0__tar.gz