PyPI - voice-mode - Versions diffs - 3.34.3__py3-none-any.whl → 4.1.0__py3-none-any.whl - Mend

voice-mode 3.34.3__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (158) hide show

voice_mode/tools/configuration_management.py CHANGED Viewed

@@ -5,7 +5,7 @@ import re
 from pathlib import Path
 from typing import Dict, Optional, List
 from voice_mode.server import mcp
-from voice_mode.config import BASE_DIR
+from voice_mode.config import BASE_DIR, reload_configuration, find_voicemode_env_files
 import logging
 logger = logging.getLogger("voice-mode")
@@ -109,7 +109,7 @@ async def update_config(key: str, value: str) -> str:
     """Update a configuration value in the voicemode.env file.
     Args:
-        key: The configuration key to update (e.g., 'VOICEMODE_TTS_VOICES')
+        key: The configuration key to update (e.g., 'VOICEMODE_VOICES')
         value: The new value for the configuration
     Returns:
@@ -175,7 +175,7 @@ async def list_config_keys() -> str:
         ("Provider Configuration", [
             ("VOICEMODE_TTS_BASE_URLS", "Comma-separated list of TTS endpoints"),
             ("VOICEMODE_STT_BASE_URLS", "Comma-separated list of STT endpoints"),
-            ("VOICEMODE_TTS_VOICES", "Comma-separated list of preferred voices"),
+            ("VOICEMODE_VOICES", "Comma-separated list of preferred voices"),
             ("VOICEMODE_TTS_MODELS", "Comma-separated list of preferred models"),
             ("VOICEMODE_PREFER_LOCAL", "Prefer local providers over cloud (true/false)"),
             ("VOICEMODE_ALWAYS_TRY_LOCAL", "Always attempt local providers (true/false)"),
@@ -211,6 +211,107 @@ async def list_config_keys() -> str:
             lines.append(f"    {description}")
         lines.append("")
-    lines.append("💡 Usage: update_config(key='VOICEMODE_TTS_VOICES', value='af_sky,nova')")
+    lines.append("💡 Usage: update_config(key='VOICEMODE_VOICES', value='af_sky,nova')")
-    return "\n".join(lines)
+    return "\n".join(lines)
+@mcp.tool()
+async def config_reload() -> str:
+    """Reload configuration from .voicemode.env files and clear all caches.
+    This tool reloads configuration from:
+    1. Global ~/.voicemode/voicemode.env file
+    2. Project-specific .voicemode.env files (searched up directory tree)
+    3. Environment variables (highest priority)
+    Returns:
+        Status message listing the configuration files found after the
+        reload, plus any files that appeared or disappeared compared with
+        the discovery made just before the reload. On failure the error is
+        logged and returned as a message string instead of being raised.
+    """
+    try:
+        # Snapshot the discovered files so the report can show what changed
+        old_files = find_voicemode_env_files()
+        # Reload configuration
+        reload_configuration()
+        # Discover again after the reload
+        new_files = find_voicemode_env_files()
+        lines = ["✅ Configuration reloaded successfully!", ""]
+        if new_files:
+            lines.append("📁 Configuration files loaded (in order):")
+            lines.extend(f"  {i}. {config_file}" for i, config_file in enumerate(new_files, 1))
+        else:
+            lines.append("📁 No configuration files found - using defaults")
+        # Report the delta promised by the docstring; nothing is added when
+        # the set of files is unchanged, so the usual output is unaffected
+        added = [f for f in new_files if f not in old_files]
+        removed = [f for f in old_files if f not in new_files]
+        if added or removed:
+            lines.append("")
+            lines.append("📝 Changes since previous load:")
+            lines.extend(f"  + {f}" for f in added)
+            lines.extend(f"  - {f}" for f in removed)
+        lines.append("")
+        lines.append("🔄 All caches have been cleared")
+        lines.append("📊 Voice preferences and provider settings updated")
+        logger.info(f"Configuration reloaded from {len(new_files)} files")
+        return "\n".join(lines)
+    except Exception as e:
+        # Tool boundary: report the failure to the caller as text
+        logger.error(f"Failed to reload configuration: {e}")
+        return f"❌ Failed to reload configuration: {str(e)}"
+@mcp.tool()
+async def show_config_files() -> str:
+    """Show which .voicemode.env files are being used for configuration.
+    The report contains the current working directory followed by every file
+    returned by find_voicemode_env_files(), in the order returned. Each file
+    is labelled by location (global, project .voicemode dir, current dir or
+    parent dir) and marked as existing or missing.
+    Returns:
+        Formatted list of configuration files and their status. On failure
+        the error is logged and returned as a message string instead of
+        being raised.
+    """
+    try:
+        config_files = find_voicemode_env_files()
+        # Resolved once: neither value changes while the report is built
+        cwd = Path.cwd()
+        global_dir = Path.home() / ".voicemode"
+        lines = ["📋 Voice Mode Configuration Files", "=" * 40, ""]
+        lines.append(f"🗂️  Current directory: {cwd}")
+        lines.append("")
+        if config_files:
+            lines.append("📁 Configuration files (loading order):")
+            lines.append("")
+            for i, config_file in enumerate(config_files, 1):
+                status = "✅ EXISTS" if config_file.exists() else "❌ MISSING"
+                file_type = ""
+                if config_file.name == "voicemode.env" and config_file.parent.name == ".voicemode":
+                    # voicemode.env inside a .voicemode directory: global if
+                    # that directory is the one in the user's home
+                    if config_file.parent == global_dir:
+                        file_type = " (Global)"
+                    else:
+                        file_type = " (Project - in .voicemode dir)"
+                elif config_file.name == ".voicemode.env":
+                    if config_file.parent == cwd:
+                        file_type = " (Project - current dir)"
+                    else:
+                        file_type = " (Project - parent dir)"
+                lines.append(f"  {i}. {config_file}{file_type}")
+                lines.append(f"     {status}")
+                lines.append("")
+        else:
+            lines.append("❌ No configuration files found")
+            lines.append("")
+            lines.append("💡 Tip: Create ~/.voicemode/voicemode.env for global configuration")
+            lines.append("💡 Tip: Create .voicemode.env in project directories for project-specific settings")
+        lines.append("")
+        # The reload tool defined in this module is named config_reload
+        lines.append("🔄 Use config_reload() to reload after making changes")
+        return "\n".join(lines)
+    except Exception as e:
+        # Tool boundary: report the failure to the caller as text
+        logger.error(f"Failed to show config files: {e}")
+        return f"❌ Failed to show config files: {str(e)}"

voice_mode/tools/converse.py CHANGED Viewed

@@ -85,6 +85,7 @@ from voice_mode.utils import (
     log_tool_request_start,
     log_tool_request_end
 )
+from voice_mode.pronounce import get_manager as get_pronounce_manager, is_enabled as pronounce_enabled
 logger = logging.getLogger("voice-mode")
@@ -255,6 +256,11 @@ async def text_to_speech_with_failover(
     """
     from voice_mode.config import SIMPLE_FAILOVER
+    # Apply pronunciation rules if enabled
+    if pronounce_enabled():
+        pronounce_mgr = get_pronounce_manager()
+        message = pronounce_mgr.process_tts(message)
     # Use simple failover if enabled
     if SIMPLE_FAILOVER:
         from voice_mode.simple_failover import simple_tts_failover
@@ -695,6 +701,11 @@ async def _speech_to_text_internal(
             logger.debug(f"STT API response type: {type(transcription)}")
             text = transcription.strip() if isinstance(transcription, str) else transcription.text.strip()
+            # Apply pronunciation rules if enabled
+            if text and pronounce_enabled():
+                pronounce_mgr = get_pronounce_manager()
+                text = pronounce_mgr.process_stt(text)
             if text:
                 logger.info(f"✓ STT result: '{text}'")
@@ -875,6 +886,45 @@ def record_audio(duration: float) -> np.ndarray:
         logger.error(f"Recording failed: {e}")
         logger.error(f"Audio config when error occurred - Sample rate: {SAMPLE_RATE}, Channels: {CHANNELS}")
+        # Check if this is a device error that might be recoverable
+        error_str = str(e).lower()
+        if any(err in error_str for err in ['device unavailable', 'device disconnected',
+                                             'invalid device', 'unanticipated host error',
+                                             'portaudio error']):
+            logger.info("Audio device error detected - attempting to reinitialize audio system")
+            # Try to reinitialize sounddevice
+            try:
+                # Get current default device info before reinit
+                try:
+                    old_device = sd.query_devices(kind='input')
+                    old_device_name = old_device.get('name', 'Unknown')
+                except:
+                    old_device_name = 'Previous device'
+                sd._terminate()
+                sd._initialize()
+                # Get new default device info
+                try:
+                    new_device = sd.query_devices(kind='input')
+                    new_device_name = new_device.get('name', 'Unknown')
+                    logger.info(f"Audio system reinitialized - switched from '{old_device_name}' to '{new_device_name}'")
+                except:
+                    logger.info("Audio system reinitialized - retrying with new default device")
+                # Wait a moment for the system to stabilize
+                import time as time_module
+                time_module.sleep(0.5)
+                # Try recording again with the new device (recursive call)
+                logger.info("Retrying recording with new audio device...")
+                return record_audio(duration)
+            except Exception as reinit_error:
+                logger.error(f"Failed to reinitialize audio: {reinit_error}")
+                # Fall through to normal error handling
         # Import here to avoid circular imports
         from voice_mode.utils.audio_diagnostics import get_audio_error_help
@@ -989,6 +1039,14 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
             """Callback for continuous audio stream"""
             if status:
                 logger.warning(f"Audio stream status: {status}")
+                # Check for device-related errors
+                status_str = str(status).lower()
+                if any(err in status_str for err in ['device unavailable', 'device disconnected',
+                                                      'invalid device', 'unanticipated host error',
+                                                      'stream is stopped', 'portaudio error']):
+                    # Signal that we should stop recording due to device error
+                    audio_queue.put(None)  # Sentinel value to indicate error
+                    return
             # Put the audio data in the queue for processing
             audio_queue.put(indata.copy())
@@ -1007,6 +1065,12 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
                         # Get audio chunk from queue with timeout
                         chunk = audio_queue.get(timeout=0.1)
+                        # Check for error sentinel
+                        if chunk is None:
+                            logger.error("Audio device error detected - stopping recording")
+                            # Raise an exception to trigger recovery logic
+                            raise sd.PortAudioError("Audio device disconnected or unavailable")
                         # Flatten for consistency
                         chunk_flat = chunk.flatten()
                         chunks.append(chunk_flat)
@@ -1109,6 +1173,45 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
             # Import here to avoid circular imports
             from voice_mode.utils.audio_diagnostics import get_audio_error_help
+            # Check if this is a device error that might be recoverable
+            error_str = str(e).lower()
+            if any(err in error_str for err in ['device unavailable', 'device disconnected',
+                                                 'invalid device', 'unanticipated host error',
+                                                 'portaudio error']):
+                logger.info("Audio device error detected - attempting to reinitialize audio system")
+                # Try to reinitialize sounddevice
+                try:
+                    # Get current default device info before reinit
+                    try:
+                        old_device = sd.query_devices(kind='input')
+                        old_device_name = old_device.get('name', 'Unknown')
+                    except:
+                        old_device_name = 'Previous device'
+                    sd._terminate()
+                    sd._initialize()
+                    # Get new default device info
+                    try:
+                        new_device = sd.query_devices(kind='input')
+                        new_device_name = new_device.get('name', 'Unknown')
+                        logger.info(f"Audio system reinitialized - switched from '{old_device_name}' to '{new_device_name}'")
+                    except:
+                        logger.info("Audio system reinitialized - retrying with new default device")
+                    # Wait a moment for the system to stabilize
+                    import time as time_module
+                    time_module.sleep(0.5)
+                    # Try recording again with the new device (recursive call in sync context)
+                    logger.info("Retrying recording with new audio device...")
+                    return record_audio_with_silence_detection(max_duration, disable_silence_detection, min_duration, vad_aggressiveness)
+                except Exception as reinit_error:
+                    logger.error(f"Failed to reinitialize audio: {reinit_error}")
+                    # Fall through to normal error handling
             # Get helpful error message
             help_message = get_audio_error_help(e)
             logger.error(f"\n{help_message}")
@@ -1555,6 +1658,12 @@ async def converse(
     # Run startup initialization if needed
     await startup_initialization()
+    # Refresh audio device cache to pick up any device changes (AirPods, etc.)
+    # This takes ~1ms and ensures we use the current default device
+    import sounddevice as sd
+    sd._terminate()
+    sd._initialize()
     # Get event logger and start session
     event_logger = get_event_logger()
     session_id = None

voice_mode/tools/pronounce.py ADDED Viewed

@@ -0,0 +1,245 @@
+"""MCP tools for managing pronunciation rules."""
+import json
+import yaml
+from typing import Optional, Literal, List, Dict
+from voice_mode.server import mcp
+from voice_mode.pronounce import get_manager, is_enabled
+@mcp.tool()
+async def pronounce(
+    action: Literal["list", "add", "remove", "enable", "disable", "test", "reload"],
+    pattern: Optional[str] = None,
+    replacement: Optional[str] = None,
+    rule_type: Literal["tts", "stt"] = "tts",
+    description: Optional[str] = None,
+    name: Optional[str] = None,
+    test_text: Optional[str] = None
+) -> str:
+    r"""
+    Manage pronunciation rules for TTS/STT text processing.
+    This tool allows managing pronunciation rules that improve TTS pronunciation
+    and correct STT transcription errors. Rules are applied automatically when
+    text is processed.
+    Actions:
+    - list: Show all non-private rules (returns count of private rules)
+    - add: Add a new rule (requires pattern, replacement, rule_type)
+    - remove: Remove a rule by name (requires name, rule_type)
+    - enable: Enable a disabled rule (requires name, rule_type)
+    - disable: Disable an enabled rule (requires name, rule_type)
+    - test: Test rules on text (requires test_text, rule_type)
+    - reload: Reload rules from configuration files
+    Examples:
+    - List all TTS rules:
+      pronounce(action="list", rule_type="tts")
+    - Add a rule to pronounce "3M" correctly:
+      pronounce(
+          action="add",
+          pattern=r"\b3M\b",
+          replacement="three em",
+          rule_type="tts",
+          description="Pronounce 3M company name"
+      )
+    - Test how text would be pronounced:
+      pronounce(
+          action="test",
+          test_text="I work at 3M",
+          rule_type="tts"
+      )
+    - Correct common Whisper mishearing:
+      pronounce(
+          action="add",
+          pattern="me tool",
+          replacement="metool",
+          rule_type="stt",
+          description="Correct 'me tool' to 'metool'"
+      )
+    Args:
+        action: The action to perform
+        pattern: Regex pattern for add action
+        replacement: Replacement text for add action (may be an empty string
+            to delete the matched text)
+        rule_type: Type of rule (tts for text-to-speech, stt for speech-to-text)
+        description: Human-readable description for add action
+        name: Rule name for remove/enable/disable actions (optional for add)
+        test_text: Text to test for test action
+    Returns:
+        Result of the action as a formatted string; missing arguments and
+        unknown actions are reported as "Error: ..." strings
+    """
+    manager = get_manager()
+    if action == "list":
+        # Fetch both views so the number of hidden private rules can be reported
+        all_rules = manager.list_rules(include_private=True)
+        public_rules = manager.list_rules(include_private=False)
+        # Filter by type if specified
+        if rule_type:
+            public_rules = [r for r in public_rules if r['direction'] == rule_type]
+            all_rules = [r for r in all_rules if r['direction'] == rule_type]
+        private_count = len(all_rules) - len(public_rules)
+        if not public_rules:
+            if private_count > 0:
+                return f"No public {rule_type} rules found. ({private_count} private rules hidden)"
+            return f"No {rule_type} rules found."
+        # Collect the pieces and join once instead of repeated string +=
+        parts = [f"Pronunciation Rules ({rule_type.upper()}):\n\n"]
+        for rule in public_rules:
+            status = "✓" if rule['enabled'] else "✗"
+            parts.append(f"{status} {rule['name']}: \n")
+            parts.append(f"  Pattern: {rule['pattern']}\n")
+            parts.append(f"  Replace: {rule['replacement']}\n")
+            if rule['description']:
+                parts.append(f"  Desc: {rule['description']}\n")
+            parts.append("\n")
+        # Add private rule count if any
+        if private_count > 0:
+            parts.append(f"({private_count} private rules hidden from view)\n")
+        return "".join(parts)
+    elif action == "add":
+        # An empty replacement is valid (it deletes the matched text), so
+        # only a missing replacement is rejected
+        if not pattern or replacement is None:
+            return "Error: 'add' action requires pattern and replacement"
+        success = manager.add_rule(
+            direction=rule_type,
+            pattern=pattern,
+            replacement=replacement,
+            name=name,
+            description=description or "",
+            enabled=True,
+            private=False  # MCP-created rules are public
+        )
+        if success:
+            return f"✓ Rule added successfully for {rule_type.upper()}"
+        else:
+            return "✗ Failed to add rule. Check if the regex pattern is valid."
+    elif action == "remove":
+        if not name:
+            return "Error: 'remove' action requires rule name"
+        success = manager.remove_rule(rule_type, name)
+        if success:
+            return f"✓ Rule '{name}' removed from {rule_type.upper()}"
+        else:
+            return f"✗ Rule '{name}' not found in {rule_type.upper()} rules (may be private)"
+    elif action == "enable":
+        if not name:
+            return "Error: 'enable' action requires rule name"
+        success = manager.enable_rule(rule_type, name)
+        if success:
+            return f"✓ Rule '{name}' enabled in {rule_type.upper()}"
+        else:
+            return f"✗ Failed to enable rule '{name}' (not found or private)"
+    elif action == "disable":
+        if not name:
+            return "Error: 'disable' action requires rule name"
+        success = manager.disable_rule(rule_type, name)
+        if success:
+            return f"✓ Rule '{name}' disabled in {rule_type.upper()}"
+        else:
+            return f"✗ Failed to disable rule '{name}' (not found or private)"
+    elif action == "test":
+        if not test_text:
+            return "Error: 'test' action requires test_text"
+        result = manager.test_rule(test_text, rule_type)
+        if test_text != result:
+            return f"Original: {test_text}\nModified: {result}\n\nRules were applied to transform the text."
+        else:
+            return f"No changes: {test_text}\n\nNo rules matched or all rules are disabled."
+    elif action == "reload":
+        manager.reload_rules()
+        # Count everything that was loaded, private rules included
+        all_rules = manager.list_rules(include_private=True)
+        tts_count = len([r for r in all_rules if r['direction'] == 'tts'])
+        stt_count = len([r for r in all_rules if r['direction'] == 'stt'])
+        return f"✓ Pronunciation rules reloaded\nLoaded {tts_count} TTS rules and {stt_count} STT rules"
+    else:
+        return f"Error: Unknown action '{action}'. Use: list, add, remove, enable, disable, test, reload"
+@mcp.tool()
+async def pronounce_status() -> str:
+    """
+    Report the current state of the pronunciation middleware.
+    The report states whether processing is enabled, gives per-direction
+    (TTS/STT) rule totals split into public and private plus the number of
+    enabled rules, shows the two VOICEMODE_PRONUNCIATION_* environment
+    flags read here, and lists the manager's configuration paths.
+    Returns:
+        Status information as a formatted string
+    """
+    middleware_on = is_enabled()
+    mgr = get_manager()
+    every_rule = mgr.list_rules(include_private=True)
+    visible_rules = mgr.list_rules(include_private=False)
+    def _tally(rules, direction, only_enabled=False):
+        """Count rules for one direction, optionally only the enabled ones."""
+        return sum(
+            1 for r in rules
+            if r['direction'] == direction and (not only_enabled or r['enabled'])
+        )
+    report = [
+        "Pronunciation Middleware Status:\n",
+        "=" * 40 + "\n",
+        f"Enabled: {'✓ Yes' if middleware_on else '✗ No'}\n\n",
+    ]
+    for label, direction in (("TTS", "tts"), ("STT", "stt")):
+        total = _tally(every_rule, direction)
+        visible = _tally(visible_rules, direction)
+        active = _tally(every_rule, direction, only_enabled=True)
+        report.append(f"{label} Rules:\n")
+        # Private count is derived: everything that is not publicly listed
+        report.append(f"  Total: {total} ({visible} public, {total - visible} private)\n")
+        report.append(f"  Enabled: {active}\n\n")
+    report.append("Configuration:\n")
+    import os
+    # Both flags count as set only when the value is exactly "true" (any case)
+    logging_on = os.environ.get('VOICEMODE_PRONUNCIATION_LOG_SUBSTITUTIONS', '').lower() == 'true'
+    all_private = os.environ.get('VOICEMODE_PRONUNCIATION_PRIVATE_MODE', '').lower() == 'true'
+    report.append(f"  Logging: {'✓ Enabled' if logging_on else '✗ Disabled'}\n")
+    report.append(f"  Private Mode: {'✓ All rules private' if all_private else '✗ Normal'}\n")
+    # Show config file paths
+    report.append("\nConfiguration Files:\n")
+    report.extend(f"  - {path}\n" for path in mgr.config_paths)
+    return "".join(report)

voice_mode/tools/transcription/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""Audio transcription with word-level timestamps."""
+from .types import TranscriptionBackend, OutputFormat, TranscriptionResult, WordData, SegmentData
+from .core import transcribe_audio, transcribe_audio_sync
+__all__ = [
+    'transcribe_audio',
+    'transcribe_audio_sync',
+    'TranscriptionBackend',
+    'OutputFormat',
+    'TranscriptionResult',
+    'WordData',
+    'SegmentData',
+]

voice-mode 3.34.3__py3-none-any.whl → 4.1.0__py3-none-any.whl

voice-mode 3.34.3__py3-none-any.whl → 4.1.0__py3-none-any.whl