PyPI - voice-mode - Versions diffs - 3.34.3__py3-none-any.whl → 4.0.1__py3-none-any.whl - Mend

voice-mode 3.34.3__py3-none-any.whl → 4.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (154) hide show

voice_mode/frontend/package-lock.json CHANGED Viewed

@@ -21,6 +21,7 @@
         "@types/node": "^20.17.13",
         "@types/react": "^18.3.18",
         "@types/react-dom": "^18.3.5",
+        "autoprefixer": "^10.4.21",
         "eslint": "^8.57.1",
         "eslint-config-next": "14.2.29",
         "eslint-config-prettier": "9.1.0",
@@ -1254,6 +1255,44 @@
         "node": ">= 0.4"
       }
     },
+    "node_modules/autoprefixer": {
+      "version": "10.4.21",
+      "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.21.tgz",
+      "integrity": "sha512-O+A6LWV5LDHSJD3LjHYoNi4VLsj/Whi7k6zG12xTYaU4cQ8oxQGckXNX8cRHK5yOZ/ppVHe0ZBXGzSV9jXdVbQ==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/postcss/"
+        },
+        {
+          "type": "tidelift",
+          "url": "https://tidelift.com/funding/github/npm/autoprefixer"
+        },
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "browserslist": "^4.24.4",
+        "caniuse-lite": "^1.0.30001702",
+        "fraction.js": "^4.3.7",
+        "normalize-range": "^0.1.2",
+        "picocolors": "^1.1.1",
+        "postcss-value-parser": "^4.2.0"
+      },
+      "bin": {
+        "autoprefixer": "bin/autoprefixer"
+      },
+      "engines": {
+        "node": "^10 || ^12 || >=14"
+      },
+      "peerDependencies": {
+        "postcss": "^8.1.0"
+      }
+    },
     "node_modules/available-typed-arrays": {
       "version": "1.0.7",
       "dev": true,
@@ -1320,6 +1359,39 @@
         "node": ">=8"
       }
     },
+    "node_modules/browserslist": {
+      "version": "4.25.4",
+      "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.25.4.tgz",
+      "integrity": "sha512-4jYpcjabC606xJ3kw2QwGEZKX0Aw7sgQdZCvIK9dhVSPh76BKo+C+btT1RRofH7B+8iNpEbgGNVWiLki5q93yg==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/browserslist"
+        },
+        {
+          "type": "tidelift",
+          "url": "https://tidelift.com/funding/github/npm/browserslist"
+        },
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "caniuse-lite": "^1.0.30001737",
+        "electron-to-chromium": "^1.5.211",
+        "node-releases": "^2.0.19",
+        "update-browserslist-db": "^1.1.3"
+      },
+      "bin": {
+        "browserslist": "cli.js"
+      },
+      "engines": {
+        "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
+      }
+    },
     "node_modules/busboy": {
       "version": "1.6.0",
       "dev": true,
@@ -1417,7 +1489,9 @@
       }
     },
     "node_modules/caniuse-lite": {
-      "version": "1.0.30001726",
+      "version": "1.0.30001739",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001739.tgz",
+      "integrity": "sha512-y+j60d6ulelrNSwpPyrHdl+9mJnQzHBr08xm48Qno0nSk4h3Qojh+ziv2qE6rXf4k3tadF4o1J/1tAbVm1NtnA==",
       "dev": true,
       "funding": [
         {
@@ -1699,6 +1773,13 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/electron-to-chromium": {
+      "version": "1.5.211",
+      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.211.tgz",
+      "integrity": "sha512-IGBvimJkotaLzFnwIVgW9/UD/AOJ2tByUmeOrtqBfACSbAw5b1G0XpvdaieKyc7ULmbwXVx+4e4Be8pOPBrYkw==",
+      "dev": true,
+      "license": "ISC"
+    },
     "node_modules/emoji-regex": {
       "version": "9.2.2",
       "dev": true,
@@ -1865,6 +1946,16 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/escalade": {
+      "version": "3.2.0",
+      "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
+      "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=6"
+      }
+    },
     "node_modules/escape-string-regexp": {
       "version": "4.0.0",
       "dev": true,
@@ -2462,6 +2553,20 @@
         "url": "https://github.com/sponsors/isaacs"
       }
     },
+    "node_modules/fraction.js": {
+      "version": "4.3.7",
+      "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz",
+      "integrity": "sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": "*"
+      },
+      "funding": {
+        "type": "patreon",
+        "url": "https://github.com/sponsors/rawify"
+      }
+    },
     "node_modules/framer-motion": {
       "version": "11.18.2",
       "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-11.18.2.tgz",
@@ -3693,6 +3798,13 @@
         "node": "^10 || ^12 || >=14"
       }
     },
+    "node_modules/node-releases": {
+      "version": "2.0.19",
+      "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz",
+      "integrity": "sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/normalize-path": {
       "version": "3.0.0",
       "dev": true,
@@ -3701,6 +3813,16 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/normalize-range": {
+      "version": "0.1.2",
+      "resolved": "https://registry.npmjs.org/normalize-range/-/normalize-range-0.1.2.tgz",
+      "integrity": "sha512-bdok/XvKII3nUpklnV6P2hxtMNrCboOjAcyBuQnWEhO665FwrSNRxU+AqpsyvO6LgGYPspN+lu5CLtw4jPRKNA==",
+      "dev": true,
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
     "node_modules/object-assign": {
       "version": "4.1.1",
       "dev": true,
@@ -5218,6 +5340,37 @@
         "@unrs/resolver-binding-win32-x64-msvc": "1.9.2"
       }
     },
+    "node_modules/update-browserslist-db": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.3.tgz",
+      "integrity": "sha512-UxhIZQ+QInVdunkDAaiazvvT/+fXL5Osr0JZlJulepYu6Jd7qJtDZjlur0emRlT71EN3ScPoE7gvsuIKKNavKw==",
+      "dev": true,
+      "funding": [
+        {
+          "type": "opencollective",
+          "url": "https://opencollective.com/browserslist"
+        },
+        {
+          "type": "tidelift",
+          "url": "https://tidelift.com/funding/github/npm/browserslist"
+        },
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/ai"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "escalade": "^3.2.0",
+        "picocolors": "^1.1.1"
+      },
+      "bin": {
+        "update-browserslist-db": "cli.js"
+      },
+      "peerDependencies": {
+        "browserslist": ">= 4.21.0"
+      }
+    },
     "node_modules/uri-js": {
       "version": "4.4.1",
       "dev": true,

voice_mode/providers.py CHANGED Viewed

@@ -9,9 +9,8 @@ import logging
 from typing import Dict, Optional, List, Any, Tuple
 from openai import AsyncOpenAI
-from .config import TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, OPENAI_API_KEY
+from .config import TTS_VOICES, TTS_MODELS, TTS_BASE_URLS, OPENAI_API_KEY, get_voice_preferences
 from .provider_discovery import provider_registry, EndpointInfo
-from .voice_preferences import get_preferred_voices
 logger = logging.getLogger("voice-mode")
@@ -68,14 +67,14 @@ async def get_tts_client_and_voice(
         return client, selected_voice, selected_model, endpoint_info
     # Voice-first selection algorithm
-    # Get user preferences and prepend to system defaults
-    user_preferences = get_preferred_voices()
-    combined_voice_list = user_preferences + [v for v in TTS_VOICES if v not in user_preferences]
+    # Get user preferences from configuration
+    voice_preferences = get_voice_preferences()
+    combined_voice_list = voice_preferences
     logger.info(f"TTS Provider Selection (voice-first)")
-    if user_preferences:
-        logger.info(f"  User voice preferences: {user_preferences}")
-    logger.info(f"  Combined voice list: {combined_voice_list}")
+    if voice_preferences:
+        logger.info(f"  Voice preferences: {voice_preferences}")
+    logger.info(f"  Voice list: {combined_voice_list}")
     logger.info(f"  Preferred models: {TTS_MODELS}")
     logger.info(f"  Available endpoints: {TTS_BASE_URLS}")

voice_mode/resources/configuration.py CHANGED Viewed

@@ -267,7 +267,7 @@ async def environment_variables() -> str:
         ("VOICEMODE_AUTO_START_KOKORO", "Auto-start Kokoro service (true/false)"),
         ("VOICEMODE_TTS_BASE_URLS", "Comma-separated list of TTS endpoints"),
         ("VOICEMODE_STT_BASE_URLS", "Comma-separated list of STT endpoints"),
-        ("VOICEMODE_TTS_VOICES", "Comma-separated list of preferred voices"),
+        ("VOICEMODE_VOICES", "Comma-separated list of preferred voices"),
         ("VOICEMODE_TTS_MODELS", "Comma-separated list of preferred models"),
         # Audio Settings
         ("VOICEMODE_AUDIO_FORMAT", "Audio format for recording (pcm/mp3/wav/flac/aac/opus)"),
@@ -358,7 +358,7 @@ async def environment_template() -> str:
         f"export VOICEMODE_AUTO_START_KOKORO=\"{str(AUTO_START_KOKORO).lower()}\"",
         f"export VOICEMODE_TTS_BASE_URLS=\"{','.join(TTS_BASE_URLS)}\"",
         f"export VOICEMODE_STT_BASE_URLS=\"{','.join(STT_BASE_URLS)}\"",
-        f"export VOICEMODE_TTS_VOICES=\"{','.join(TTS_VOICES)}\"",
+        f"export VOICEMODE_VOICES=\"{','.join(TTS_VOICES)}\"",
         f"export VOICEMODE_TTS_MODELS=\"{','.join(TTS_MODELS)}\"",
         "",
         "# Audio Settings",

voice_mode/tools/configuration_management.py CHANGED Viewed

@@ -5,7 +5,7 @@ import re
 from pathlib import Path
 from typing import Dict, Optional, List
 from voice_mode.server import mcp
-from voice_mode.config import BASE_DIR
+from voice_mode.config import BASE_DIR, reload_configuration, find_voicemode_env_files
 import logging
 logger = logging.getLogger("voice-mode")
@@ -109,7 +109,7 @@ async def update_config(key: str, value: str) -> str:
     """Update a configuration value in the voicemode.env file.
     Args:
-        key: The configuration key to update (e.g., 'VOICEMODE_TTS_VOICES')
+        key: The configuration key to update (e.g., 'VOICEMODE_VOICES')
         value: The new value for the configuration
     Returns:
@@ -175,7 +175,7 @@ async def list_config_keys() -> str:
         ("Provider Configuration", [
             ("VOICEMODE_TTS_BASE_URLS", "Comma-separated list of TTS endpoints"),
             ("VOICEMODE_STT_BASE_URLS", "Comma-separated list of STT endpoints"),
-            ("VOICEMODE_TTS_VOICES", "Comma-separated list of preferred voices"),
+            ("VOICEMODE_VOICES", "Comma-separated list of preferred voices"),
             ("VOICEMODE_TTS_MODELS", "Comma-separated list of preferred models"),
             ("VOICEMODE_PREFER_LOCAL", "Prefer local providers over cloud (true/false)"),
             ("VOICEMODE_ALWAYS_TRY_LOCAL", "Always attempt local providers (true/false)"),
@@ -211,6 +211,107 @@ async def list_config_keys() -> str:
             lines.append(f"    {description}")
         lines.append("")
-    lines.append("💡 Usage: update_config(key='VOICEMODE_TTS_VOICES', value='af_sky,nova')")
+    lines.append("💡 Usage: update_config(key='VOICEMODE_VOICES', value='af_sky,nova')")
-    return "\n".join(lines)
+    return "\n".join(lines)
+@mcp.tool()
+async def config_reload() -> str:
+    """Reload configuration from .voicemode.env files and clear all caches.
+    This tool reloads configuration from:
+    1. Global ~/.voicemode/voicemode.env file
+    2. Project-specific .voicemode.env files (searched up directory tree)
+    3. Environment variables (highest priority)
+    Returns:
+        Status message showing which files were loaded and any changes
+    """
+    try:
+        # Get config files before reload
+        old_files = find_voicemode_env_files()
+        # Reload configuration
+        reload_configuration()
+        # Get config files after reload
+        new_files = find_voicemode_env_files()
+        lines = ["✅ Configuration reloaded successfully!", ""]
+        if new_files:
+            lines.append("📁 Configuration files loaded (in order):")
+            for i, config_file in enumerate(new_files, 1):
+                lines.append(f"  {i}. {config_file}")
+        else:
+            lines.append("📁 No configuration files found - using defaults")
+        lines.append("")
+        lines.append("🔄 All caches have been cleared")
+        lines.append("📊 Voice preferences and provider settings updated")
+        logger.info(f"Configuration reloaded from {len(new_files)} files")
+        return "\n".join(lines)
+    except Exception as e:
+        logger.error(f"Failed to reload configuration: {e}")
+        return f"❌ Failed to reload configuration: {str(e)}"
+@mcp.tool()
+async def show_config_files() -> str:
+    """Show which .voicemode.env files are being used for configuration.
+    This shows the current configuration file discovery and loading order:
+    - Global configuration from ~/.voicemode/voicemode.env
+    - Project-specific configuration (searched up directory tree)
+    - Current working directory for context
+    Returns:
+        Formatted list of configuration files and their status
+    """
+    try:
+        config_files = find_voicemode_env_files()
+        lines = ["📋 Voice Mode Configuration Files", "=" * 40, ""]
+        lines.append(f"🗂️  Current directory: {Path.cwd()}")
+        lines.append("")
+        if config_files:
+            lines.append("📁 Configuration files (loading order):")
+            lines.append("")
+            for i, config_file in enumerate(config_files, 1):
+                status = "✅ EXISTS" if config_file.exists() else "❌ MISSING"
+                file_type = ""
+                if config_file.name == "voicemode.env" and config_file.parent.name == ".voicemode":
+                    if config_file.parent == Path.home() / ".voicemode":
+                        file_type = " (Global)"
+                    else:
+                        file_type = " (Project - in .voicemode dir)"
+                elif config_file.name == ".voicemode.env":
+                    if config_file.parent == Path.cwd():
+                        file_type = " (Project - current dir)"
+                    else:
+                        file_type = " (Project - parent dir)"
+                lines.append(f"  {i}. {config_file}{file_type}")
+                lines.append(f"     {status}")
+                lines.append("")
+        else:
+            lines.append("❌ No configuration files found")
+            lines.append("")
+            lines.append("💡 Tip: Create ~/.voicemode/voicemode.env for global configuration")
+            lines.append("💡 Tip: Create .voicemode.env in project directories for project-specific settings")
+        lines.append("")
+        lines.append("🔄 Use reload_config() to reload after making changes")
+        return "\n".join(lines)
+    except Exception as e:
+        logger.error(f"Failed to show config files: {e}")
+        return f"❌ Failed to show config files: {str(e)}"

voice_mode/tools/converse.py CHANGED Viewed

@@ -875,6 +875,45 @@ def record_audio(duration: float) -> np.ndarray:
         logger.error(f"Recording failed: {e}")
         logger.error(f"Audio config when error occurred - Sample rate: {SAMPLE_RATE}, Channels: {CHANNELS}")
+        # Check if this is a device error that might be recoverable
+        error_str = str(e).lower()
+        if any(err in error_str for err in ['device unavailable', 'device disconnected',
+                                             'invalid device', 'unanticipated host error',
+                                             'portaudio error']):
+            logger.info("Audio device error detected - attempting to reinitialize audio system")
+            # Try to reinitialize sounddevice
+            try:
+                # Get current default device info before reinit
+                try:
+                    old_device = sd.query_devices(kind='input')
+                    old_device_name = old_device.get('name', 'Unknown')
+                except:
+                    old_device_name = 'Previous device'
+                sd._terminate()
+                sd._initialize()
+                # Get new default device info
+                try:
+                    new_device = sd.query_devices(kind='input')
+                    new_device_name = new_device.get('name', 'Unknown')
+                    logger.info(f"Audio system reinitialized - switched from '{old_device_name}' to '{new_device_name}'")
+                except:
+                    logger.info("Audio system reinitialized - retrying with new default device")
+                # Wait a moment for the system to stabilize
+                import time as time_module
+                time_module.sleep(0.5)
+                # Try recording again with the new device (recursive call)
+                logger.info("Retrying recording with new audio device...")
+                return record_audio(duration)
+            except Exception as reinit_error:
+                logger.error(f"Failed to reinitialize audio: {reinit_error}")
+                # Fall through to normal error handling
         # Import here to avoid circular imports
         from voice_mode.utils.audio_diagnostics import get_audio_error_help
@@ -989,6 +1028,14 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
             """Callback for continuous audio stream"""
             if status:
                 logger.warning(f"Audio stream status: {status}")
+                # Check for device-related errors
+                status_str = str(status).lower()
+                if any(err in status_str for err in ['device unavailable', 'device disconnected',
+                                                      'invalid device', 'unanticipated host error',
+                                                      'stream is stopped', 'portaudio error']):
+                    # Signal that we should stop recording due to device error
+                    audio_queue.put(None)  # Sentinel value to indicate error
+                    return
             # Put the audio data in the queue for processing
             audio_queue.put(indata.copy())
@@ -1007,6 +1054,12 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
                         # Get audio chunk from queue with timeout
                         chunk = audio_queue.get(timeout=0.1)
+                        # Check for error sentinel
+                        if chunk is None:
+                            logger.error("Audio device error detected - stopping recording")
+                            # Raise an exception to trigger recovery logic
+                            raise sd.PortAudioError("Audio device disconnected or unavailable")
                         # Flatten for consistency
                         chunk_flat = chunk.flatten()
                         chunks.append(chunk_flat)
@@ -1109,6 +1162,45 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
             # Import here to avoid circular imports
             from voice_mode.utils.audio_diagnostics import get_audio_error_help
+            # Check if this is a device error that might be recoverable
+            error_str = str(e).lower()
+            if any(err in error_str for err in ['device unavailable', 'device disconnected',
+                                                 'invalid device', 'unanticipated host error',
+                                                 'portaudio error']):
+                logger.info("Audio device error detected - attempting to reinitialize audio system")
+                # Try to reinitialize sounddevice
+                try:
+                    # Get current default device info before reinit
+                    try:
+                        old_device = sd.query_devices(kind='input')
+                        old_device_name = old_device.get('name', 'Unknown')
+                    except:
+                        old_device_name = 'Previous device'
+                    sd._terminate()
+                    sd._initialize()
+                    # Get new default device info
+                    try:
+                        new_device = sd.query_devices(kind='input')
+                        new_device_name = new_device.get('name', 'Unknown')
+                        logger.info(f"Audio system reinitialized - switched from '{old_device_name}' to '{new_device_name}'")
+                    except:
+                        logger.info("Audio system reinitialized - retrying with new default device")
+                    # Wait a moment for the system to stabilize
+                    import time as time_module
+                    time_module.sleep(0.5)
+                    # Try recording again with the new device (recursive call in sync context)
+                    logger.info("Retrying recording with new audio device...")
+                    return record_audio_with_silence_detection(max_duration, disable_silence_detection, min_duration, vad_aggressiveness)
+                except Exception as reinit_error:
+                    logger.error(f"Failed to reinitialize audio: {reinit_error}")
+                    # Fall through to normal error handling
             # Get helpful error message
             help_message = get_audio_error_help(e)
             logger.error(f"\n{help_message}")
@@ -1555,6 +1647,12 @@ async def converse(
     # Run startup initialization if needed
     await startup_initialization()
+    # Refresh audio device cache to pick up any device changes (AirPods, etc.)
+    # This takes ~1ms and ensures we use the current default device
+    import sounddevice as sd
+    sd._terminate()
+    sd._initialize()
     # Get event logger and start session
     event_logger = get_event_logger()
     session_id = None

voice_mode/tools/transcription/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""Audio transcription with word-level timestamps."""
+from .types import TranscriptionBackend, OutputFormat, TranscriptionResult, WordData, SegmentData
+from .core import transcribe_audio, transcribe_audio_sync
+__all__ = [
+    'transcribe_audio',
+    'transcribe_audio_sync',
+    'TranscriptionBackend',
+    'OutputFormat',
+    'TranscriptionResult',
+    'WordData',
+    'SegmentData',
+]

voice-mode 3.34.3__py3-none-any.whl → 4.0.1__py3-none-any.whl

voice-mode 3.34.3__py3-none-any.whl → 4.0.1__py3-none-any.whl