PyPI - voice-mode - Versions diffs - 2.19.0__tar.gz → 2.20.1__tar.gz - Mend

voice-mode 2.19.0__tar.gz → 2.20.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

{voice_mode-2.19.0 → voice_mode-2.20.1}/CHANGELOG.md RENAMED Viewed

@@ -7,8 +7,49 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [2.20.1] - 2025-08-11
+### Fixed
+- **Speed parameter validation error** - Fixed MCP validation error when passing speed parameter as string
+  - Added type conversion from string to float for speed parameter
+  - Now properly handles speed values passed by MCP clients (e.g., via uvx)
+  - Added comprehensive validation and error messages for invalid speed values
+## [2.20.0] - 2025-08-10
+### Added
+- **VAD aggressiveness control**
+  - New `vad_aggressiveness` parameter in converse tool for controlling Voice Activity Detection sensitivity (0-3)
+  - 0 = least aggressive filtering (more permissive), 3 = most aggressive (strict)
+  - Allows adapting to different environments: quiet rooms (0-1) vs noisy environments (2-3)
+  - Also configurable via VOICEMODE_VAD_AGGRESSIVENESS environment variable
+### Changed
+- **Improved VAD documentation**
+  - Clarified that aggressiveness controls how strictly VAD filters out non-speech
+  - Updated examples to better demonstrate appropriate use cases
+  - Fixed configuration documentation that had backwards descriptions
 ## [2.19.0] - 2025-08-10
+### Added
+- **MCP prompt command: /release-notes**
+  - New command to display recent changelog entries directly in Claude Code
+  - Shows 5 most recent versions by default (configurable with parameter)
+  - Parses and formats CHANGELOG.md for easy reading
+  - Inspired by Claude Code's own /release-notes feature
+  - Includes comprehensive test coverage
+### Fixed
+- Release notes prompt now handles empty string parameters correctly
+- Command works properly with both source and installed packages
+- Changelog is now accessible as an MCP resource when package is installed
+### Changed
+- Release notes output format now matches Claude Code's clean, minimal style
+- Removed decorative headers and footers for cleaner terminal output
+- Release notes displayed in chronological order (oldest first)
 ## [2.18.0] - 2025-08-10
 ### Added

{voice_mode-2.19.0 → voice_mode-2.20.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voice-mode
-Version: 2.19.0
+Version: 2.20.1
 Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
 Project-URL: Homepage, https://github.com/mbailey/voicemode
 Project-URL: Repository, https://github.com/mbailey/voicemode

{voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/__version__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 # This file is automatically updated by 'make release'
 # Do not edit manually
-__version__ = "2.19.0"
+__version__ = "2.20.1"

{voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/converse.py RENAMED Viewed

@@ -871,7 +871,7 @@ def record_audio(duration: float) -> np.ndarray:
             sys.stderr = original_stderr
-def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0) -> np.ndarray:
+def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0, vad_aggressiveness: Optional[int] = None) -> np.ndarray:
     """Record audio from microphone with automatic silence detection.
     Uses WebRTC VAD to detect when the user stops speaking and automatically
@@ -881,6 +881,7 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
         max_duration: Maximum recording duration in seconds
         disable_silence_detection: If True, disables silence detection and uses fixed duration recording
         min_duration: Minimum recording duration before silence detection can stop (default: 0.0)
+        vad_aggressiveness: VAD aggressiveness level (0-3). If None, uses VAD_AGGRESSIVENESS from config
     Returns:
         Numpy array of recorded audio samples
@@ -902,8 +903,9 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
     logger.info(f"🎤 Recording with silence detection (max {max_duration}s)...")
     try:
-        # Initialize VAD
-        vad = webrtcvad.Vad(VAD_AGGRESSIVENESS)
+        # Initialize VAD with provided aggressiveness or default
+        effective_vad_aggressiveness = vad_aggressiveness if vad_aggressiveness is not None else VAD_AGGRESSIVENESS
+        vad = webrtcvad.Vad(effective_vad_aggressiveness)
         # Calculate chunk size (must be 10, 20, or 30ms worth of samples)
         chunk_samples = int(SAMPLE_RATE * VAD_CHUNK_DURATION_MS / 1000)
@@ -932,7 +934,7 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
         original_stdout = sys.stdout
         original_stderr = sys.stderr
-        logger.debug(f"VAD config - Aggressiveness: {VAD_AGGRESSIVENESS}, "
+        logger.debug(f"VAD config - Aggressiveness: {effective_vad_aggressiveness} (param: {vad_aggressiveness}, default: {VAD_AGGRESSIVENESS}), "
                     f"Silence threshold: {SILENCE_THRESHOLD_MS}ms, "
                     f"Min duration: {MIN_RECORDING_DURATION}s, "
                     f"Initial grace period: {INITIAL_SILENCE_GRACE_PERIOD}s")
@@ -1227,7 +1229,8 @@ async def converse(
     audio_feedback_style: Optional[str] = None,
     audio_format: Optional[str] = None,
     disable_silence_detection: Union[bool, str] = False,
-    speed: Optional[float] = None
+    speed: Optional[float] = None,
+    vad_aggressiveness: Optional[int] = None
 ) -> str:
     """Have a voice conversation - speak a message and optionally listen for response.
@@ -1289,6 +1292,16 @@ async def converse(
         speed: Speech rate/speed for TTS playback (default: None uses normal speed)
                Values: 0.25 to 4.0 (0.5 = half speed, 2.0 = double speed)
                Supported by both OpenAI and Kokoro TTS providers.
+        vad_aggressiveness: Voice Activity Detection aggressiveness level (default: None uses VOICEMODE_VAD_AGGRESSIVENESS env var)
+                            Controls how strict the VAD is about filtering out non-speech audio.
+                            Values: 0-3 (integer)
+                            - 0: Least aggressive filtering - includes more audio, may include non-speech
+                            - 1: Slightly stricter filtering
+                            - 2: Balanced filtering (default) - good for most environments
+                            - 3: Most aggressive filtering - strict speech detection, may cut off soft speech
+                            Use lower values (0-1) in quiet environments to catch all speech
+                            Use higher values (2-3) in noisy environments to reduce false triggers
         If wait_for_response is False: Confirmation that message was spoken
         If wait_for_response is True: The voice response received (or error/timeout message)
@@ -1320,6 +1333,15 @@ async def converse(
         - Slower speech: converse("This is slower speech", speed=0.8)
         Note: Speed control works with both OpenAI and Kokoro TTS providers
+    VAD Aggressiveness Examples:
+        - Quiet room, capture all speech: converse("Let's have a conversation", vad_aggressiveness=0)
+        - Normal home/office: converse("Tell me about your day")  # Uses default (2)
+        - Noisy cafe/outdoors: converse("Can you hear me?", vad_aggressiveness=3)
+        - Balance for most cases: converse("How are you?", vad_aggressiveness=2)
+        Remember: Lower values (0-1) = more permissive, may detect non-speech as speech
+                 Higher values (2-3) = more strict, may miss soft speech or whispers
     """
     # Convert string booleans to actual booleans
     if isinstance(wait_for_response, str):
@@ -1329,8 +1351,25 @@ async def converse(
     if isinstance(audio_feedback, str):
         audio_feedback = audio_feedback.lower() in ('true', '1', 'yes', 'on')
+    # Convert string speed to float
+    if speed is not None and isinstance(speed, str):
+        try:
+            speed = float(speed)
+        except ValueError:
+            return f"❌ Error: speed must be a number (got '{speed}')"
+    # Validate speed parameter range
+    if speed is not None:
+        if not (0.25 <= speed <= 4.0):
+            return f"❌ Error: speed must be between 0.25 and 4.0 (got {speed})"
     logger.info(f"Converse: '{message[:50]}{'...' if len(message) > 50 else ''}' (wait_for_response: {wait_for_response})")
+    # Validate vad_aggressiveness parameter
+    if vad_aggressiveness is not None:
+        if not isinstance(vad_aggressiveness, int) or vad_aggressiveness < 0 or vad_aggressiveness > 3:
+            return f"Error: vad_aggressiveness must be an integer between 0 and 3 (got {vad_aggressiveness})"
     # Validate duration parameters
     if wait_for_response:
         if min_listen_duration < 0:
@@ -1604,9 +1643,9 @@ async def converse(
                         event_logger.log_event(event_logger.RECORDING_START)
                     record_start = time.perf_counter()
-                    logger.debug(f"About to call record_audio_with_silence_detection with duration={listen_duration}, disable_silence_detection={disable_silence_detection}, min_duration={min_listen_duration}")
+                    logger.debug(f"About to call record_audio_with_silence_detection with duration={listen_duration}, disable_silence_detection={disable_silence_detection}, min_duration={min_listen_duration}, vad_aggressiveness={vad_aggressiveness}")
                     audio_data = await asyncio.get_event_loop().run_in_executor(
-                        None, record_audio_with_silence_detection, listen_duration, disable_silence_detection, min_listen_duration
+                        None, record_audio_with_silence_detection, listen_duration, disable_silence_detection, min_listen_duration, vad_aggressiveness
                     )
                     timings['record'] = time.perf_counter() - record_start