PyPI - voice-mode - Versions diffs - 2.18.0__tar.gz → 2.20.0__tar.gz - Mend

voice-mode 2.18.0__tar.gz → 2.20.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

{voice_mode-2.18.0 → voice_mode-2.20.0}/CHANGELOG.md RENAMED Viewed

@@ -7,6 +7,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
+## [2.20.0] - 2025-08-10
+### Added
+- **VAD aggressiveness control**
+  - New `vad_aggressiveness` parameter in converse tool for controlling Voice Activity Detection sensitivity (0-3)
+  - 0 = least aggressive filtering (more permissive), 3 = most aggressive (strict)
+  - Allows adapting to different environments: quiet rooms (0-1) vs noisy environments (2-3)
+  - Also configurable via VOICEMODE_VAD_AGGRESSIVENESS environment variable
+### Changed
+- **Improved VAD documentation**
+  - Clarified that aggressiveness controls how strictly VAD filters out non-speech
+  - Updated examples to better demonstrate appropriate use cases
+  - Fixed configuration documentation that had backwards descriptions
+## [2.19.0] - 2025-08-10
+### Added
+- **MCP prompt command: /release-notes**
+  - New command to display recent changelog entries directly in Claude Code
+  - Shows 5 most recent versions by default (configurable with parameter)
+  - Parses and formats CHANGELOG.md for easy reading
+  - Inspired by Claude Code's own /release-notes feature
+  - Includes comprehensive test coverage
+### Fixed
+- Release notes prompt now handles empty string parameters correctly
+- Command works properly with both source and installed packages
+- Changelog is now accessible as an MCP resource when package is installed
+### Changed
+- Release notes output format now matches Claude Code's clean, minimal style
+- Removed decorative headers and footers for cleaner terminal output
+- Release notes displayed in chronological order (oldest first)
 ## [2.18.0] - 2025-08-10
 ### Added

{voice_mode-2.18.0 → voice_mode-2.20.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: voice-mode
-Version: 2.18.0
+Version: 2.20.0
 Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
 Project-URL: Homepage, https://github.com/mbailey/voicemode
 Project-URL: Repository, https://github.com/mbailey/voicemode

{voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/__version__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 # This file is automatically updated by 'make release'
 # Do not edit manually
-__version__ = "2.18.0"
+__version__ = "2.20.0"

voice_mode-2.20.0/voice_mode/prompts/release_notes.py ADDED Viewed

@@ -0,0 +1,78 @@
+"""Release notes prompt for displaying recent CHANGELOG entries."""
+from voice_mode.server import mcp
+# Import the resource at module level to ensure it's registered
+from voice_mode.resources.changelog import changelog_resource
+@mcp.prompt(name="release-notes")
+def release_notes_prompt(versions: str = "5") -> str:
+    """View recent release notes from Voice Mode's CHANGELOG."""
+    # Handle empty string from Claude Code
+    if not versions or versions == "":
+        versions = "5"
+    # Get the changelog content from the resource
+    # Resources decorated with @mcp.resource need to access the fn attribute
+    if hasattr(changelog_resource, 'fn'):
+        changelog_content = changelog_resource.fn()
+    else:
+        changelog_content = changelog_resource()
+    # If we got an error message, return it
+    if changelog_content.startswith("Error") or changelog_content.startswith("CHANGELOG.md not found"):
+        return changelog_content
+    # Parse the changelog content
+    lines = changelog_content.split('\n')
+    versions_found = []
+    current_version = None
+    current_content = []
+    for line in lines:
+        # Skip the header and empty lines at the beginning
+        if line.startswith('# Changelog') or (not line.strip() and not current_version):
+            continue
+        # Check for version header (## [x.y.z] - date)
+        if line.startswith('## [') and '] - ' in line:
+            # Save previous version if exists
+            if current_version and current_content:
+                versions_found.append({
+                    'header': current_version,
+                    'content': '\n'.join(current_content).strip()
+                })
+            # Start new version
+            current_version = line
+            current_content = []
+            # Stop if we have enough versions
+            if len(versions_found) >= int(versions):
+                break
+        elif current_version:
+            # Add content to current version
+            current_content.append(line)
+    # Don't forget the last version
+    if current_version and current_content and len(versions_found) < int(versions):
+        versions_found.append({
+            'header': current_version,
+            'content': '\n'.join(current_content).strip()
+        })
+    # Reverse to show oldest first (newest last)
+    versions_found.reverse()
+    # Format the output
+    output = []
+    for version in versions_found:
+        output.append(version['header'])
+        output.append(version['content'])
+        output.append('')  # Empty line between versions
+    result = '\n'.join(output).strip()
+    # Return just the changelog entries without header/footer
+    # to match Claude Code's clean output format
+    return result

voice_mode-2.20.0/voice_mode/resources/changelog.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""CHANGELOG resource for Voice Mode."""
+from pathlib import Path
+from voice_mode.server import mcp
+@mcp.resource("changelog://voice-mode")
+def changelog_resource() -> str:
+    """Voice Mode changelog and release history."""
+    # Try to find CHANGELOG.md in various locations
+    possible_paths = [
+        # When running from source
+        Path(__file__).parent.parent.parent / "CHANGELOG.md",
+        # When installed, might be in package data
+        Path(__file__).parent.parent / "CHANGELOG.md",
+        # Fallback to current directory
+        Path("CHANGELOG.md"),
+    ]
+    for path in possible_paths:
+        if path.exists():
+            try:
+                return path.read_text()
+            except Exception as e:
+                return f"Error reading CHANGELOG.md from {path}: {str(e)}"
+    return """CHANGELOG.md not found in package.
+For the latest changelog, please visit:
+https://github.com/mbailey/voicemode/blob/master/CHANGELOG.md"""

{voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/converse.py RENAMED Viewed

@@ -871,7 +871,7 @@ def record_audio(duration: float) -> np.ndarray:
             sys.stderr = original_stderr
-def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0) -> np.ndarray:
+def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0, vad_aggressiveness: Optional[int] = None) -> np.ndarray:
     """Record audio from microphone with automatic silence detection.
     Uses WebRTC VAD to detect when the user stops speaking and automatically
@@ -881,6 +881,7 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
         max_duration: Maximum recording duration in seconds
         disable_silence_detection: If True, disables silence detection and uses fixed duration recording
         min_duration: Minimum recording duration before silence detection can stop (default: 0.0)
+        vad_aggressiveness: VAD aggressiveness level (0-3). If None, uses VAD_AGGRESSIVENESS from config
     Returns:
         Numpy array of recorded audio samples
@@ -902,8 +903,9 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
     logger.info(f"🎤 Recording with silence detection (max {max_duration}s)...")
     try:
-        # Initialize VAD
-        vad = webrtcvad.Vad(VAD_AGGRESSIVENESS)
+        # Initialize VAD with provided aggressiveness or default
+        effective_vad_aggressiveness = vad_aggressiveness if vad_aggressiveness is not None else VAD_AGGRESSIVENESS
+        vad = webrtcvad.Vad(effective_vad_aggressiveness)
         # Calculate chunk size (must be 10, 20, or 30ms worth of samples)
         chunk_samples = int(SAMPLE_RATE * VAD_CHUNK_DURATION_MS / 1000)
@@ -932,7 +934,7 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
         original_stdout = sys.stdout
         original_stderr = sys.stderr
-        logger.debug(f"VAD config - Aggressiveness: {VAD_AGGRESSIVENESS}, "
+        logger.debug(f"VAD config - Aggressiveness: {effective_vad_aggressiveness} (param: {vad_aggressiveness}, default: {VAD_AGGRESSIVENESS}), "
                     f"Silence threshold: {SILENCE_THRESHOLD_MS}ms, "
                     f"Min duration: {MIN_RECORDING_DURATION}s, "
                     f"Initial grace period: {INITIAL_SILENCE_GRACE_PERIOD}s")
@@ -1227,7 +1229,8 @@ async def converse(
     audio_feedback_style: Optional[str] = None,
     audio_format: Optional[str] = None,
     disable_silence_detection: Union[bool, str] = False,
-    speed: Optional[float] = None
+    speed: Optional[float] = None,
+    vad_aggressiveness: Optional[int] = None
 ) -> str:
     """Have a voice conversation - speak a message and optionally listen for response.
@@ -1289,6 +1292,16 @@ async def converse(
         speed: Speech rate/speed for TTS playback (default: None uses normal speed)
                Values: 0.25 to 4.0 (0.5 = half speed, 2.0 = double speed)
                Supported by both OpenAI and Kokoro TTS providers.
+        vad_aggressiveness: Voice Activity Detection aggressiveness level (default: None uses VOICEMODE_VAD_AGGRESSIVENESS env var)
+                            Controls how strict the VAD is about filtering out non-speech audio.
+                            Values: 0-3 (integer)
+                            - 0: Least aggressive filtering - includes more audio, may include non-speech
+                            - 1: Slightly stricter filtering
+                            - 2: Balanced filtering (default) - good for most environments
+                            - 3: Most aggressive filtering - strict speech detection, may cut off soft speech
+                            Use lower values (0-1) in quiet environments to catch all speech
+                            Use higher values (2-3) in noisy environments to reduce false triggers
         If wait_for_response is False: Confirmation that message was spoken
         If wait_for_response is True: The voice response received (or error/timeout message)
@@ -1320,6 +1333,15 @@ async def converse(
         - Slower speech: converse("This is slower speech", speed=0.8)
         Note: Speed control works with both OpenAI and Kokoro TTS providers
+    VAD Aggressiveness Examples:
+        - Quiet room, capture all speech: converse("Let's have a conversation", vad_aggressiveness=0)
+        - Normal home/office: converse("Tell me about your day")  # Uses default (2)
+        - Noisy cafe/outdoors: converse("Can you hear me?", vad_aggressiveness=3)
+        - Balance for most cases: converse("How are you?", vad_aggressiveness=2)
+        Remember: Lower values (0-1) = more permissive, may detect non-speech as speech
+                 Higher values (2-3) = more strict, may miss soft speech or whispers
     """
     # Convert string booleans to actual booleans
     if isinstance(wait_for_response, str):
@@ -1331,6 +1353,11 @@ async def converse(
     logger.info(f"Converse: '{message[:50]}{'...' if len(message) > 50 else ''}' (wait_for_response: {wait_for_response})")
+    # Validate vad_aggressiveness parameter
+    if vad_aggressiveness is not None:
+        if not isinstance(vad_aggressiveness, int) or vad_aggressiveness < 0 or vad_aggressiveness > 3:
+            return f"Error: vad_aggressiveness must be an integer between 0 and 3 (got {vad_aggressiveness})"
     # Validate duration parameters
     if wait_for_response:
         if min_listen_duration < 0:
@@ -1604,9 +1631,9 @@ async def converse(
                         event_logger.log_event(event_logger.RECORDING_START)
                     record_start = time.perf_counter()
-                    logger.debug(f"About to call record_audio_with_silence_detection with duration={listen_duration}, disable_silence_detection={disable_silence_detection}, min_duration={min_listen_duration}")
+                    logger.debug(f"About to call record_audio_with_silence_detection with duration={listen_duration}, disable_silence_detection={disable_silence_detection}, min_duration={min_listen_duration}, vad_aggressiveness={vad_aggressiveness}")
                     audio_data = await asyncio.get_event_loop().run_in_executor(
-                        None, record_audio_with_silence_detection, listen_duration, disable_silence_detection, min_listen_duration
+                        None, record_audio_with_silence_detection, listen_duration, disable_silence_detection, min_listen_duration, vad_aggressiveness
                     )
                     timings['record'] = time.perf_counter() - record_start