voice-mode 2.18.0__tar.gz → 2.20.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {voice_mode-2.18.0 → voice_mode-2.20.0}/CHANGELOG.md +35 -0
  2. {voice_mode-2.18.0 → voice_mode-2.20.0}/PKG-INFO +1 -1
  3. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/__version__.py +1 -1
  4. voice_mode-2.20.0/voice_mode/prompts/release_notes.py +78 -0
  5. voice_mode-2.20.0/voice_mode/resources/changelog.py +30 -0
  6. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/converse.py +34 -7
  7. {voice_mode-2.18.0 → voice_mode-2.20.0}/.gitignore +0 -0
  8. {voice_mode-2.18.0 → voice_mode-2.20.0}/README.md +0 -0
  9. {voice_mode-2.18.0 → voice_mode-2.20.0}/pyproject.toml +0 -0
  10. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/__init__.py +0 -0
  11. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/__main__.py +0 -0
  12. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/cli.py +0 -0
  13. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/cli_commands/__init__.py +0 -0
  14. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/cli_commands/exchanges.py +0 -0
  15. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/config.py +0 -0
  16. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/conversation_logger.py +0 -0
  17. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/core.py +0 -0
  18. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/data/versions.json +0 -0
  19. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/exchanges/__init__.py +0 -0
  20. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/exchanges/conversations.py +0 -0
  21. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/exchanges/filters.py +0 -0
  22. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/exchanges/formatters.py +0 -0
  23. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/exchanges/models.py +0 -0
  24. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/exchanges/reader.py +0 -0
  25. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/exchanges/stats.py +0 -0
  26. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/prompts/README.md +0 -0
  27. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/prompts/__init__.py +0 -0
  28. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/prompts/converse.py +0 -0
  29. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/prompts/services.py +0 -0
  30. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/provider_discovery.py +0 -0
  31. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/providers.py +0 -0
  32. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/resources/__init__.py +0 -0
  33. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/resources/audio_files.py +0 -0
  34. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/resources/configuration.py +0 -0
  35. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/resources/statistics.py +0 -0
  36. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/resources/version.py +0 -0
  37. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/resources/whisper_models.py +0 -0
  38. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/server.py +0 -0
  39. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/shared.py +0 -0
  40. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/simple_failover.py +0 -0
  41. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/statistics.py +0 -0
  42. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/streaming.py +0 -0
  43. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
  44. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
  45. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
  46. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
  47. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
  48. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
  49. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/__init__.py +0 -0
  50. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/configuration_management.py +0 -0
  51. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/dependencies.py +0 -0
  52. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/devices.py +0 -0
  53. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/diagnostics.py +0 -0
  54. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/providers.py +0 -0
  55. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/service.py +0 -0
  56. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/services/kokoro/install.py +0 -0
  57. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
  58. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/services/list_versions.py +0 -0
  59. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/services/version_info.py +0 -0
  60. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/services/whisper/download_model.py +0 -0
  61. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/services/whisper/install.py +0 -0
  62. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/services/whisper/uninstall.py +0 -0
  63. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/statistics.py +0 -0
  64. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/tools/voice_registry.py +0 -0
  65. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/utils/__init__.py +0 -0
  66. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/utils/audio_diagnostics.py +0 -0
  67. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/utils/event_logger.py +0 -0
  68. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/utils/ffmpeg_check.py +0 -0
  69. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/utils/format_migration.py +0 -0
  70. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/utils/gpu_detection.py +0 -0
  71. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/utils/migration_helpers.py +0 -0
  72. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/utils/services/common.py +0 -0
  73. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/utils/services/kokoro_helpers.py +0 -0
  74. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/utils/services/whisper_helpers.py +0 -0
  75. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/utils/version_helpers.py +0 -0
  76. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/version.py +0 -0
  77. {voice_mode-2.18.0 → voice_mode-2.20.0}/voice_mode/voice_preferences.py +0 -0
@@ -7,6 +7,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.20.0] - 2025-08-10
11
+
12
+ ### Added
13
+ - **VAD aggressiveness control**
14
+ - New `vad_aggressiveness` parameter in converse tool for controlling Voice Activity Detection sensitivity (0-3)
15
+ - 0 = least aggressive filtering (more permissive), 3 = most aggressive (strict)
16
+ - Allows adapting to different environments: quiet rooms (0-1) vs noisy environments (2-3)
17
+ - Also configurable via VOICEMODE_VAD_AGGRESSIVENESS environment variable
18
+
19
+ ### Changed
20
+ - **Improved VAD documentation**
21
+ - Clarified that aggressiveness controls how strictly VAD filters out non-speech
22
+ - Updated examples to better demonstrate appropriate use cases
23
+ - Fixed configuration documentation that had backwards descriptions
24
+
25
+ ## [2.19.0] - 2025-08-10
26
+
27
+ ### Added
28
+ - **MCP prompt command: /release-notes**
29
+ - New command to display recent changelog entries directly in Claude Code
30
+ - Shows 5 most recent versions by default (configurable with parameter)
31
+ - Parses and formats CHANGELOG.md for easy reading
32
+ - Inspired by Claude Code's own /release-notes feature
33
+ - Includes comprehensive test coverage
34
+
35
+ ### Fixed
36
+ - Release notes prompt now handles empty string parameters correctly
37
+ - Command works properly with both source and installed packages
38
+ - Changelog is now accessible as an MCP resource when package is installed
39
+
40
+ ### Changed
41
+ - Release notes output format now matches Claude Code's clean, minimal style
42
+ - Removed decorative headers and footers for cleaner terminal output
43
+ - Release notes displayed in chronological order (oldest first)
44
+
10
45
  ## [2.18.0] - 2025-08-10
11
46
 
12
47
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: voice-mode
3
- Version: 2.18.0
3
+ Version: 2.20.0
4
4
  Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
5
5
  Project-URL: Homepage, https://github.com/mbailey/voicemode
6
6
  Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -1,3 +1,3 @@
1
1
  # This file is automatically updated by 'make release'
2
2
  # Do not edit manually
3
- __version__ = "2.18.0"
3
+ __version__ = "2.20.0"
@@ -0,0 +1,78 @@
1
+ """Release notes prompt for displaying recent CHANGELOG entries."""
2
+
3
+ from voice_mode.server import mcp
4
+ # Import the resource at module level to ensure it's registered
5
+ from voice_mode.resources.changelog import changelog_resource
6
+
7
+
8
+ @mcp.prompt(name="release-notes")
9
+ def release_notes_prompt(versions: str = "5") -> str:
10
+ """View recent release notes from Voice Mode's CHANGELOG."""
11
+ # Handle empty string from Claude Code
12
+ if not versions or versions == "":
13
+ versions = "5"
14
+
15
+ # Get the changelog content from the resource
16
+ # Resources decorated with @mcp.resource need to access the fn attribute
17
+ if hasattr(changelog_resource, 'fn'):
18
+ changelog_content = changelog_resource.fn()
19
+ else:
20
+ changelog_content = changelog_resource()
21
+
22
+ # If we got an error message, return it
23
+ if changelog_content.startswith("Error") or changelog_content.startswith("CHANGELOG.md not found"):
24
+ return changelog_content
25
+
26
+ # Parse the changelog content
27
+ lines = changelog_content.split('\n')
28
+ versions_found = []
29
+ current_version = None
30
+ current_content = []
31
+
32
+ for line in lines:
33
+ # Skip the header and empty lines at the beginning
34
+ if line.startswith('# Changelog') or (not line.strip() and not current_version):
35
+ continue
36
+
37
+ # Check for version header (## [x.y.z] - date)
38
+ if line.startswith('## [') and '] - ' in line:
39
+ # Save previous version if exists
40
+ if current_version and current_content:
41
+ versions_found.append({
42
+ 'header': current_version,
43
+ 'content': '\n'.join(current_content).strip()
44
+ })
45
+
46
+ # Start new version
47
+ current_version = line
48
+ current_content = []
49
+
50
+ # Stop if we have enough versions
51
+ if len(versions_found) >= int(versions):
52
+ break
53
+ elif current_version:
54
+ # Add content to current version
55
+ current_content.append(line)
56
+
57
+ # Don't forget the last version
58
+ if current_version and current_content and len(versions_found) < int(versions):
59
+ versions_found.append({
60
+ 'header': current_version,
61
+ 'content': '\n'.join(current_content).strip()
62
+ })
63
+
64
+ # Reverse to show oldest first (newest last)
65
+ versions_found.reverse()
66
+
67
+ # Format the output
68
+ output = []
69
+ for version in versions_found:
70
+ output.append(version['header'])
71
+ output.append(version['content'])
72
+ output.append('') # Empty line between versions
73
+
74
+ result = '\n'.join(output).strip()
75
+
76
+ # Return just the changelog entries without header/footer
77
+ # to match Claude Code's clean output format
78
+ return result
@@ -0,0 +1,30 @@
1
+ """CHANGELOG resource for Voice Mode."""
2
+
3
+ from pathlib import Path
4
+ from voice_mode.server import mcp
5
+
6
+
7
+ @mcp.resource("changelog://voice-mode")
8
+ def changelog_resource() -> str:
9
+ """Voice Mode changelog and release history."""
10
+ # Try to find CHANGELOG.md in various locations
11
+ possible_paths = [
12
+ # When running from source
13
+ Path(__file__).parent.parent.parent / "CHANGELOG.md",
14
+ # When installed, might be in package data
15
+ Path(__file__).parent.parent / "CHANGELOG.md",
16
+ # Fallback to current directory
17
+ Path("CHANGELOG.md"),
18
+ ]
19
+
20
+ for path in possible_paths:
21
+ if path.exists():
22
+ try:
23
+ return path.read_text()
24
+ except Exception as e:
25
+ return f"Error reading CHANGELOG.md from {path}: {str(e)}"
26
+
27
+ return """CHANGELOG.md not found in package.
28
+
29
+ For the latest changelog, please visit:
30
+ https://github.com/mbailey/voicemode/blob/master/CHANGELOG.md"""
@@ -871,7 +871,7 @@ def record_audio(duration: float) -> np.ndarray:
871
871
  sys.stderr = original_stderr
872
872
 
873
873
 
874
- def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0) -> np.ndarray:
874
+ def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0, vad_aggressiveness: Optional[int] = None) -> np.ndarray:
875
875
  """Record audio from microphone with automatic silence detection.
876
876
 
877
877
  Uses WebRTC VAD to detect when the user stops speaking and automatically
@@ -881,6 +881,7 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
881
881
  max_duration: Maximum recording duration in seconds
882
882
  disable_silence_detection: If True, disables silence detection and uses fixed duration recording
883
883
  min_duration: Minimum recording duration before silence detection can stop (default: 0.0)
884
+ vad_aggressiveness: VAD aggressiveness level (0-3). If None, uses VAD_AGGRESSIVENESS from config
884
885
 
885
886
  Returns:
886
887
  Numpy array of recorded audio samples
@@ -902,8 +903,9 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
902
903
  logger.info(f"🎤 Recording with silence detection (max {max_duration}s)...")
903
904
 
904
905
  try:
905
- # Initialize VAD
906
- vad = webrtcvad.Vad(VAD_AGGRESSIVENESS)
906
+ # Initialize VAD with provided aggressiveness or default
907
+ effective_vad_aggressiveness = vad_aggressiveness if vad_aggressiveness is not None else VAD_AGGRESSIVENESS
908
+ vad = webrtcvad.Vad(effective_vad_aggressiveness)
907
909
 
908
910
  # Calculate chunk size (must be 10, 20, or 30ms worth of samples)
909
911
  chunk_samples = int(SAMPLE_RATE * VAD_CHUNK_DURATION_MS / 1000)
@@ -932,7 +934,7 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
932
934
  original_stdout = sys.stdout
933
935
  original_stderr = sys.stderr
934
936
 
935
- logger.debug(f"VAD config - Aggressiveness: {VAD_AGGRESSIVENESS}, "
937
+ logger.debug(f"VAD config - Aggressiveness: {effective_vad_aggressiveness} (param: {vad_aggressiveness}, default: {VAD_AGGRESSIVENESS}), "
936
938
  f"Silence threshold: {SILENCE_THRESHOLD_MS}ms, "
937
939
  f"Min duration: {MIN_RECORDING_DURATION}s, "
938
940
  f"Initial grace period: {INITIAL_SILENCE_GRACE_PERIOD}s")
@@ -1227,7 +1229,8 @@ async def converse(
1227
1229
  audio_feedback_style: Optional[str] = None,
1228
1230
  audio_format: Optional[str] = None,
1229
1231
  disable_silence_detection: Union[bool, str] = False,
1230
- speed: Optional[float] = None
1232
+ speed: Optional[float] = None,
1233
+ vad_aggressiveness: Optional[int] = None
1231
1234
  ) -> str:
1232
1235
  """Have a voice conversation - speak a message and optionally listen for response.
1233
1236
 
@@ -1289,6 +1292,16 @@ async def converse(
1289
1292
  speed: Speech rate/speed for TTS playback (default: None uses normal speed)
1290
1293
  Values: 0.25 to 4.0 (0.5 = half speed, 2.0 = double speed)
1291
1294
  Supported by both OpenAI and Kokoro TTS providers.
1295
+ vad_aggressiveness: Voice Activity Detection aggressiveness level (default: None uses VOICEMODE_VAD_AGGRESSIVENESS env var)
1296
+ Controls how strict the VAD is about filtering out non-speech audio.
1297
+ Values: 0-3 (integer)
1298
+ - 0: Least aggressive filtering - includes more audio, may include non-speech
1299
+ - 1: Slightly stricter filtering
1300
+ - 2: Balanced filtering (default) - good for most environments
1301
+ - 3: Most aggressive filtering - strict speech detection, may cut off soft speech
1302
+
1303
+ Use lower values (0-1) in quiet environments to catch all speech
1304
+ Use higher values (2-3) in noisy environments to reduce false triggers
1292
1305
  If wait_for_response is False: Confirmation that message was spoken
1293
1306
  If wait_for_response is True: The voice response received (or error/timeout message)
1294
1307
 
@@ -1320,6 +1333,15 @@ async def converse(
1320
1333
  - Slower speech: converse("This is slower speech", speed=0.8)
1321
1334
 
1322
1335
  Note: Speed control works with both OpenAI and Kokoro TTS providers
1336
+
1337
+ VAD Aggressiveness Examples:
1338
+ - Quiet room, capture all speech: converse("Let's have a conversation", vad_aggressiveness=0)
1339
+ - Normal home/office: converse("Tell me about your day") # Uses default (2)
1340
+ - Noisy cafe/outdoors: converse("Can you hear me?", vad_aggressiveness=3)
1341
+ - Balance for most cases: converse("How are you?", vad_aggressiveness=2)
1342
+
1343
+ Remember: Lower values (0-1) = more permissive, may detect non-speech as speech
1344
+ Higher values (2-3) = more strict, may miss soft speech or whispers
1323
1345
  """
1324
1346
  # Convert string booleans to actual booleans
1325
1347
  if isinstance(wait_for_response, str):
@@ -1331,6 +1353,11 @@ async def converse(
1331
1353
 
1332
1354
  logger.info(f"Converse: '{message[:50]}{'...' if len(message) > 50 else ''}' (wait_for_response: {wait_for_response})")
1333
1355
 
1356
+ # Validate vad_aggressiveness parameter
1357
+ if vad_aggressiveness is not None:
1358
+ if not isinstance(vad_aggressiveness, int) or vad_aggressiveness < 0 or vad_aggressiveness > 3:
1359
+ return f"Error: vad_aggressiveness must be an integer between 0 and 3 (got {vad_aggressiveness})"
1360
+
1334
1361
  # Validate duration parameters
1335
1362
  if wait_for_response:
1336
1363
  if min_listen_duration < 0:
@@ -1604,9 +1631,9 @@ async def converse(
1604
1631
  event_logger.log_event(event_logger.RECORDING_START)
1605
1632
 
1606
1633
  record_start = time.perf_counter()
1607
- logger.debug(f"About to call record_audio_with_silence_detection with duration={listen_duration}, disable_silence_detection={disable_silence_detection}, min_duration={min_listen_duration}")
1634
+ logger.debug(f"About to call record_audio_with_silence_detection with duration={listen_duration}, disable_silence_detection={disable_silence_detection}, min_duration={min_listen_duration}, vad_aggressiveness={vad_aggressiveness}")
1608
1635
  audio_data = await asyncio.get_event_loop().run_in_executor(
1609
- None, record_audio_with_silence_detection, listen_duration, disable_silence_detection, min_listen_duration
1636
+ None, record_audio_with_silence_detection, listen_duration, disable_silence_detection, min_listen_duration, vad_aggressiveness
1610
1637
  )
1611
1638
  timings['record'] = time.perf_counter() - record_start
1612
1639
 
File without changes
File without changes
File without changes