voice-mode 2.19.0__tar.gz → 2.20.1__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {voice_mode-2.19.0 → voice_mode-2.20.1}/CHANGELOG.md +41 -0
  2. {voice_mode-2.19.0 → voice_mode-2.20.1}/PKG-INFO +1 -1
  3. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/__version__.py +1 -1
  4. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/converse.py +46 -7
  5. {voice_mode-2.19.0 → voice_mode-2.20.1}/.gitignore +0 -0
  6. {voice_mode-2.19.0 → voice_mode-2.20.1}/README.md +0 -0
  7. {voice_mode-2.19.0 → voice_mode-2.20.1}/pyproject.toml +0 -0
  8. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/__init__.py +0 -0
  9. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/__main__.py +0 -0
  10. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/cli.py +0 -0
  11. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/cli_commands/__init__.py +0 -0
  12. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/cli_commands/exchanges.py +0 -0
  13. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/config.py +0 -0
  14. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/conversation_logger.py +0 -0
  15. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/core.py +0 -0
  16. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/data/versions.json +0 -0
  17. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/exchanges/__init__.py +0 -0
  18. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/exchanges/conversations.py +0 -0
  19. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/exchanges/filters.py +0 -0
  20. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/exchanges/formatters.py +0 -0
  21. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/exchanges/models.py +0 -0
  22. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/exchanges/reader.py +0 -0
  23. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/exchanges/stats.py +0 -0
  24. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/prompts/README.md +0 -0
  25. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/prompts/__init__.py +0 -0
  26. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/prompts/converse.py +0 -0
  27. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/prompts/release_notes.py +0 -0
  28. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/prompts/services.py +0 -0
  29. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/provider_discovery.py +0 -0
  30. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/providers.py +0 -0
  31. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/resources/__init__.py +0 -0
  32. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/resources/audio_files.py +0 -0
  33. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/resources/changelog.py +0 -0
  34. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/resources/configuration.py +0 -0
  35. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/resources/statistics.py +0 -0
  36. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/resources/version.py +0 -0
  37. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/resources/whisper_models.py +0 -0
  38. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/server.py +0 -0
  39. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/shared.py +0 -0
  40. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/simple_failover.py +0 -0
  41. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/statistics.py +0 -0
  42. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/streaming.py +0 -0
  43. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
  44. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
  45. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
  46. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
  47. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
  48. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
  49. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/__init__.py +0 -0
  50. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/configuration_management.py +0 -0
  51. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/dependencies.py +0 -0
  52. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/devices.py +0 -0
  53. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/diagnostics.py +0 -0
  54. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/providers.py +0 -0
  55. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/service.py +0 -0
  56. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/services/kokoro/install.py +0 -0
  57. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
  58. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/services/list_versions.py +0 -0
  59. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/services/version_info.py +0 -0
  60. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/services/whisper/download_model.py +0 -0
  61. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/services/whisper/install.py +0 -0
  62. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/services/whisper/uninstall.py +0 -0
  63. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/statistics.py +0 -0
  64. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/tools/voice_registry.py +0 -0
  65. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/utils/__init__.py +0 -0
  66. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/utils/audio_diagnostics.py +0 -0
  67. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/utils/event_logger.py +0 -0
  68. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/utils/ffmpeg_check.py +0 -0
  69. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/utils/format_migration.py +0 -0
  70. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/utils/gpu_detection.py +0 -0
  71. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/utils/migration_helpers.py +0 -0
  72. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/utils/services/common.py +0 -0
  73. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/utils/services/kokoro_helpers.py +0 -0
  74. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/utils/services/whisper_helpers.py +0 -0
  75. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/utils/version_helpers.py +0 -0
  76. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/version.py +0 -0
  77. {voice_mode-2.19.0 → voice_mode-2.20.1}/voice_mode/voice_preferences.py +0 -0
@@ -7,8 +7,49 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.20.1] - 2025-08-11
11
+
12
+ ### Fixed
13
+ - **Speed parameter validation error** - Fixed MCP validation error when passing speed parameter as string
14
+ - Added type conversion from string to float for speed parameter
15
+ - Now properly handles speed values passed by MCP clients (e.g., via uvx)
16
+ - Added comprehensive validation and error messages for invalid speed values
17
+
18
+ ## [2.20.0] - 2025-08-10
19
+
20
+ ### Added
21
+ - **VAD aggressiveness control**
22
+ - New `vad_aggressiveness` parameter in converse tool for controlling Voice Activity Detection sensitivity (0-3)
23
+ - 0 = least aggressive filtering (more permissive), 3 = most aggressive (strict)
24
+ - Allows adapting to different environments: quiet rooms (0-1) vs noisy environments (2-3)
25
+ - Also configurable via VOICEMODE_VAD_AGGRESSIVENESS environment variable
26
+
27
+ ### Changed
28
+ - **Improved VAD documentation**
29
+ - Clarified that aggressiveness controls how strictly VAD filters out non-speech
30
+ - Updated examples to better demonstrate appropriate use cases
31
+ - Fixed configuration documentation that had backwards descriptions
32
+
10
33
  ## [2.19.0] - 2025-08-10
11
34
 
35
+ ### Added
36
+ - **MCP prompt command: /release-notes**
37
+ - New command to display recent changelog entries directly in Claude Code
38
+ - Shows 5 most recent versions by default (configurable with parameter)
39
+ - Parses and formats CHANGELOG.md for easy reading
40
+ - Inspired by Claude Code's own /release-notes feature
41
+ - Includes comprehensive test coverage
42
+
43
+ ### Fixed
44
+ - Release notes prompt now handles empty string parameters correctly
45
+ - Command works properly with both source and installed packages
46
+ - Changelog is now accessible as an MCP resource when package is installed
47
+
48
+ ### Changed
49
+ - Release notes output format now matches Claude Code's clean, minimal style
50
+ - Removed decorative headers and footers for cleaner terminal output
51
+ - Release notes displayed in chronological order (oldest first)
52
+
12
53
  ## [2.18.0] - 2025-08-10
13
54
 
14
55
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: voice-mode
3
- Version: 2.19.0
3
+ Version: 2.20.1
4
4
  Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
5
5
  Project-URL: Homepage, https://github.com/mbailey/voicemode
6
6
  Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -1,3 +1,3 @@
1
1
  # This file is automatically updated by 'make release'
2
2
  # Do not edit manually
3
- __version__ = "2.19.0"
3
+ __version__ = "2.20.1"
@@ -871,7 +871,7 @@ def record_audio(duration: float) -> np.ndarray:
871
871
  sys.stderr = original_stderr
872
872
 
873
873
 
874
- def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0) -> np.ndarray:
874
+ def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0, vad_aggressiveness: Optional[int] = None) -> np.ndarray:
875
875
  """Record audio from microphone with automatic silence detection.
876
876
 
877
877
  Uses WebRTC VAD to detect when the user stops speaking and automatically
@@ -881,6 +881,7 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
881
881
  max_duration: Maximum recording duration in seconds
882
882
  disable_silence_detection: If True, disables silence detection and uses fixed duration recording
883
883
  min_duration: Minimum recording duration before silence detection can stop (default: 0.0)
884
+ vad_aggressiveness: VAD aggressiveness level (0-3). If None, uses VAD_AGGRESSIVENESS from config
884
885
 
885
886
  Returns:
886
887
  Numpy array of recorded audio samples
@@ -902,8 +903,9 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
902
903
  logger.info(f"🎤 Recording with silence detection (max {max_duration}s)...")
903
904
 
904
905
  try:
905
- # Initialize VAD
906
- vad = webrtcvad.Vad(VAD_AGGRESSIVENESS)
906
+ # Initialize VAD with provided aggressiveness or default
907
+ effective_vad_aggressiveness = vad_aggressiveness if vad_aggressiveness is not None else VAD_AGGRESSIVENESS
908
+ vad = webrtcvad.Vad(effective_vad_aggressiveness)
907
909
 
908
910
  # Calculate chunk size (must be 10, 20, or 30ms worth of samples)
909
911
  chunk_samples = int(SAMPLE_RATE * VAD_CHUNK_DURATION_MS / 1000)
@@ -932,7 +934,7 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
932
934
  original_stdout = sys.stdout
933
935
  original_stderr = sys.stderr
934
936
 
935
- logger.debug(f"VAD config - Aggressiveness: {VAD_AGGRESSIVENESS}, "
937
+ logger.debug(f"VAD config - Aggressiveness: {effective_vad_aggressiveness} (param: {vad_aggressiveness}, default: {VAD_AGGRESSIVENESS}), "
936
938
  f"Silence threshold: {SILENCE_THRESHOLD_MS}ms, "
937
939
  f"Min duration: {MIN_RECORDING_DURATION}s, "
938
940
  f"Initial grace period: {INITIAL_SILENCE_GRACE_PERIOD}s")
@@ -1227,7 +1229,8 @@ async def converse(
1227
1229
  audio_feedback_style: Optional[str] = None,
1228
1230
  audio_format: Optional[str] = None,
1229
1231
  disable_silence_detection: Union[bool, str] = False,
1230
- speed: Optional[float] = None
1232
+ speed: Optional[float] = None,
1233
+ vad_aggressiveness: Optional[int] = None
1231
1234
  ) -> str:
1232
1235
  """Have a voice conversation - speak a message and optionally listen for response.
1233
1236
 
@@ -1289,6 +1292,16 @@ async def converse(
1289
1292
  speed: Speech rate/speed for TTS playback (default: None uses normal speed)
1290
1293
  Values: 0.25 to 4.0 (0.5 = half speed, 2.0 = double speed)
1291
1294
  Supported by both OpenAI and Kokoro TTS providers.
1295
+ vad_aggressiveness: Voice Activity Detection aggressiveness level (default: None uses VOICEMODE_VAD_AGGRESSIVENESS env var)
1296
+ Controls how strict the VAD is about filtering out non-speech audio.
1297
+ Values: 0-3 (integer)
1298
+ - 0: Least aggressive filtering - includes more audio, may include non-speech
1299
+ - 1: Slightly stricter filtering
1300
+ - 2: Balanced filtering (default) - good for most environments
1301
+ - 3: Most aggressive filtering - strict speech detection, may cut off soft speech
1302
+
1303
+ Use lower values (0-1) in quiet environments to catch all speech
1304
+ Use higher values (2-3) in noisy environments to reduce false triggers
1292
1305
  If wait_for_response is False: Confirmation that message was spoken
1293
1306
  If wait_for_response is True: The voice response received (or error/timeout message)
1294
1307
 
@@ -1320,6 +1333,15 @@ async def converse(
1320
1333
  - Slower speech: converse("This is slower speech", speed=0.8)
1321
1334
 
1322
1335
  Note: Speed control works with both OpenAI and Kokoro TTS providers
1336
+
1337
+ VAD Aggressiveness Examples:
1338
+ - Quiet room, capture all speech: converse("Let's have a conversation", vad_aggressiveness=0)
1339
+ - Normal home/office: converse("Tell me about your day") # Uses default (2)
1340
+ - Noisy cafe/outdoors: converse("Can you hear me?", vad_aggressiveness=3)
1341
+ - Balance for most cases: converse("How are you?", vad_aggressiveness=2)
1342
+
1343
+ Remember: Lower values (0-1) = more permissive, may detect non-speech as speech
1344
+ Higher values (2-3) = more strict, may miss soft speech or whispers
1323
1345
  """
1324
1346
  # Convert string booleans to actual booleans
1325
1347
  if isinstance(wait_for_response, str):
@@ -1329,8 +1351,25 @@ async def converse(
1329
1351
  if isinstance(audio_feedback, str):
1330
1352
  audio_feedback = audio_feedback.lower() in ('true', '1', 'yes', 'on')
1331
1353
 
1354
+ # Convert string speed to float
1355
+ if speed is not None and isinstance(speed, str):
1356
+ try:
1357
+ speed = float(speed)
1358
+ except ValueError:
1359
+ return f"❌ Error: speed must be a number (got '{speed}')"
1360
+
1361
+ # Validate speed parameter range
1362
+ if speed is not None:
1363
+ if not (0.25 <= speed <= 4.0):
1364
+ return f"❌ Error: speed must be between 0.25 and 4.0 (got {speed})"
1365
+
1332
1366
  logger.info(f"Converse: '{message[:50]}{'...' if len(message) > 50 else ''}' (wait_for_response: {wait_for_response})")
1333
1367
 
1368
+ # Validate vad_aggressiveness parameter
1369
+ if vad_aggressiveness is not None:
1370
+ if not isinstance(vad_aggressiveness, int) or vad_aggressiveness < 0 or vad_aggressiveness > 3:
1371
+ return f"Error: vad_aggressiveness must be an integer between 0 and 3 (got {vad_aggressiveness})"
1372
+
1334
1373
  # Validate duration parameters
1335
1374
  if wait_for_response:
1336
1375
  if min_listen_duration < 0:
@@ -1604,9 +1643,9 @@ async def converse(
1604
1643
  event_logger.log_event(event_logger.RECORDING_START)
1605
1644
 
1606
1645
  record_start = time.perf_counter()
1607
- logger.debug(f"About to call record_audio_with_silence_detection with duration={listen_duration}, disable_silence_detection={disable_silence_detection}, min_duration={min_listen_duration}")
1646
+ logger.debug(f"About to call record_audio_with_silence_detection with duration={listen_duration}, disable_silence_detection={disable_silence_detection}, min_duration={min_listen_duration}, vad_aggressiveness={vad_aggressiveness}")
1608
1647
  audio_data = await asyncio.get_event_loop().run_in_executor(
1609
- None, record_audio_with_silence_detection, listen_duration, disable_silence_detection, min_listen_duration
1648
+ None, record_audio_with_silence_detection, listen_duration, disable_silence_detection, min_listen_duration, vad_aggressiveness
1610
1649
  )
1611
1650
  timings['record'] = time.perf_counter() - record_start
1612
1651
 
File without changes
File without changes
File without changes