voice-mode 2.23.0__tar.gz → 2.25.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. {voice_mode-2.23.0 → voice_mode-2.25.0}/CHANGELOG.md +35 -2
  2. {voice_mode-2.23.0 → voice_mode-2.25.0}/PKG-INFO +1 -1
  3. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/__version__.py +1 -1
  4. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/cli.py +49 -25
  5. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/config.py +15 -5
  6. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/converse.py +97 -40
  7. {voice_mode-2.23.0 → voice_mode-2.25.0}/.gitignore +0 -0
  8. {voice_mode-2.23.0 → voice_mode-2.25.0}/README.md +0 -0
  9. {voice_mode-2.23.0 → voice_mode-2.25.0}/build_hooks.py +0 -0
  10. {voice_mode-2.23.0 → voice_mode-2.25.0}/pyproject.toml +0 -0
  11. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/__init__.py +0 -0
  12. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/__main__.py +0 -0
  13. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/cli_commands/__init__.py +0 -0
  14. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/cli_commands/exchanges.py +0 -0
  15. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/conversation_logger.py +0 -0
  16. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/core.py +0 -0
  17. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/data/versions.json +0 -0
  18. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/__init__.py +0 -0
  19. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/conversations.py +0 -0
  20. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/filters.py +0 -0
  21. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/formatters.py +0 -0
  22. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/models.py +0 -0
  23. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/reader.py +0 -0
  24. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/stats.py +0 -0
  25. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/README.md +0 -0
  26. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/app/api/connection-details/route.ts +0 -0
  27. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/app/favicon.ico +0 -0
  28. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/app/globals.css +0 -0
  29. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/app/layout.tsx +0 -0
  30. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/app/page.tsx +0 -0
  31. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/components/CloseIcon.tsx +0 -0
  32. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/components/NoAgentNotification.tsx +0 -0
  33. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/components/TranscriptionView.tsx +0 -0
  34. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts +0 -0
  35. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/hooks/useLocalMicTrack.ts +0 -0
  36. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/next-env.d.ts +0 -0
  37. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/next.config.mjs +0 -0
  38. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/package-lock.json +0 -0
  39. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/package.json +0 -0
  40. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/pnpm-lock.yaml +0 -0
  41. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/postcss.config.mjs +0 -0
  42. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/tailwind.config.ts +0 -0
  43. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/tsconfig.json +0 -0
  44. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/prompts/README.md +0 -0
  45. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/prompts/__init__.py +0 -0
  46. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/prompts/converse.py +0 -0
  47. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/prompts/release_notes.py +0 -0
  48. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/prompts/services.py +0 -0
  49. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/provider_discovery.py +0 -0
  50. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/providers.py +0 -0
  51. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/__init__.py +0 -0
  52. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/audio_files.py +0 -0
  53. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/changelog.py +0 -0
  54. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/configuration.py +0 -0
  55. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/statistics.py +0 -0
  56. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/version.py +0 -0
  57. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/whisper_models.py +0 -0
  58. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/server.py +0 -0
  59. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/shared.py +0 -0
  60. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/simple_failover.py +0 -0
  61. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/statistics.py +0 -0
  62. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/streaming.py +0 -0
  63. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.frontend.plist +0 -0
  64. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
  65. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.livekit.plist +0 -0
  66. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
  67. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
  68. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
  69. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-frontend.service +0 -0
  70. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
  71. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-livekit.service +0 -0
  72. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
  73. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/__init__.py +0 -0
  74. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/configuration_management.py +0 -0
  75. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/dependencies.py +0 -0
  76. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/devices.py +0 -0
  77. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/diagnostics.py +0 -0
  78. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/providers.py +0 -0
  79. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/service.py +0 -0
  80. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/kokoro/install.py +0 -0
  81. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
  82. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/list_versions.py +0 -0
  83. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/livekit/__init__.py +0 -0
  84. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/livekit/frontend.py +0 -0
  85. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/livekit/install.py +0 -0
  86. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/livekit/production_server.py +0 -0
  87. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/livekit/uninstall.py +0 -0
  88. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/version_info.py +0 -0
  89. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/whisper/download_model.py +0 -0
  90. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/whisper/install.py +0 -0
  91. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/whisper/uninstall.py +0 -0
  92. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/statistics.py +0 -0
  93. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/voice_registry.py +0 -0
  94. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/__init__.py +0 -0
  95. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/audio_diagnostics.py +0 -0
  96. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/event_logger.py +0 -0
  97. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/ffmpeg_check.py +0 -0
  98. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/format_migration.py +0 -0
  99. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/gpu_detection.py +0 -0
  100. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/migration_helpers.py +0 -0
  101. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/services/common.py +0 -0
  102. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/services/kokoro_helpers.py +0 -0
  103. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/services/livekit_helpers.py +0 -0
  104. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/services/whisper_helpers.py +0 -0
  105. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/version_helpers.py +0 -0
  106. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/version.py +0 -0
  107. {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/voice_preferences.py +0 -0
@@ -7,6 +7,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.25.0] - 2025-08-18
11
+
12
+ ### Fixed
13
+ - **uvx command refresh flag** - Add --refresh flag to all uvx commands in installer
14
+ - Ensures latest version is always fetched when running voice-mode commands
15
+ - Fixes issues with cached old versions being used
16
+ - Applies to service installation, uninstallation, and status commands
17
+ - **Performance optimization** - Significantly improved help command performance
18
+ - Lazy load heavy imports (numpy, scipy, webrtcvad) only when needed
19
+ - Help command now runs 10x faster (from ~1.5s to ~0.15s)
20
+ - Faster MCP server startup time for better user experience
21
+ - **Config path expansion** - Fixed tilde expansion for user home directories
22
+ - Configuration paths now properly expand `~` to user home directory
23
+ - Fixes issues with paths like `~/Models/kokoro` not being found
24
+ - Added comprehensive tests for path expansion functionality
25
+ - **Frontend imports** - Corrected import statements to use single module
26
+ - Fixed import errors in livekit frontend commands
27
+ - All frontend commands now properly import from frontend module
28
+
29
+ ## [2.24.0] - 2025-08-16
30
+
31
+ ### Added
32
+ - **Enhanced Voice Activity Detection** - Improved silence detection behavior
33
+ - VAD now waits indefinitely for speech before starting silence detection
34
+ - No more timeouts when user hasn't started speaking yet
35
+ - Silent recordings are not sent to STT, reducing API costs and preventing hallucinations
36
+ - Returns "No speech detected" message instead of processing silence
37
+ - Significantly improves user experience for voice interactions
38
+ - **VAD debugging mode** - Comprehensive debugging for Voice Activity Detection
39
+ - New `VOICEMODE_VAD_DEBUG` environment variable enables detailed VAD logging
40
+ - Shows real-time speech detection decisions, state transitions, and timing
41
+ - Helps diagnose issues where recording stops before speech or cuts off early
42
+ - Added test script `scripts/test-vad-enhancement.py` for VAD testing
43
+ - Documented in `docs/vad-debugging.md` with common issues and solutions
44
+
10
45
  ## [2.23.0] - 2025-08-16
11
46
 
12
47
  ### Added
@@ -29,8 +64,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
29
64
 
30
65
  ## [2.22.3] - 2025-08-16
31
66
 
32
- ## [2.23.0] - 2025-08-16
33
-
34
67
  ### Fixed
35
68
  - **Service auto-enable error** - Fix 'FunctionTool' object is not callable
36
69
  - Changed whisper and kokoro installers to use `enable_service` function instead of MCP tool
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: voice-mode
3
- Version: 2.23.0
3
+ Version: 2.25.0
4
4
  Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
5
5
  Project-URL: Homepage, https://github.com/mbailey/voicemode
6
6
  Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -1,3 +1,3 @@
1
1
  # This file is automatically updated by 'make release'
2
2
  # Do not edit manually
3
- __version__ = "2.23.0"
3
+ __version__ = "2.25.0"
@@ -6,7 +6,6 @@ import sys
6
6
  import os
7
7
  import warnings
8
8
  import click
9
- from .server import main as voice_mode_main
10
9
 
11
10
  # Suppress known deprecation warnings for better user experience
12
11
  # These apply to both CLI commands and MCP server operation
@@ -47,6 +46,7 @@ def voice_mode_main_cli(ctx, debug):
47
46
  if ctx.invoked_subcommand is None:
48
47
  # No subcommand - run MCP server
49
48
  # Note: warnings are already suppressed at module level unless debug is enabled
49
+ from .server import main as voice_mode_main
50
50
  voice_mode_main()
51
51
 
52
52
 
@@ -74,36 +74,14 @@ def livekit():
74
74
  pass
75
75
 
76
76
 
77
- # Import service functions
78
- from voice_mode.tools.service import (
79
- status_service, start_service, stop_service, restart_service,
80
- enable_service, disable_service, view_logs, update_service_files
81
- )
82
-
83
- # Import install/uninstall functions
84
- from voice_mode.tools.services.kokoro.install import kokoro_install
85
- from voice_mode.tools.services.kokoro.uninstall import kokoro_uninstall
86
- from voice_mode.tools.services.whisper.install import whisper_install
87
- from voice_mode.tools.services.whisper.uninstall import whisper_uninstall
88
- from voice_mode.tools.services.whisper.download_model import download_model
89
- from voice_mode.tools.services.livekit.install import livekit_install
90
- from voice_mode.tools.services.livekit.uninstall import livekit_uninstall
91
- from voice_mode.tools.services.livekit.frontend import livekit_frontend_start, livekit_frontend_stop, livekit_frontend_status, livekit_frontend_open, livekit_frontend_logs, livekit_frontend_install
92
-
93
- # Import configuration management functions
94
- from voice_mode.tools.configuration_management import update_config, list_config_keys
95
-
96
- # Import diagnostic functions - extract the actual async functions from the tools
97
- from voice_mode.tools.diagnostics import voice_mode_info
98
- from voice_mode.tools.devices import check_audio_devices
99
- from voice_mode.tools.voice_registry import voice_registry
100
- from voice_mode.tools.dependencies import check_audio_dependencies
77
+ # Service functions are imported lazily in their respective command handlers to improve startup time
101
78
 
102
79
 
103
80
  # Kokoro service commands
104
81
  @kokoro.command()
105
82
  def status():
106
83
  """Show Kokoro service status."""
84
+ from voice_mode.tools.service import status_service
107
85
  result = asyncio.run(status_service("kokoro"))
108
86
  click.echo(result)
109
87
 
@@ -111,6 +89,7 @@ def status():
111
89
  @kokoro.command()
112
90
  def start():
113
91
  """Start Kokoro service."""
92
+ from voice_mode.tools.service import start_service
114
93
  result = asyncio.run(start_service("kokoro"))
115
94
  click.echo(result)
116
95
 
@@ -118,6 +97,7 @@ def start():
118
97
  @kokoro.command()
119
98
  def stop():
120
99
  """Stop Kokoro service."""
100
+ from voice_mode.tools.service import stop_service
121
101
  result = asyncio.run(stop_service("kokoro"))
122
102
  click.echo(result)
123
103
 
@@ -125,6 +105,7 @@ def stop():
125
105
  @kokoro.command()
126
106
  def restart():
127
107
  """Restart Kokoro service."""
108
+ from voice_mode.tools.service import restart_service
128
109
  result = asyncio.run(restart_service("kokoro"))
129
110
  click.echo(result)
130
111
 
@@ -132,6 +113,7 @@ def restart():
132
113
  @kokoro.command()
133
114
  def enable():
134
115
  """Enable Kokoro service to start at boot/login."""
116
+ from voice_mode.tools.service import enable_service
135
117
  result = asyncio.run(enable_service("kokoro"))
136
118
  click.echo(result)
137
119
 
@@ -139,6 +121,7 @@ def enable():
139
121
  @kokoro.command()
140
122
  def disable():
141
123
  """Disable Kokoro service from starting at boot/login."""
124
+ from voice_mode.tools.service import disable_service
142
125
  result = asyncio.run(disable_service("kokoro"))
143
126
  click.echo(result)
144
127
 
@@ -147,6 +130,7 @@ def disable():
147
130
  @click.option('--lines', '-n', default=50, help='Number of log lines to show')
148
131
  def logs(lines):
149
132
  """View Kokoro service logs."""
133
+ from voice_mode.tools.service import view_logs
150
134
  result = asyncio.run(view_logs("kokoro", lines))
151
135
  click.echo(result)
152
136
 
@@ -154,6 +138,7 @@ def logs(lines):
154
138
  @kokoro.command("update-service-files")
155
139
  def kokoro_update_service_files():
156
140
  """Update Kokoro service files to latest version."""
141
+ from voice_mode.tools.service import update_service_files
157
142
  result = asyncio.run(update_service_files("kokoro"))
158
143
  click.echo(result)
159
144
 
@@ -193,6 +178,7 @@ def health():
193
178
  @click.option('--auto-enable/--no-auto-enable', default=None, help='Enable service at boot/login')
194
179
  def install(install_dir, port, force, version, auto_enable):
195
180
  """Install kokoro-fastapi TTS service."""
181
+ from voice_mode.tools.services.kokoro.install import kokoro_install
196
182
  result = asyncio.run(kokoro_install.fn(
197
183
  install_dir=install_dir,
198
184
  port=port,
@@ -227,6 +213,7 @@ def install(install_dir, port, force, version, auto_enable):
227
213
  @click.confirmation_option(prompt='Are you sure you want to uninstall Kokoro?')
228
214
  def uninstall(remove_models, remove_all_data):
229
215
  """Uninstall kokoro-fastapi service and optionally remove data."""
216
+ from voice_mode.tools.services.kokoro.uninstall import kokoro_uninstall
230
217
  result = asyncio.run(kokoro_uninstall.fn(
231
218
  remove_models=remove_models,
232
219
  remove_all_data=remove_all_data
@@ -260,6 +247,7 @@ def uninstall(remove_models, remove_all_data):
260
247
  @whisper.command()
261
248
  def status():
262
249
  """Show Whisper service status."""
250
+ from voice_mode.tools.service import status_service
263
251
  result = asyncio.run(status_service("whisper"))
264
252
  click.echo(result)
265
253
 
@@ -267,6 +255,7 @@ def status():
267
255
  @whisper.command()
268
256
  def start():
269
257
  """Start Whisper service."""
258
+ from voice_mode.tools.service import start_service
270
259
  result = asyncio.run(start_service("whisper"))
271
260
  click.echo(result)
272
261
 
@@ -274,6 +263,7 @@ def start():
274
263
  @whisper.command()
275
264
  def stop():
276
265
  """Stop Whisper service."""
266
+ from voice_mode.tools.service import stop_service
277
267
  result = asyncio.run(stop_service("whisper"))
278
268
  click.echo(result)
279
269
 
@@ -281,6 +271,7 @@ def stop():
281
271
  @whisper.command()
282
272
  def restart():
283
273
  """Restart Whisper service."""
274
+ from voice_mode.tools.service import restart_service
284
275
  result = asyncio.run(restart_service("whisper"))
285
276
  click.echo(result)
286
277
 
@@ -288,6 +279,7 @@ def restart():
288
279
  @whisper.command()
289
280
  def enable():
290
281
  """Enable Whisper service to start at boot/login."""
282
+ from voice_mode.tools.service import enable_service
291
283
  result = asyncio.run(enable_service("whisper"))
292
284
  click.echo(result)
293
285
 
@@ -295,6 +287,7 @@ def enable():
295
287
  @whisper.command()
296
288
  def disable():
297
289
  """Disable Whisper service from starting at boot/login."""
290
+ from voice_mode.tools.service import disable_service
298
291
  result = asyncio.run(disable_service("whisper"))
299
292
  click.echo(result)
300
293
 
@@ -303,6 +296,7 @@ def disable():
303
296
  @click.option('--lines', '-n', default=50, help='Number of log lines to show')
304
297
  def logs(lines):
305
298
  """View Whisper service logs."""
299
+ from voice_mode.tools.service import view_logs
306
300
  result = asyncio.run(view_logs("whisper", lines))
307
301
  click.echo(result)
308
302
 
@@ -310,6 +304,7 @@ def logs(lines):
310
304
  @whisper.command("update-service-files")
311
305
  def whisper_update_service_files():
312
306
  """Update Whisper service files to latest version."""
307
+ from voice_mode.tools.service import update_service_files
313
308
  result = asyncio.run(update_service_files("whisper"))
314
309
  click.echo(result)
315
310
 
@@ -350,6 +345,7 @@ def health():
350
345
  @click.option('--auto-enable/--no-auto-enable', default=None, help='Enable service at boot/login')
351
346
  def install(install_dir, model, use_gpu, force, version, auto_enable):
352
347
  """Install whisper.cpp STT service with automatic system detection."""
348
+ from voice_mode.tools.services.whisper.install import whisper_install
353
349
  result = asyncio.run(whisper_install.fn(
354
350
  install_dir=install_dir,
355
351
  model=model,
@@ -394,6 +390,7 @@ def install(install_dir, model, use_gpu, force, version, auto_enable):
394
390
  @click.confirmation_option(prompt='Are you sure you want to uninstall Whisper?')
395
391
  def uninstall(remove_models, remove_all_data):
396
392
  """Uninstall whisper.cpp and optionally remove models and data."""
393
+ from voice_mode.tools.services.whisper.uninstall import whisper_uninstall
397
394
  result = asyncio.run(whisper_uninstall.fn(
398
395
  remove_models=remove_models,
399
396
  remove_all_data=remove_all_data
@@ -437,6 +434,7 @@ def download_model_cmd(model, force, skip_core_ml):
437
434
  medium, medium.en, large-v1, large-v2, large-v3, large-v3-turbo
438
435
  """
439
436
  import json
437
+ from voice_mode.tools.services.whisper.download_model import download_model
440
438
  result = asyncio.run(download_model.fn(
441
439
  model=model,
442
440
  force_download=force,
@@ -478,6 +476,7 @@ def download_model_cmd(model, force, skip_core_ml):
478
476
  @livekit.command()
479
477
  def status():
480
478
  """Show LiveKit service status."""
479
+ from voice_mode.tools.service import status_service
481
480
  result = asyncio.run(status_service("livekit"))
482
481
  click.echo(result)
483
482
 
@@ -485,6 +484,7 @@ def status():
485
484
  @livekit.command()
486
485
  def start():
487
486
  """Start LiveKit service."""
487
+ from voice_mode.tools.service import start_service
488
488
  result = asyncio.run(start_service("livekit"))
489
489
  click.echo(result)
490
490
 
@@ -492,6 +492,7 @@ def start():
492
492
  @livekit.command()
493
493
  def stop():
494
494
  """Stop LiveKit service."""
495
+ from voice_mode.tools.service import stop_service
495
496
  result = asyncio.run(stop_service("livekit"))
496
497
  click.echo(result)
497
498
 
@@ -499,6 +500,7 @@ def stop():
499
500
  @livekit.command()
500
501
  def restart():
501
502
  """Restart LiveKit service."""
503
+ from voice_mode.tools.service import restart_service
502
504
  result = asyncio.run(restart_service("livekit"))
503
505
  click.echo(result)
504
506
 
@@ -506,6 +508,7 @@ def restart():
506
508
  @livekit.command()
507
509
  def enable():
508
510
  """Enable LiveKit service to start at boot/login."""
511
+ from voice_mode.tools.service import enable_service
509
512
  result = asyncio.run(enable_service("livekit"))
510
513
  click.echo(result)
511
514
 
@@ -513,6 +516,7 @@ def enable():
513
516
  @livekit.command()
514
517
  def disable():
515
518
  """Disable LiveKit service from starting at boot/login."""
519
+ from voice_mode.tools.service import disable_service
516
520
  result = asyncio.run(disable_service("livekit"))
517
521
  click.echo(result)
518
522
 
@@ -521,6 +525,7 @@ def disable():
521
525
  @click.option('--lines', '-n', default=50, help='Number of log lines to show')
522
526
  def logs(lines):
523
527
  """View LiveKit service logs."""
528
+ from voice_mode.tools.service import view_logs
524
529
  result = asyncio.run(view_logs("livekit", lines))
525
530
  click.echo(result)
526
531
 
@@ -528,6 +533,7 @@ def logs(lines):
528
533
  @livekit.command()
529
534
  def update():
530
535
  """Update LiveKit service files to the latest version."""
536
+ from voice_mode.tools.service import update_service_files
531
537
  result = asyncio.run(update_service_files("livekit"))
532
538
 
533
539
  if result.get("success"):
@@ -546,6 +552,7 @@ def update():
546
552
  @click.option('--auto-enable/--no-auto-enable', default=None, help='Enable service at boot/login')
547
553
  def install(install_dir, port, force, version, auto_enable):
548
554
  """Install LiveKit server with development configuration."""
555
+ from voice_mode.tools.services.livekit.install import livekit_install
549
556
  result = asyncio.run(livekit_install.fn(
550
557
  install_dir=install_dir,
551
558
  port=port,
@@ -583,6 +590,7 @@ def install(install_dir, port, force, version, auto_enable):
583
590
  @click.confirmation_option(prompt='Are you sure you want to uninstall LiveKit?')
584
591
  def uninstall(remove_config, remove_all_data):
585
592
  """Uninstall LiveKit server and optionally remove configuration and data."""
593
+ from voice_mode.tools.services.livekit.uninstall import livekit_uninstall
586
594
  result = asyncio.run(livekit_uninstall.fn(
587
595
  remove_config=remove_config,
588
596
  remove_all_data=remove_all_data
@@ -615,6 +623,7 @@ def frontend():
615
623
  @click.option('--auto-enable/--no-auto-enable', default=None, help='Enable service after installation (default: from config)')
616
624
  def frontend_install(auto_enable):
617
625
  """Install and setup LiveKit Voice Assistant Frontend."""
626
+ from voice_mode.tools.services.livekit.frontend import livekit_frontend_install
618
627
  result = asyncio.run(livekit_frontend_install.fn(auto_enable=auto_enable))
619
628
 
620
629
  if result.get('success'):
@@ -642,6 +651,7 @@ def frontend_install(auto_enable):
642
651
  @click.option('--host', default='127.0.0.1', help='Host to bind to (default: 127.0.0.1)')
643
652
  def frontend_start(port, host):
644
653
  """Start the LiveKit Voice Assistant Frontend."""
654
+ from voice_mode.tools.services.livekit.frontend import livekit_frontend_start
645
655
  result = asyncio.run(livekit_frontend_start.fn(port=port, host=host))
646
656
 
647
657
  if result.get('success'):
@@ -663,6 +673,7 @@ def frontend_start(port, host):
663
673
  @frontend.command("stop")
664
674
  def frontend_stop():
665
675
  """Stop the LiveKit Voice Assistant Frontend."""
676
+ from voice_mode.tools.services.livekit.frontend import livekit_frontend_stop
666
677
  result = asyncio.run(livekit_frontend_stop.fn())
667
678
 
668
679
  if result.get('success'):
@@ -674,6 +685,7 @@ def frontend_stop():
674
685
  @frontend.command("status")
675
686
  def frontend_status():
676
687
  """Check status of the LiveKit Voice Assistant Frontend."""
688
+ from voice_mode.tools.services.livekit.frontend import livekit_frontend_status
677
689
  result = asyncio.run(livekit_frontend_status.fn())
678
690
 
679
691
  if 'error' in result:
@@ -701,6 +713,7 @@ def frontend_open():
701
713
 
702
714
  Starts the frontend if not already running, then opens it in the default browser.
703
715
  """
716
+ from voice_mode.tools.services.livekit.frontend import livekit_frontend_open
704
717
  result = asyncio.run(livekit_frontend_open.fn())
705
718
 
706
719
  if result.get('success'):
@@ -723,11 +736,13 @@ def frontend_logs(lines, follow):
723
736
  """
724
737
  if follow:
725
738
  # For following, run tail -f directly
739
+ from voice_mode.tools.services.livekit.frontend import livekit_frontend_logs
726
740
  result = asyncio.run(livekit_frontend_logs.fn(follow=True))
727
741
  if result.get('success'):
728
742
  click.echo(f"📂 Log file: {result['log_file']}")
729
743
  click.echo("🔄 Following logs (press Ctrl+C to stop)...")
730
744
  try:
745
+ import subprocess
731
746
  subprocess.run(["tail", "-f", result['log_file']])
732
747
  except KeyboardInterrupt:
733
748
  click.echo("\n✅ Stopped following logs")
@@ -735,6 +750,7 @@ def frontend_logs(lines, follow):
735
750
  click.echo(f"❌ Error: {result.get('error', 'Unknown error')}")
736
751
  else:
737
752
  # Show last N lines
753
+ from voice_mode.tools.services.livekit.frontend import livekit_frontend_logs
738
754
  result = asyncio.run(livekit_frontend_logs.fn(lines=lines, follow=False))
739
755
  if result.get('success'):
740
756
  click.echo(f"📂 Log file: {result['log_file']}")
@@ -748,6 +764,7 @@ def frontend_logs(lines, follow):
748
764
  @frontend.command("enable")
749
765
  def frontend_enable():
750
766
  """Enable frontend service to start automatically at boot/login."""
767
+ from voice_mode.tools.service import enable_service
751
768
  result = asyncio.run(enable_service("frontend"))
752
769
  # enable_service returns a string, not a dict
753
770
  click.echo(result)
@@ -756,6 +773,7 @@ def frontend_enable():
756
773
  @frontend.command("disable")
757
774
  def frontend_disable():
758
775
  """Disable frontend service from starting automatically."""
776
+ from voice_mode.tools.service import disable_service
759
777
  result = asyncio.run(disable_service("frontend"))
760
778
  # disable_service returns a string, not a dict
761
779
  click.echo(result)
@@ -827,6 +845,7 @@ def config():
827
845
  @config.command("list")
828
846
  def config_list():
829
847
  """List all configuration keys with their descriptions."""
848
+ from voice_mode.tools.configuration_management import list_config_keys
830
849
  result = asyncio.run(list_config_keys.fn())
831
850
  click.echo(result)
832
851
 
@@ -873,6 +892,7 @@ def config_get(key):
873
892
  @click.argument('value')
874
893
  def config_set(key, value):
875
894
  """Set a configuration value."""
895
+ from voice_mode.tools.configuration_management import update_config
876
896
  result = asyncio.run(update_config.fn(key, value))
877
897
  click.echo(result)
878
898
 
@@ -887,6 +907,7 @@ def diag():
887
907
  @diag.command()
888
908
  def info():
889
909
  """Show voice-mode installation information."""
910
+ from voice_mode.tools.diagnostics import voice_mode_info
890
911
  result = asyncio.run(voice_mode_info.fn())
891
912
  click.echo(result)
892
913
 
@@ -894,6 +915,7 @@ def info():
894
915
  @diag.command()
895
916
  def devices():
896
917
  """List available audio input and output devices."""
918
+ from voice_mode.tools.devices import check_audio_devices
897
919
  result = asyncio.run(check_audio_devices.fn())
898
920
  click.echo(result)
899
921
 
@@ -901,6 +923,7 @@ def devices():
901
923
  @diag.command()
902
924
  def registry():
903
925
  """Show voice provider registry with all discovered endpoints."""
926
+ from voice_mode.tools.voice_registry import voice_registry
904
927
  result = asyncio.run(voice_registry.fn())
905
928
  click.echo(result)
906
929
 
@@ -909,6 +932,7 @@ def registry():
909
932
  def dependencies():
910
933
  """Check system audio dependencies and provide installation guidance."""
911
934
  import json
935
+ from voice_mode.tools.dependencies import check_audio_dependencies
912
936
  result = asyncio.run(check_audio_dependencies.fn())
913
937
 
914
938
  if isinstance(result, dict):
@@ -149,19 +149,29 @@ def env_bool(env_var: str, default: bool = False) -> bool:
149
149
  value = os.getenv(env_var, "").lower()
150
150
  return value in ("true", "1", "yes", "on") if value else default
151
151
 
152
+ # Helper function to expand paths with tilde
153
+ def expand_path(path_str: str) -> Path:
154
+ """Expand tilde and environment variables in path strings."""
155
+ # First expand any environment variables
156
+ expanded = os.path.expandvars(path_str)
157
+ # Then expand tilde
158
+ expanded = os.path.expanduser(expanded)
159
+ return Path(expanded)
160
+
152
161
  # Base directory for all voicemode data
153
- BASE_DIR = Path(os.getenv("VOICEMODE_BASE_DIR", str(Path.home() / ".voicemode")))
162
+ BASE_DIR = expand_path(os.getenv("VOICEMODE_BASE_DIR", str(Path.home() / ".voicemode")))
154
163
 
155
164
  # Unified directory structure
156
165
  AUDIO_DIR = BASE_DIR / "audio"
157
166
  TRANSCRIPTIONS_DIR = BASE_DIR / "transcriptions"
158
167
  LOGS_DIR = BASE_DIR / "logs"
159
168
  # CONFIG_DIR = BASE_DIR / "config" # Removed - config stored in .voicemode.env file instead
160
- MODELS_DIR = Path(os.getenv("VOICEMODE_MODELS_DIR", str(BASE_DIR / "models")))
169
+ MODELS_DIR = expand_path(os.getenv("VOICEMODE_MODELS_DIR", str(BASE_DIR / "models")))
161
170
 
162
171
  # Debug configuration
163
172
  DEBUG = os.getenv("VOICEMODE_DEBUG", "").lower() in ("true", "1", "yes", "on")
164
173
  TRACE_DEBUG = os.getenv("VOICEMODE_DEBUG", "").lower() == "trace"
174
+ VAD_DEBUG = os.getenv("VOICEMODE_VAD_DEBUG", "").lower() in ("true", "1", "yes", "on")
165
175
  DEBUG_DIR = LOGS_DIR / "debug" # Debug files now go under logs
166
176
 
167
177
  # Master save-all configuration
@@ -224,14 +234,14 @@ LIVEKIT_API_SECRET = os.getenv("LIVEKIT_API_SECRET", "secret")
224
234
  WHISPER_MODEL = os.getenv("VOICEMODE_WHISPER_MODEL", "large-v2")
225
235
  WHISPER_PORT = int(os.getenv("VOICEMODE_WHISPER_PORT", "2022"))
226
236
  WHISPER_LANGUAGE = os.getenv("VOICEMODE_WHISPER_LANGUAGE", "auto")
227
- WHISPER_MODEL_PATH = os.getenv("VOICEMODE_WHISPER_MODEL_PATH", str(BASE_DIR / "models" / "whisper"))
237
+ WHISPER_MODEL_PATH = expand_path(os.getenv("VOICEMODE_WHISPER_MODEL_PATH", str(BASE_DIR / "models" / "whisper")))
228
238
 
229
239
  # ==================== KOKORO CONFIGURATION ====================
230
240
 
231
241
  # Kokoro-specific configuration
232
242
  KOKORO_PORT = int(os.getenv("VOICEMODE_KOKORO_PORT", "8880"))
233
- KOKORO_MODELS_DIR = os.getenv("VOICEMODE_KOKORO_MODELS_DIR", str(BASE_DIR / "models" / "kokoro"))
234
- KOKORO_CACHE_DIR = os.getenv("VOICEMODE_KOKORO_CACHE_DIR", str(BASE_DIR / "cache" / "kokoro"))
243
+ KOKORO_MODELS_DIR = expand_path(os.getenv("VOICEMODE_KOKORO_MODELS_DIR", str(BASE_DIR / "models" / "kokoro")))
244
+ KOKORO_CACHE_DIR = expand_path(os.getenv("VOICEMODE_KOKORO_CACHE_DIR", str(BASE_DIR / "cache" / "kokoro")))
235
245
  KOKORO_DEFAULT_VOICE = os.getenv("VOICEMODE_KOKORO_DEFAULT_VOICE", "af_sky")
236
246
 
237
247
  # ==================== LIVEKIT CONFIGURATION ====================
@@ -32,6 +32,7 @@ from voice_mode.config import (
32
32
  CHANNELS,
33
33
  DEBUG,
34
34
  DEBUG_DIR,
35
+ VAD_DEBUG,
35
36
  SAVE_AUDIO,
36
37
  AUDIO_DIR,
37
38
  OPENAI_API_KEY,
@@ -872,7 +873,7 @@ def record_audio(duration: float) -> np.ndarray:
872
873
  sys.stderr = original_stderr
873
874
 
874
875
 
875
- def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0, vad_aggressiveness: Optional[int] = None) -> np.ndarray:
876
+ def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0, vad_aggressiveness: Optional[int] = None) -> Tuple[np.ndarray, bool]:
876
877
  """Record audio from microphone with automatic silence detection.
877
878
 
878
879
  Uses WebRTC VAD to detect when the user stops speaking and automatically
@@ -885,21 +886,25 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
885
886
  vad_aggressiveness: VAD aggressiveness level (0-3). If None, uses VAD_AGGRESSIVENESS from config
886
887
 
887
888
  Returns:
888
- Numpy array of recorded audio samples
889
+ Tuple of (audio_data, speech_detected):
890
+ - audio_data: Numpy array of recorded audio samples
891
+ - speech_detected: Boolean indicating if speech was detected during recording
889
892
  """
890
893
 
891
894
  logger.info(f"record_audio_with_silence_detection called - VAD_AVAILABLE={VAD_AVAILABLE}, DISABLE_SILENCE_DETECTION={DISABLE_SILENCE_DETECTION}, min_duration={min_duration}")
892
895
 
893
896
  if not VAD_AVAILABLE:
894
897
  logger.warning("webrtcvad not available, falling back to fixed duration recording")
895
- return record_audio(max_duration)
898
+ # For fallback, assume speech is present since we can't detect
899
+ return (record_audio(max_duration), True)
896
900
 
897
901
  if DISABLE_SILENCE_DETECTION or disable_silence_detection:
898
902
  if disable_silence_detection:
899
903
  logger.info("Silence detection disabled for this interaction by request")
900
904
  else:
901
905
  logger.info("Silence detection disabled globally via VOICEMODE_DISABLE_SILENCE_DETECTION")
902
- return record_audio(max_duration)
906
+ # For fallback, assume speech is present since we can't detect
907
+ return (record_audio(max_duration), True)
903
908
 
904
909
  logger.info(f"🎤 Recording with silence detection (max {max_duration}s)...")
905
910
 
@@ -940,6 +945,16 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
940
945
  f"Min duration: {MIN_RECORDING_DURATION}s, "
941
946
  f"Initial grace period: {INITIAL_SILENCE_GRACE_PERIOD}s")
942
947
 
948
+ if VAD_DEBUG:
949
+ logger.info(f"[VAD_DEBUG] Starting VAD recording with config:")
950
+ logger.info(f"[VAD_DEBUG] max_duration: {max_duration}s")
951
+ logger.info(f"[VAD_DEBUG] min_duration: {min_duration}s")
952
+ logger.info(f"[VAD_DEBUG] effective_min_duration: {max(MIN_RECORDING_DURATION, min_duration)}s")
953
+ logger.info(f"[VAD_DEBUG] VAD aggressiveness: {effective_vad_aggressiveness}")
954
+ logger.info(f"[VAD_DEBUG] Silence threshold: {SILENCE_THRESHOLD_MS}ms")
955
+ logger.info(f"[VAD_DEBUG] Sample rate: {SAMPLE_RATE}Hz (VAD using {vad_sample_rate}Hz)")
956
+ logger.info(f"[VAD_DEBUG] Chunk duration: {VAD_CHUNK_DURATION_MS}ms")
957
+
943
958
  def audio_callback(indata, frames, time, status):
944
959
  """Callback for continuous audio stream"""
945
960
  if status:
@@ -979,35 +994,53 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
979
994
  # Check if chunk contains speech
980
995
  try:
981
996
  is_speech = vad.is_speech(chunk_bytes, vad_sample_rate)
997
+ if VAD_DEBUG:
998
+ # Log VAD decision every 500ms for less spam
999
+ if int(recording_duration * 1000) % 500 == 0:
1000
+ rms = np.sqrt(np.mean(chunk.astype(float)**2))
1001
+ logger.info(f"[VAD_DEBUG] t={recording_duration:.1f}s: speech={is_speech}, RMS={rms:.0f}, state={'WAITING' if not speech_detected else 'ACTIVE'}")
982
1002
  except Exception as vad_e:
983
1003
  logger.warning(f"VAD error: {vad_e}, treating as speech")
984
1004
  is_speech = True
985
1005
 
986
- if is_speech:
987
- if not speech_detected:
988
- logger.debug("Speech detected, recording...")
989
- speech_detected = True
990
- silence_duration_ms = 0
1006
+ # State machine for speech detection
1007
+ if not speech_detected:
1008
+ # WAITING_FOR_SPEECH state
1009
+ if is_speech:
1010
+ logger.info("🎤 Speech detected, starting active recording")
1011
+ if VAD_DEBUG:
1012
+ logger.info(f"[VAD_DEBUG] STATE CHANGE: WAITING_FOR_SPEECH -> SPEECH_ACTIVE at t={recording_duration:.1f}s")
1013
+ speech_detected = True
1014
+ silence_duration_ms = 0
1015
+ # No timeout in this state - just keep waiting
1016
+ # The only exit is speech detection or max_duration
991
1017
  else:
992
- silence_duration_ms += VAD_CHUNK_DURATION_MS
993
- if speech_detected and silence_duration_ms % 200 == 0: # Log every 200ms
994
- logger.debug(f"Silence: {silence_duration_ms}ms")
1018
+ # We have detected speech at some point
1019
+ if is_speech:
1020
+ # SPEECH_ACTIVE state - reset silence counter
1021
+ silence_duration_ms = 0
1022
+ else:
1023
+ # SILENCE_AFTER_SPEECH state - accumulate silence
1024
+ silence_duration_ms += VAD_CHUNK_DURATION_MS
1025
+ if VAD_DEBUG and silence_duration_ms % 100 == 0: # More frequent logging in debug mode
1026
+ logger.info(f"[VAD_DEBUG] Accumulating silence: {silence_duration_ms}/{SILENCE_THRESHOLD_MS}ms, t={recording_duration:.1f}s")
1027
+ elif silence_duration_ms % 200 == 0: # Log every 200ms
1028
+ logger.debug(f"Silence: {silence_duration_ms}ms")
1029
+
1030
+ # Check if we should stop due to silence threshold
1031
+ # Use the larger of MIN_RECORDING_DURATION (global) or min_duration (parameter)
1032
+ effective_min_duration = max(MIN_RECORDING_DURATION, min_duration)
1033
+ if recording_duration >= effective_min_duration and silence_duration_ms >= SILENCE_THRESHOLD_MS:
1034
+ logger.info(f"✓ Silence threshold reached after {recording_duration:.1f}s of recording")
1035
+ if VAD_DEBUG:
1036
+ logger.info(f"[VAD_DEBUG] STOP: silence_duration={silence_duration_ms}ms >= threshold={SILENCE_THRESHOLD_MS}ms")
1037
+ logger.info(f"[VAD_DEBUG] STOP: recording_duration={recording_duration:.1f}s >= min_duration={effective_min_duration}s")
1038
+ stop_recording = True
1039
+ elif VAD_DEBUG and recording_duration < effective_min_duration:
1040
+ if int(recording_duration * 1000) % 500 == 0: # Log every 500ms
1041
+ logger.info(f"[VAD_DEBUG] Min duration not met: {recording_duration:.1f}s < {effective_min_duration}s")
995
1042
 
996
1043
  recording_duration += chunk_duration_s
997
-
998
- # Check stop conditions
999
- # Use the larger of MIN_RECORDING_DURATION (global) or min_duration (parameter)
1000
- effective_min_duration = max(MIN_RECORDING_DURATION, min_duration)
1001
- if speech_detected and recording_duration >= effective_min_duration:
1002
- if silence_duration_ms >= SILENCE_THRESHOLD_MS:
1003
- logger.info(f"✓ Silence detected after {recording_duration:.1f}s (min: {effective_min_duration:.1f}s), stopping recording")
1004
- stop_recording = True
1005
-
1006
- # Also stop if we haven't detected any speech after a grace period
1007
- # Give user time to start speaking
1008
- if not speech_detected and recording_duration >= INITIAL_SILENCE_GRACE_PERIOD:
1009
- logger.info(f"No speech detected after {INITIAL_SILENCE_GRACE_PERIOD}s grace period, stopping recording")
1010
- stop_recording = True
1011
1044
 
1012
1045
  except queue.Empty:
1013
1046
  # No audio data available, continue waiting
@@ -1019,17 +1052,26 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
1019
1052
  # Concatenate all chunks
1020
1053
  if chunks:
1021
1054
  full_recording = np.concatenate(chunks)
1022
- logger.info(f"✓ Recorded {len(full_recording)} samples ({recording_duration:.1f}s)")
1055
+
1056
+ if not speech_detected:
1057
+ logger.info(f"✓ Recording completed ({recording_duration:.1f}s) - No speech detected")
1058
+ if VAD_DEBUG:
1059
+ logger.info(f"[VAD_DEBUG] FINAL STATE: No speech was ever detected during recording")
1060
+ else:
1061
+ logger.info(f"✓ Recorded {len(full_recording)} samples ({recording_duration:.1f}s) with speech")
1062
+ if VAD_DEBUG:
1063
+ logger.info(f"[VAD_DEBUG] FINAL STATE: Speech was detected, recording complete")
1023
1064
 
1024
1065
  if DEBUG:
1025
1066
  # Calculate RMS for debug
1026
1067
  rms = np.sqrt(np.mean(full_recording.astype(float) ** 2))
1027
1068
  logger.debug(f"Recording stats - RMS: {rms:.2f}, Speech detected: {speech_detected}")
1028
1069
 
1029
- return full_recording
1070
+ # Return tuple: (audio_data, speech_detected)
1071
+ return (full_recording, speech_detected)
1030
1072
  else:
1031
1073
  logger.warning("No audio chunks recorded")
1032
- return np.array([])
1074
+ return (np.array([]), False)
1033
1075
 
1034
1076
  except Exception as e:
1035
1077
  logger.error(f"Recording with VAD failed: {e}")
@@ -1042,7 +1084,8 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
1042
1084
  logger.error(f"\n{help_message}")
1043
1085
 
1044
1086
  logger.info("Falling back to fixed duration recording")
1045
- return record_audio(max_duration)
1087
+ # For fallback, assume speech is present since we can't detect
1088
+ return (record_audio(max_duration), True)
1046
1089
 
1047
1090
  finally:
1048
1091
  # Restore stdio
@@ -1056,7 +1099,8 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
1056
1099
  except Exception as e:
1057
1100
  logger.error(f"VAD initialization failed: {e}")
1058
1101
  logger.info("Falling back to fixed duration recording")
1059
- return record_audio(max_duration)
1102
+ # For fallback, assume speech is present since we can't detect
1103
+ return (record_audio(max_duration), True)
1060
1104
 
1061
1105
 
1062
1106
  async def check_livekit_available() -> bool:
@@ -1713,7 +1757,7 @@ async def converse(
1713
1757
 
1714
1758
  record_start = time.perf_counter()
1715
1759
  logger.debug(f"About to call record_audio_with_silence_detection with duration={listen_duration}, disable_silence_detection={disable_silence_detection}, min_duration={min_listen_duration}, vad_aggressiveness={vad_aggressiveness}")
1716
- audio_data = await asyncio.get_event_loop().run_in_executor(
1760
+ audio_data, speech_detected = await asyncio.get_event_loop().run_in_executor(
1717
1761
  None, record_audio_with_silence_detection, listen_duration, disable_silence_detection, min_listen_duration, vad_aggressiveness
1718
1762
  )
1719
1763
  timings['record'] = time.perf_counter() - record_start
@@ -1736,14 +1780,27 @@ async def converse(
1736
1780
  result = "Error: Could not record audio"
1737
1781
  return result
1738
1782
 
1739
- # Convert to text
1740
- # Log STT start
1741
- if event_logger:
1742
- event_logger.log_event(event_logger.STT_START)
1743
-
1744
- stt_start = time.perf_counter()
1745
- response_text = await speech_to_text(audio_data, SAVE_AUDIO, AUDIO_DIR if SAVE_AUDIO else None, transport)
1746
- timings['stt'] = time.perf_counter() - stt_start
1783
+ # Check if no speech was detected
1784
+ if not speech_detected:
1785
+ logger.info("No speech detected during recording - skipping STT processing")
1786
+ response_text = None
1787
+ timings['stt'] = 0.0
1788
+
1789
+ # Still save the audio if configured
1790
+ if SAVE_AUDIO and AUDIO_DIR:
1791
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
1792
+ audio_path = os.path.join(AUDIO_DIR, f"no_speech_{timestamp}.wav")
1793
+ write(audio_path, SAMPLE_RATE, audio_data)
1794
+ logger.debug(f"Saved no-speech audio to: {audio_path}")
1795
+ else:
1796
+ # Convert to text
1797
+ # Log STT start
1798
+ if event_logger:
1799
+ event_logger.log_event(event_logger.STT_START)
1800
+
1801
+ stt_start = time.perf_counter()
1802
+ response_text = await speech_to_text(audio_data, SAVE_AUDIO, AUDIO_DIR if SAVE_AUDIO else None, transport)
1803
+ timings['stt'] = time.perf_counter() - stt_start
1747
1804
 
1748
1805
  # Log STT complete
1749
1806
  if event_logger:
File without changes
File without changes
File without changes
File without changes