voice-mode 2.23.0__tar.gz → 2.25.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {voice_mode-2.23.0 → voice_mode-2.25.0}/CHANGELOG.md +35 -2
- {voice_mode-2.23.0 → voice_mode-2.25.0}/PKG-INFO +1 -1
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/__version__.py +1 -1
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/cli.py +49 -25
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/config.py +15 -5
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/converse.py +97 -40
- {voice_mode-2.23.0 → voice_mode-2.25.0}/.gitignore +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/README.md +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/build_hooks.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/pyproject.toml +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/__init__.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/__main__.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/cli_commands/__init__.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/cli_commands/exchanges.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/conversation_logger.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/core.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/data/versions.json +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/__init__.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/conversations.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/filters.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/formatters.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/models.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/reader.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/exchanges/stats.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/README.md +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/app/api/connection-details/route.ts +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/app/favicon.ico +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/app/globals.css +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/app/layout.tsx +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/app/page.tsx +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/components/CloseIcon.tsx +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/components/NoAgentNotification.tsx +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/components/TranscriptionView.tsx +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/hooks/useLocalMicTrack.ts +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/next-env.d.ts +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/next.config.mjs +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/package-lock.json +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/package.json +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/pnpm-lock.yaml +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/postcss.config.mjs +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/tailwind.config.ts +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/tsconfig.json +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/prompts/README.md +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/prompts/__init__.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/prompts/converse.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/prompts/release_notes.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/prompts/services.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/provider_discovery.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/providers.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/__init__.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/audio_files.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/changelog.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/configuration.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/statistics.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/version.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/resources/whisper_models.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/server.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/shared.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/simple_failover.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/statistics.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/streaming.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.frontend.plist +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.livekit.plist +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-frontend.service +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-livekit.service +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/__init__.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/configuration_management.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/dependencies.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/devices.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/diagnostics.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/providers.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/service.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/kokoro/install.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/list_versions.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/livekit/__init__.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/livekit/frontend.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/livekit/install.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/livekit/production_server.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/livekit/uninstall.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/version_info.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/whisper/download_model.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/whisper/install.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/whisper/uninstall.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/statistics.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/voice_registry.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/__init__.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/audio_diagnostics.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/event_logger.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/ffmpeg_check.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/format_migration.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/gpu_detection.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/migration_helpers.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/services/common.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/services/kokoro_helpers.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/services/livekit_helpers.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/services/whisper_helpers.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/utils/version_helpers.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/version.py +0 -0
- {voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/voice_preferences.py +0 -0
@@ -7,6 +7,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [2.25.0] - 2025-08-18
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
- **uvx command refresh flag** - Add --refresh flag to all uvx commands in installer
|
14
|
+
- Ensures latest version is always fetched when running voice-mode commands
|
15
|
+
- Fixes issues with cached old versions being used
|
16
|
+
- Applies to service installation, uninstallation, and status commands
|
17
|
+
- **Performance optimization** - Significantly improved help command performance
|
18
|
+
- Lazy load heavy imports (numpy, scipy, webrtcvad) only when needed
|
19
|
+
- Help command now runs 10x faster (from ~1.5s to ~0.15s)
|
20
|
+
- Faster MCP server startup time for better user experience
|
21
|
+
- **Config path expansion** - Fixed tilde expansion for user home directories
|
22
|
+
- Configuration paths now properly expand `~` to user home directory
|
23
|
+
- Fixes issues with paths like `~/Models/kokoro` not being found
|
24
|
+
- Added comprehensive tests for path expansion functionality
|
25
|
+
- **Frontend imports** - Corrected import statements to use single module
|
26
|
+
- Fixed import errors in livekit frontend commands
|
27
|
+
- All frontend commands now properly import from frontend module
|
28
|
+
|
29
|
+
## [2.24.0] - 2025-08-16
|
30
|
+
|
31
|
+
### Added
|
32
|
+
- **Enhanced Voice Activity Detection** - Improved silence detection behavior
|
33
|
+
- VAD now waits indefinitely for speech before starting silence detection
|
34
|
+
- No more timeouts when user hasn't started speaking yet
|
35
|
+
- Silent recordings are not sent to STT, reducing API costs and preventing hallucinations
|
36
|
+
- Returns "No speech detected" message instead of processing silence
|
37
|
+
- Significantly improves user experience for voice interactions
|
38
|
+
- **VAD debugging mode** - Comprehensive debugging for Voice Activity Detection
|
39
|
+
- New `VOICEMODE_VAD_DEBUG` environment variable enables detailed VAD logging
|
40
|
+
- Shows real-time speech detection decisions, state transitions, and timing
|
41
|
+
- Helps diagnose issues where recording stops before speech or cuts off early
|
42
|
+
- Added test script `scripts/test-vad-enhancement.py` for VAD testing
|
43
|
+
- Documented in `docs/vad-debugging.md` with common issues and solutions
|
44
|
+
|
10
45
|
## [2.23.0] - 2025-08-16
|
11
46
|
|
12
47
|
### Added
|
@@ -29,8 +64,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
29
64
|
|
30
65
|
## [2.22.3] - 2025-08-16
|
31
66
|
|
32
|
-
## [2.23.0] - 2025-08-16
|
33
|
-
|
34
67
|
### Fixed
|
35
68
|
- **Service auto-enable error** - Fix 'FunctionTool' object is not callable
|
36
69
|
- Changed whisper and kokoro installers to use `enable_service` function instead of MCP tool
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: voice-mode
|
3
|
-
Version: 2.23.0
|
3
|
+
Version: 2.25.0
|
4
4
|
Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
|
5
5
|
Project-URL: Homepage, https://github.com/mbailey/voicemode
|
6
6
|
Project-URL: Repository, https://github.com/mbailey/voicemode
|
@@ -6,7 +6,6 @@ import sys
|
|
6
6
|
import os
|
7
7
|
import warnings
|
8
8
|
import click
|
9
|
-
from .server import main as voice_mode_main
|
10
9
|
|
11
10
|
# Suppress known deprecation warnings for better user experience
|
12
11
|
# These apply to both CLI commands and MCP server operation
|
@@ -47,6 +46,7 @@ def voice_mode_main_cli(ctx, debug):
|
|
47
46
|
if ctx.invoked_subcommand is None:
|
48
47
|
# No subcommand - run MCP server
|
49
48
|
# Note: warnings are already suppressed at module level unless debug is enabled
|
49
|
+
from .server import main as voice_mode_main
|
50
50
|
voice_mode_main()
|
51
51
|
|
52
52
|
|
@@ -74,36 +74,14 @@ def livekit():
|
|
74
74
|
pass
|
75
75
|
|
76
76
|
|
77
|
-
#
|
78
|
-
from voice_mode.tools.service import (
|
79
|
-
status_service, start_service, stop_service, restart_service,
|
80
|
-
enable_service, disable_service, view_logs, update_service_files
|
81
|
-
)
|
82
|
-
|
83
|
-
# Import install/uninstall functions
|
84
|
-
from voice_mode.tools.services.kokoro.install import kokoro_install
|
85
|
-
from voice_mode.tools.services.kokoro.uninstall import kokoro_uninstall
|
86
|
-
from voice_mode.tools.services.whisper.install import whisper_install
|
87
|
-
from voice_mode.tools.services.whisper.uninstall import whisper_uninstall
|
88
|
-
from voice_mode.tools.services.whisper.download_model import download_model
|
89
|
-
from voice_mode.tools.services.livekit.install import livekit_install
|
90
|
-
from voice_mode.tools.services.livekit.uninstall import livekit_uninstall
|
91
|
-
from voice_mode.tools.services.livekit.frontend import livekit_frontend_start, livekit_frontend_stop, livekit_frontend_status, livekit_frontend_open, livekit_frontend_logs, livekit_frontend_install
|
92
|
-
|
93
|
-
# Import configuration management functions
|
94
|
-
from voice_mode.tools.configuration_management import update_config, list_config_keys
|
95
|
-
|
96
|
-
# Import diagnostic functions - extract the actual async functions from the tools
|
97
|
-
from voice_mode.tools.diagnostics import voice_mode_info
|
98
|
-
from voice_mode.tools.devices import check_audio_devices
|
99
|
-
from voice_mode.tools.voice_registry import voice_registry
|
100
|
-
from voice_mode.tools.dependencies import check_audio_dependencies
|
77
|
+
# Service functions are imported lazily in their respective command handlers to improve startup time
|
101
78
|
|
102
79
|
|
103
80
|
# Kokoro service commands
|
104
81
|
@kokoro.command()
|
105
82
|
def status():
|
106
83
|
"""Show Kokoro service status."""
|
84
|
+
from voice_mode.tools.service import status_service
|
107
85
|
result = asyncio.run(status_service("kokoro"))
|
108
86
|
click.echo(result)
|
109
87
|
|
@@ -111,6 +89,7 @@ def status():
|
|
111
89
|
@kokoro.command()
|
112
90
|
def start():
|
113
91
|
"""Start Kokoro service."""
|
92
|
+
from voice_mode.tools.service import start_service
|
114
93
|
result = asyncio.run(start_service("kokoro"))
|
115
94
|
click.echo(result)
|
116
95
|
|
@@ -118,6 +97,7 @@ def start():
|
|
118
97
|
@kokoro.command()
|
119
98
|
def stop():
|
120
99
|
"""Stop Kokoro service."""
|
100
|
+
from voice_mode.tools.service import stop_service
|
121
101
|
result = asyncio.run(stop_service("kokoro"))
|
122
102
|
click.echo(result)
|
123
103
|
|
@@ -125,6 +105,7 @@ def stop():
|
|
125
105
|
@kokoro.command()
|
126
106
|
def restart():
|
127
107
|
"""Restart Kokoro service."""
|
108
|
+
from voice_mode.tools.service import restart_service
|
128
109
|
result = asyncio.run(restart_service("kokoro"))
|
129
110
|
click.echo(result)
|
130
111
|
|
@@ -132,6 +113,7 @@ def restart():
|
|
132
113
|
@kokoro.command()
|
133
114
|
def enable():
|
134
115
|
"""Enable Kokoro service to start at boot/login."""
|
116
|
+
from voice_mode.tools.service import enable_service
|
135
117
|
result = asyncio.run(enable_service("kokoro"))
|
136
118
|
click.echo(result)
|
137
119
|
|
@@ -139,6 +121,7 @@ def enable():
|
|
139
121
|
@kokoro.command()
|
140
122
|
def disable():
|
141
123
|
"""Disable Kokoro service from starting at boot/login."""
|
124
|
+
from voice_mode.tools.service import disable_service
|
142
125
|
result = asyncio.run(disable_service("kokoro"))
|
143
126
|
click.echo(result)
|
144
127
|
|
@@ -147,6 +130,7 @@ def disable():
|
|
147
130
|
@click.option('--lines', '-n', default=50, help='Number of log lines to show')
|
148
131
|
def logs(lines):
|
149
132
|
"""View Kokoro service logs."""
|
133
|
+
from voice_mode.tools.service import view_logs
|
150
134
|
result = asyncio.run(view_logs("kokoro", lines))
|
151
135
|
click.echo(result)
|
152
136
|
|
@@ -154,6 +138,7 @@ def logs(lines):
|
|
154
138
|
@kokoro.command("update-service-files")
|
155
139
|
def kokoro_update_service_files():
|
156
140
|
"""Update Kokoro service files to latest version."""
|
141
|
+
from voice_mode.tools.service import update_service_files
|
157
142
|
result = asyncio.run(update_service_files("kokoro"))
|
158
143
|
click.echo(result)
|
159
144
|
|
@@ -193,6 +178,7 @@ def health():
|
|
193
178
|
@click.option('--auto-enable/--no-auto-enable', default=None, help='Enable service at boot/login')
|
194
179
|
def install(install_dir, port, force, version, auto_enable):
|
195
180
|
"""Install kokoro-fastapi TTS service."""
|
181
|
+
from voice_mode.tools.services.kokoro.install import kokoro_install
|
196
182
|
result = asyncio.run(kokoro_install.fn(
|
197
183
|
install_dir=install_dir,
|
198
184
|
port=port,
|
@@ -227,6 +213,7 @@ def install(install_dir, port, force, version, auto_enable):
|
|
227
213
|
@click.confirmation_option(prompt='Are you sure you want to uninstall Kokoro?')
|
228
214
|
def uninstall(remove_models, remove_all_data):
|
229
215
|
"""Uninstall kokoro-fastapi service and optionally remove data."""
|
216
|
+
from voice_mode.tools.services.kokoro.uninstall import kokoro_uninstall
|
230
217
|
result = asyncio.run(kokoro_uninstall.fn(
|
231
218
|
remove_models=remove_models,
|
232
219
|
remove_all_data=remove_all_data
|
@@ -260,6 +247,7 @@ def uninstall(remove_models, remove_all_data):
|
|
260
247
|
@whisper.command()
|
261
248
|
def status():
|
262
249
|
"""Show Whisper service status."""
|
250
|
+
from voice_mode.tools.service import status_service
|
263
251
|
result = asyncio.run(status_service("whisper"))
|
264
252
|
click.echo(result)
|
265
253
|
|
@@ -267,6 +255,7 @@ def status():
|
|
267
255
|
@whisper.command()
|
268
256
|
def start():
|
269
257
|
"""Start Whisper service."""
|
258
|
+
from voice_mode.tools.service import start_service
|
270
259
|
result = asyncio.run(start_service("whisper"))
|
271
260
|
click.echo(result)
|
272
261
|
|
@@ -274,6 +263,7 @@ def start():
|
|
274
263
|
@whisper.command()
|
275
264
|
def stop():
|
276
265
|
"""Stop Whisper service."""
|
266
|
+
from voice_mode.tools.service import stop_service
|
277
267
|
result = asyncio.run(stop_service("whisper"))
|
278
268
|
click.echo(result)
|
279
269
|
|
@@ -281,6 +271,7 @@ def stop():
|
|
281
271
|
@whisper.command()
|
282
272
|
def restart():
|
283
273
|
"""Restart Whisper service."""
|
274
|
+
from voice_mode.tools.service import restart_service
|
284
275
|
result = asyncio.run(restart_service("whisper"))
|
285
276
|
click.echo(result)
|
286
277
|
|
@@ -288,6 +279,7 @@ def restart():
|
|
288
279
|
@whisper.command()
|
289
280
|
def enable():
|
290
281
|
"""Enable Whisper service to start at boot/login."""
|
282
|
+
from voice_mode.tools.service import enable_service
|
291
283
|
result = asyncio.run(enable_service("whisper"))
|
292
284
|
click.echo(result)
|
293
285
|
|
@@ -295,6 +287,7 @@ def enable():
|
|
295
287
|
@whisper.command()
|
296
288
|
def disable():
|
297
289
|
"""Disable Whisper service from starting at boot/login."""
|
290
|
+
from voice_mode.tools.service import disable_service
|
298
291
|
result = asyncio.run(disable_service("whisper"))
|
299
292
|
click.echo(result)
|
300
293
|
|
@@ -303,6 +296,7 @@ def disable():
|
|
303
296
|
@click.option('--lines', '-n', default=50, help='Number of log lines to show')
|
304
297
|
def logs(lines):
|
305
298
|
"""View Whisper service logs."""
|
299
|
+
from voice_mode.tools.service import view_logs
|
306
300
|
result = asyncio.run(view_logs("whisper", lines))
|
307
301
|
click.echo(result)
|
308
302
|
|
@@ -310,6 +304,7 @@ def logs(lines):
|
|
310
304
|
@whisper.command("update-service-files")
|
311
305
|
def whisper_update_service_files():
|
312
306
|
"""Update Whisper service files to latest version."""
|
307
|
+
from voice_mode.tools.service import update_service_files
|
313
308
|
result = asyncio.run(update_service_files("whisper"))
|
314
309
|
click.echo(result)
|
315
310
|
|
@@ -350,6 +345,7 @@ def health():
|
|
350
345
|
@click.option('--auto-enable/--no-auto-enable', default=None, help='Enable service at boot/login')
|
351
346
|
def install(install_dir, model, use_gpu, force, version, auto_enable):
|
352
347
|
"""Install whisper.cpp STT service with automatic system detection."""
|
348
|
+
from voice_mode.tools.services.whisper.install import whisper_install
|
353
349
|
result = asyncio.run(whisper_install.fn(
|
354
350
|
install_dir=install_dir,
|
355
351
|
model=model,
|
@@ -394,6 +390,7 @@ def install(install_dir, model, use_gpu, force, version, auto_enable):
|
|
394
390
|
@click.confirmation_option(prompt='Are you sure you want to uninstall Whisper?')
|
395
391
|
def uninstall(remove_models, remove_all_data):
|
396
392
|
"""Uninstall whisper.cpp and optionally remove models and data."""
|
393
|
+
from voice_mode.tools.services.whisper.uninstall import whisper_uninstall
|
397
394
|
result = asyncio.run(whisper_uninstall.fn(
|
398
395
|
remove_models=remove_models,
|
399
396
|
remove_all_data=remove_all_data
|
@@ -437,6 +434,7 @@ def download_model_cmd(model, force, skip_core_ml):
|
|
437
434
|
medium, medium.en, large-v1, large-v2, large-v3, large-v3-turbo
|
438
435
|
"""
|
439
436
|
import json
|
437
|
+
from voice_mode.tools.services.whisper.download_model import download_model
|
440
438
|
result = asyncio.run(download_model.fn(
|
441
439
|
model=model,
|
442
440
|
force_download=force,
|
@@ -478,6 +476,7 @@ def download_model_cmd(model, force, skip_core_ml):
|
|
478
476
|
@livekit.command()
|
479
477
|
def status():
|
480
478
|
"""Show LiveKit service status."""
|
479
|
+
from voice_mode.tools.service import status_service
|
481
480
|
result = asyncio.run(status_service("livekit"))
|
482
481
|
click.echo(result)
|
483
482
|
|
@@ -485,6 +484,7 @@ def status():
|
|
485
484
|
@livekit.command()
|
486
485
|
def start():
|
487
486
|
"""Start LiveKit service."""
|
487
|
+
from voice_mode.tools.service import start_service
|
488
488
|
result = asyncio.run(start_service("livekit"))
|
489
489
|
click.echo(result)
|
490
490
|
|
@@ -492,6 +492,7 @@ def start():
|
|
492
492
|
@livekit.command()
|
493
493
|
def stop():
|
494
494
|
"""Stop LiveKit service."""
|
495
|
+
from voice_mode.tools.service import stop_service
|
495
496
|
result = asyncio.run(stop_service("livekit"))
|
496
497
|
click.echo(result)
|
497
498
|
|
@@ -499,6 +500,7 @@ def stop():
|
|
499
500
|
@livekit.command()
|
500
501
|
def restart():
|
501
502
|
"""Restart LiveKit service."""
|
503
|
+
from voice_mode.tools.service import restart_service
|
502
504
|
result = asyncio.run(restart_service("livekit"))
|
503
505
|
click.echo(result)
|
504
506
|
|
@@ -506,6 +508,7 @@ def restart():
|
|
506
508
|
@livekit.command()
|
507
509
|
def enable():
|
508
510
|
"""Enable LiveKit service to start at boot/login."""
|
511
|
+
from voice_mode.tools.service import enable_service
|
509
512
|
result = asyncio.run(enable_service("livekit"))
|
510
513
|
click.echo(result)
|
511
514
|
|
@@ -513,6 +516,7 @@ def enable():
|
|
513
516
|
@livekit.command()
|
514
517
|
def disable():
|
515
518
|
"""Disable LiveKit service from starting at boot/login."""
|
519
|
+
from voice_mode.tools.service import disable_service
|
516
520
|
result = asyncio.run(disable_service("livekit"))
|
517
521
|
click.echo(result)
|
518
522
|
|
@@ -521,6 +525,7 @@ def disable():
|
|
521
525
|
@click.option('--lines', '-n', default=50, help='Number of log lines to show')
|
522
526
|
def logs(lines):
|
523
527
|
"""View LiveKit service logs."""
|
528
|
+
from voice_mode.tools.service import view_logs
|
524
529
|
result = asyncio.run(view_logs("livekit", lines))
|
525
530
|
click.echo(result)
|
526
531
|
|
@@ -528,6 +533,7 @@ def logs(lines):
|
|
528
533
|
@livekit.command()
|
529
534
|
def update():
|
530
535
|
"""Update LiveKit service files to the latest version."""
|
536
|
+
from voice_mode.tools.service import update_service_files
|
531
537
|
result = asyncio.run(update_service_files("livekit"))
|
532
538
|
|
533
539
|
if result.get("success"):
|
@@ -546,6 +552,7 @@ def update():
|
|
546
552
|
@click.option('--auto-enable/--no-auto-enable', default=None, help='Enable service at boot/login')
|
547
553
|
def install(install_dir, port, force, version, auto_enable):
|
548
554
|
"""Install LiveKit server with development configuration."""
|
555
|
+
from voice_mode.tools.services.livekit.install import livekit_install
|
549
556
|
result = asyncio.run(livekit_install.fn(
|
550
557
|
install_dir=install_dir,
|
551
558
|
port=port,
|
@@ -583,6 +590,7 @@ def install(install_dir, port, force, version, auto_enable):
|
|
583
590
|
@click.confirmation_option(prompt='Are you sure you want to uninstall LiveKit?')
|
584
591
|
def uninstall(remove_config, remove_all_data):
|
585
592
|
"""Uninstall LiveKit server and optionally remove configuration and data."""
|
593
|
+
from voice_mode.tools.services.livekit.uninstall import livekit_uninstall
|
586
594
|
result = asyncio.run(livekit_uninstall.fn(
|
587
595
|
remove_config=remove_config,
|
588
596
|
remove_all_data=remove_all_data
|
@@ -615,6 +623,7 @@ def frontend():
|
|
615
623
|
@click.option('--auto-enable/--no-auto-enable', default=None, help='Enable service after installation (default: from config)')
|
616
624
|
def frontend_install(auto_enable):
|
617
625
|
"""Install and setup LiveKit Voice Assistant Frontend."""
|
626
|
+
from voice_mode.tools.services.livekit.frontend import livekit_frontend_install
|
618
627
|
result = asyncio.run(livekit_frontend_install.fn(auto_enable=auto_enable))
|
619
628
|
|
620
629
|
if result.get('success'):
|
@@ -642,6 +651,7 @@ def frontend_install(auto_enable):
|
|
642
651
|
@click.option('--host', default='127.0.0.1', help='Host to bind to (default: 127.0.0.1)')
|
643
652
|
def frontend_start(port, host):
|
644
653
|
"""Start the LiveKit Voice Assistant Frontend."""
|
654
|
+
from voice_mode.tools.services.livekit.frontend import livekit_frontend_start
|
645
655
|
result = asyncio.run(livekit_frontend_start.fn(port=port, host=host))
|
646
656
|
|
647
657
|
if result.get('success'):
|
@@ -663,6 +673,7 @@ def frontend_start(port, host):
|
|
663
673
|
@frontend.command("stop")
|
664
674
|
def frontend_stop():
|
665
675
|
"""Stop the LiveKit Voice Assistant Frontend."""
|
676
|
+
from voice_mode.tools.services.livekit.frontend import livekit_frontend_stop
|
666
677
|
result = asyncio.run(livekit_frontend_stop.fn())
|
667
678
|
|
668
679
|
if result.get('success'):
|
@@ -674,6 +685,7 @@ def frontend_stop():
|
|
674
685
|
@frontend.command("status")
|
675
686
|
def frontend_status():
|
676
687
|
"""Check status of the LiveKit Voice Assistant Frontend."""
|
688
|
+
from voice_mode.tools.services.livekit.frontend import livekit_frontend_status
|
677
689
|
result = asyncio.run(livekit_frontend_status.fn())
|
678
690
|
|
679
691
|
if 'error' in result:
|
@@ -701,6 +713,7 @@ def frontend_open():
|
|
701
713
|
|
702
714
|
Starts the frontend if not already running, then opens it in the default browser.
|
703
715
|
"""
|
716
|
+
from voice_mode.tools.services.livekit.frontend import livekit_frontend_open
|
704
717
|
result = asyncio.run(livekit_frontend_open.fn())
|
705
718
|
|
706
719
|
if result.get('success'):
|
@@ -723,11 +736,13 @@ def frontend_logs(lines, follow):
|
|
723
736
|
"""
|
724
737
|
if follow:
|
725
738
|
# For following, run tail -f directly
|
739
|
+
from voice_mode.tools.services.livekit.frontend import livekit_frontend_logs
|
726
740
|
result = asyncio.run(livekit_frontend_logs.fn(follow=True))
|
727
741
|
if result.get('success'):
|
728
742
|
click.echo(f"📂 Log file: {result['log_file']}")
|
729
743
|
click.echo("🔄 Following logs (press Ctrl+C to stop)...")
|
730
744
|
try:
|
745
|
+
import subprocess
|
731
746
|
subprocess.run(["tail", "-f", result['log_file']])
|
732
747
|
except KeyboardInterrupt:
|
733
748
|
click.echo("\n✅ Stopped following logs")
|
@@ -735,6 +750,7 @@ def frontend_logs(lines, follow):
|
|
735
750
|
click.echo(f"❌ Error: {result.get('error', 'Unknown error')}")
|
736
751
|
else:
|
737
752
|
# Show last N lines
|
753
|
+
from voice_mode.tools.services.livekit.frontend import livekit_frontend_logs
|
738
754
|
result = asyncio.run(livekit_frontend_logs.fn(lines=lines, follow=False))
|
739
755
|
if result.get('success'):
|
740
756
|
click.echo(f"📂 Log file: {result['log_file']}")
|
@@ -748,6 +764,7 @@ def frontend_logs(lines, follow):
|
|
748
764
|
@frontend.command("enable")
|
749
765
|
def frontend_enable():
|
750
766
|
"""Enable frontend service to start automatically at boot/login."""
|
767
|
+
from voice_mode.tools.service import enable_service
|
751
768
|
result = asyncio.run(enable_service("frontend"))
|
752
769
|
# enable_service returns a string, not a dict
|
753
770
|
click.echo(result)
|
@@ -756,6 +773,7 @@ def frontend_enable():
|
|
756
773
|
@frontend.command("disable")
|
757
774
|
def frontend_disable():
|
758
775
|
"""Disable frontend service from starting automatically."""
|
776
|
+
from voice_mode.tools.service import disable_service
|
759
777
|
result = asyncio.run(disable_service("frontend"))
|
760
778
|
# disable_service returns a string, not a dict
|
761
779
|
click.echo(result)
|
@@ -827,6 +845,7 @@ def config():
|
|
827
845
|
@config.command("list")
|
828
846
|
def config_list():
|
829
847
|
"""List all configuration keys with their descriptions."""
|
848
|
+
from voice_mode.tools.configuration_management import list_config_keys
|
830
849
|
result = asyncio.run(list_config_keys.fn())
|
831
850
|
click.echo(result)
|
832
851
|
|
@@ -873,6 +892,7 @@ def config_get(key):
|
|
873
892
|
@click.argument('value')
|
874
893
|
def config_set(key, value):
|
875
894
|
"""Set a configuration value."""
|
895
|
+
from voice_mode.tools.configuration_management import update_config
|
876
896
|
result = asyncio.run(update_config.fn(key, value))
|
877
897
|
click.echo(result)
|
878
898
|
|
@@ -887,6 +907,7 @@ def diag():
|
|
887
907
|
@diag.command()
|
888
908
|
def info():
|
889
909
|
"""Show voice-mode installation information."""
|
910
|
+
from voice_mode.tools.diagnostics import voice_mode_info
|
890
911
|
result = asyncio.run(voice_mode_info.fn())
|
891
912
|
click.echo(result)
|
892
913
|
|
@@ -894,6 +915,7 @@ def info():
|
|
894
915
|
@diag.command()
|
895
916
|
def devices():
|
896
917
|
"""List available audio input and output devices."""
|
918
|
+
from voice_mode.tools.devices import check_audio_devices
|
897
919
|
result = asyncio.run(check_audio_devices.fn())
|
898
920
|
click.echo(result)
|
899
921
|
|
@@ -901,6 +923,7 @@ def devices():
|
|
901
923
|
@diag.command()
|
902
924
|
def registry():
|
903
925
|
"""Show voice provider registry with all discovered endpoints."""
|
926
|
+
from voice_mode.tools.voice_registry import voice_registry
|
904
927
|
result = asyncio.run(voice_registry.fn())
|
905
928
|
click.echo(result)
|
906
929
|
|
@@ -909,6 +932,7 @@ def registry():
|
|
909
932
|
def dependencies():
|
910
933
|
"""Check system audio dependencies and provide installation guidance."""
|
911
934
|
import json
|
935
|
+
from voice_mode.tools.dependencies import check_audio_dependencies
|
912
936
|
result = asyncio.run(check_audio_dependencies.fn())
|
913
937
|
|
914
938
|
if isinstance(result, dict):
|
@@ -149,19 +149,29 @@ def env_bool(env_var: str, default: bool = False) -> bool:
|
|
149
149
|
value = os.getenv(env_var, "").lower()
|
150
150
|
return value in ("true", "1", "yes", "on") if value else default
|
151
151
|
|
152
|
+
# Helper function to expand paths with tilde
|
153
|
+
def expand_path(path_str: str) -> Path:
|
154
|
+
"""Expand tilde and environment variables in path strings."""
|
155
|
+
# First expand any environment variables
|
156
|
+
expanded = os.path.expandvars(path_str)
|
157
|
+
# Then expand tilde
|
158
|
+
expanded = os.path.expanduser(expanded)
|
159
|
+
return Path(expanded)
|
160
|
+
|
152
161
|
# Base directory for all voicemode data
|
153
|
-
BASE_DIR =
|
162
|
+
BASE_DIR = expand_path(os.getenv("VOICEMODE_BASE_DIR", str(Path.home() / ".voicemode")))
|
154
163
|
|
155
164
|
# Unified directory structure
|
156
165
|
AUDIO_DIR = BASE_DIR / "audio"
|
157
166
|
TRANSCRIPTIONS_DIR = BASE_DIR / "transcriptions"
|
158
167
|
LOGS_DIR = BASE_DIR / "logs"
|
159
168
|
# CONFIG_DIR = BASE_DIR / "config" # Removed - config stored in .voicemode.env file instead
|
160
|
-
MODELS_DIR =
|
169
|
+
MODELS_DIR = expand_path(os.getenv("VOICEMODE_MODELS_DIR", str(BASE_DIR / "models")))
|
161
170
|
|
162
171
|
# Debug configuration
|
163
172
|
DEBUG = os.getenv("VOICEMODE_DEBUG", "").lower() in ("true", "1", "yes", "on")
|
164
173
|
TRACE_DEBUG = os.getenv("VOICEMODE_DEBUG", "").lower() == "trace"
|
174
|
+
VAD_DEBUG = os.getenv("VOICEMODE_VAD_DEBUG", "").lower() in ("true", "1", "yes", "on")
|
165
175
|
DEBUG_DIR = LOGS_DIR / "debug" # Debug files now go under logs
|
166
176
|
|
167
177
|
# Master save-all configuration
|
@@ -224,14 +234,14 @@ LIVEKIT_API_SECRET = os.getenv("LIVEKIT_API_SECRET", "secret")
|
|
224
234
|
WHISPER_MODEL = os.getenv("VOICEMODE_WHISPER_MODEL", "large-v2")
|
225
235
|
WHISPER_PORT = int(os.getenv("VOICEMODE_WHISPER_PORT", "2022"))
|
226
236
|
WHISPER_LANGUAGE = os.getenv("VOICEMODE_WHISPER_LANGUAGE", "auto")
|
227
|
-
WHISPER_MODEL_PATH = os.getenv("VOICEMODE_WHISPER_MODEL_PATH", str(BASE_DIR / "models" / "whisper"))
|
237
|
+
WHISPER_MODEL_PATH = expand_path(os.getenv("VOICEMODE_WHISPER_MODEL_PATH", str(BASE_DIR / "models" / "whisper")))
|
228
238
|
|
229
239
|
# ==================== KOKORO CONFIGURATION ====================
|
230
240
|
|
231
241
|
# Kokoro-specific configuration
|
232
242
|
KOKORO_PORT = int(os.getenv("VOICEMODE_KOKORO_PORT", "8880"))
|
233
|
-
KOKORO_MODELS_DIR = os.getenv("VOICEMODE_KOKORO_MODELS_DIR", str(BASE_DIR / "models" / "kokoro"))
|
234
|
-
KOKORO_CACHE_DIR = os.getenv("VOICEMODE_KOKORO_CACHE_DIR", str(BASE_DIR / "cache" / "kokoro"))
|
243
|
+
KOKORO_MODELS_DIR = expand_path(os.getenv("VOICEMODE_KOKORO_MODELS_DIR", str(BASE_DIR / "models" / "kokoro")))
|
244
|
+
KOKORO_CACHE_DIR = expand_path(os.getenv("VOICEMODE_KOKORO_CACHE_DIR", str(BASE_DIR / "cache" / "kokoro")))
|
235
245
|
KOKORO_DEFAULT_VOICE = os.getenv("VOICEMODE_KOKORO_DEFAULT_VOICE", "af_sky")
|
236
246
|
|
237
247
|
# ==================== LIVEKIT CONFIGURATION ====================
|
@@ -32,6 +32,7 @@ from voice_mode.config import (
|
|
32
32
|
CHANNELS,
|
33
33
|
DEBUG,
|
34
34
|
DEBUG_DIR,
|
35
|
+
VAD_DEBUG,
|
35
36
|
SAVE_AUDIO,
|
36
37
|
AUDIO_DIR,
|
37
38
|
OPENAI_API_KEY,
|
@@ -872,7 +873,7 @@ def record_audio(duration: float) -> np.ndarray:
|
|
872
873
|
sys.stderr = original_stderr
|
873
874
|
|
874
875
|
|
875
|
-
def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0, vad_aggressiveness: Optional[int] = None) -> np.ndarray:
|
876
|
+
def record_audio_with_silence_detection(max_duration: float, disable_silence_detection: bool = False, min_duration: float = 0.0, vad_aggressiveness: Optional[int] = None) -> Tuple[np.ndarray, bool]:
|
876
877
|
"""Record audio from microphone with automatic silence detection.
|
877
878
|
|
878
879
|
Uses WebRTC VAD to detect when the user stops speaking and automatically
|
@@ -885,21 +886,25 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
|
|
885
886
|
vad_aggressiveness: VAD aggressiveness level (0-3). If None, uses VAD_AGGRESSIVENESS from config
|
886
887
|
|
887
888
|
Returns:
|
888
|
-
|
889
|
+
Tuple of (audio_data, speech_detected):
|
890
|
+
- audio_data: Numpy array of recorded audio samples
|
891
|
+
- speech_detected: Boolean indicating if speech was detected during recording
|
889
892
|
"""
|
890
893
|
|
891
894
|
logger.info(f"record_audio_with_silence_detection called - VAD_AVAILABLE={VAD_AVAILABLE}, DISABLE_SILENCE_DETECTION={DISABLE_SILENCE_DETECTION}, min_duration={min_duration}")
|
892
895
|
|
893
896
|
if not VAD_AVAILABLE:
|
894
897
|
logger.warning("webrtcvad not available, falling back to fixed duration recording")
|
895
|
-
|
898
|
+
# For fallback, assume speech is present since we can't detect
|
899
|
+
return (record_audio(max_duration), True)
|
896
900
|
|
897
901
|
if DISABLE_SILENCE_DETECTION or disable_silence_detection:
|
898
902
|
if disable_silence_detection:
|
899
903
|
logger.info("Silence detection disabled for this interaction by request")
|
900
904
|
else:
|
901
905
|
logger.info("Silence detection disabled globally via VOICEMODE_DISABLE_SILENCE_DETECTION")
|
902
|
-
|
906
|
+
# For fallback, assume speech is present since we can't detect
|
907
|
+
return (record_audio(max_duration), True)
|
903
908
|
|
904
909
|
logger.info(f"🎤 Recording with silence detection (max {max_duration}s)...")
|
905
910
|
|
@@ -940,6 +945,16 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
|
|
940
945
|
f"Min duration: {MIN_RECORDING_DURATION}s, "
|
941
946
|
f"Initial grace period: {INITIAL_SILENCE_GRACE_PERIOD}s")
|
942
947
|
|
948
|
+
if VAD_DEBUG:
|
949
|
+
logger.info(f"[VAD_DEBUG] Starting VAD recording with config:")
|
950
|
+
logger.info(f"[VAD_DEBUG] max_duration: {max_duration}s")
|
951
|
+
logger.info(f"[VAD_DEBUG] min_duration: {min_duration}s")
|
952
|
+
logger.info(f"[VAD_DEBUG] effective_min_duration: {max(MIN_RECORDING_DURATION, min_duration)}s")
|
953
|
+
logger.info(f"[VAD_DEBUG] VAD aggressiveness: {effective_vad_aggressiveness}")
|
954
|
+
logger.info(f"[VAD_DEBUG] Silence threshold: {SILENCE_THRESHOLD_MS}ms")
|
955
|
+
logger.info(f"[VAD_DEBUG] Sample rate: {SAMPLE_RATE}Hz (VAD using {vad_sample_rate}Hz)")
|
956
|
+
logger.info(f"[VAD_DEBUG] Chunk duration: {VAD_CHUNK_DURATION_MS}ms")
|
957
|
+
|
943
958
|
def audio_callback(indata, frames, time, status):
|
944
959
|
"""Callback for continuous audio stream"""
|
945
960
|
if status:
|
@@ -979,35 +994,53 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
|
|
979
994
|
# Check if chunk contains speech
|
980
995
|
try:
|
981
996
|
is_speech = vad.is_speech(chunk_bytes, vad_sample_rate)
|
997
|
+
if VAD_DEBUG:
|
998
|
+
# Log VAD decision every 500ms for less spam
|
999
|
+
if int(recording_duration * 1000) % 500 == 0:
|
1000
|
+
rms = np.sqrt(np.mean(chunk.astype(float)**2))
|
1001
|
+
logger.info(f"[VAD_DEBUG] t={recording_duration:.1f}s: speech={is_speech}, RMS={rms:.0f}, state={'WAITING' if not speech_detected else 'ACTIVE'}")
|
982
1002
|
except Exception as vad_e:
|
983
1003
|
logger.warning(f"VAD error: {vad_e}, treating as speech")
|
984
1004
|
is_speech = True
|
985
1005
|
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
|
990
|
-
|
1006
|
+
# State machine for speech detection
|
1007
|
+
if not speech_detected:
|
1008
|
+
# WAITING_FOR_SPEECH state
|
1009
|
+
if is_speech:
|
1010
|
+
logger.info("🎤 Speech detected, starting active recording")
|
1011
|
+
if VAD_DEBUG:
|
1012
|
+
logger.info(f"[VAD_DEBUG] STATE CHANGE: WAITING_FOR_SPEECH -> SPEECH_ACTIVE at t={recording_duration:.1f}s")
|
1013
|
+
speech_detected = True
|
1014
|
+
silence_duration_ms = 0
|
1015
|
+
# No timeout in this state - just keep waiting
|
1016
|
+
# The only exit is speech detection or max_duration
|
991
1017
|
else:
|
992
|
-
|
993
|
-
if
|
994
|
-
|
1018
|
+
# We have detected speech at some point
|
1019
|
+
if is_speech:
|
1020
|
+
# SPEECH_ACTIVE state - reset silence counter
|
1021
|
+
silence_duration_ms = 0
|
1022
|
+
else:
|
1023
|
+
# SILENCE_AFTER_SPEECH state - accumulate silence
|
1024
|
+
silence_duration_ms += VAD_CHUNK_DURATION_MS
|
1025
|
+
if VAD_DEBUG and silence_duration_ms % 100 == 0: # More frequent logging in debug mode
|
1026
|
+
logger.info(f"[VAD_DEBUG] Accumulating silence: {silence_duration_ms}/{SILENCE_THRESHOLD_MS}ms, t={recording_duration:.1f}s")
|
1027
|
+
elif silence_duration_ms % 200 == 0: # Log every 200ms
|
1028
|
+
logger.debug(f"Silence: {silence_duration_ms}ms")
|
1029
|
+
|
1030
|
+
# Check if we should stop due to silence threshold
|
1031
|
+
# Use the larger of MIN_RECORDING_DURATION (global) or min_duration (parameter)
|
1032
|
+
effective_min_duration = max(MIN_RECORDING_DURATION, min_duration)
|
1033
|
+
if recording_duration >= effective_min_duration and silence_duration_ms >= SILENCE_THRESHOLD_MS:
|
1034
|
+
logger.info(f"✓ Silence threshold reached after {recording_duration:.1f}s of recording")
|
1035
|
+
if VAD_DEBUG:
|
1036
|
+
logger.info(f"[VAD_DEBUG] STOP: silence_duration={silence_duration_ms}ms >= threshold={SILENCE_THRESHOLD_MS}ms")
|
1037
|
+
logger.info(f"[VAD_DEBUG] STOP: recording_duration={recording_duration:.1f}s >= min_duration={effective_min_duration}s")
|
1038
|
+
stop_recording = True
|
1039
|
+
elif VAD_DEBUG and recording_duration < effective_min_duration:
|
1040
|
+
if int(recording_duration * 1000) % 500 == 0: # Log every 500ms
|
1041
|
+
logger.info(f"[VAD_DEBUG] Min duration not met: {recording_duration:.1f}s < {effective_min_duration}s")
|
995
1042
|
|
996
1043
|
recording_duration += chunk_duration_s
|
997
|
-
|
998
|
-
# Check stop conditions
|
999
|
-
# Use the larger of MIN_RECORDING_DURATION (global) or min_duration (parameter)
|
1000
|
-
effective_min_duration = max(MIN_RECORDING_DURATION, min_duration)
|
1001
|
-
if speech_detected and recording_duration >= effective_min_duration:
|
1002
|
-
if silence_duration_ms >= SILENCE_THRESHOLD_MS:
|
1003
|
-
logger.info(f"✓ Silence detected after {recording_duration:.1f}s (min: {effective_min_duration:.1f}s), stopping recording")
|
1004
|
-
stop_recording = True
|
1005
|
-
|
1006
|
-
# Also stop if we haven't detected any speech after a grace period
|
1007
|
-
# Give user time to start speaking
|
1008
|
-
if not speech_detected and recording_duration >= INITIAL_SILENCE_GRACE_PERIOD:
|
1009
|
-
logger.info(f"No speech detected after {INITIAL_SILENCE_GRACE_PERIOD}s grace period, stopping recording")
|
1010
|
-
stop_recording = True
|
1011
1044
|
|
1012
1045
|
except queue.Empty:
|
1013
1046
|
# No audio data available, continue waiting
|
@@ -1019,17 +1052,26 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
|
|
1019
1052
|
# Concatenate all chunks
|
1020
1053
|
if chunks:
|
1021
1054
|
full_recording = np.concatenate(chunks)
|
1022
|
-
|
1055
|
+
|
1056
|
+
if not speech_detected:
|
1057
|
+
logger.info(f"✓ Recording completed ({recording_duration:.1f}s) - No speech detected")
|
1058
|
+
if VAD_DEBUG:
|
1059
|
+
logger.info(f"[VAD_DEBUG] FINAL STATE: No speech was ever detected during recording")
|
1060
|
+
else:
|
1061
|
+
logger.info(f"✓ Recorded {len(full_recording)} samples ({recording_duration:.1f}s) with speech")
|
1062
|
+
if VAD_DEBUG:
|
1063
|
+
logger.info(f"[VAD_DEBUG] FINAL STATE: Speech was detected, recording complete")
|
1023
1064
|
|
1024
1065
|
if DEBUG:
|
1025
1066
|
# Calculate RMS for debug
|
1026
1067
|
rms = np.sqrt(np.mean(full_recording.astype(float) ** 2))
|
1027
1068
|
logger.debug(f"Recording stats - RMS: {rms:.2f}, Speech detected: {speech_detected}")
|
1028
1069
|
|
1029
|
-
|
1070
|
+
# Return tuple: (audio_data, speech_detected)
|
1071
|
+
return (full_recording, speech_detected)
|
1030
1072
|
else:
|
1031
1073
|
logger.warning("No audio chunks recorded")
|
1032
|
-
return np.array([])
|
1074
|
+
return (np.array([]), False)
|
1033
1075
|
|
1034
1076
|
except Exception as e:
|
1035
1077
|
logger.error(f"Recording with VAD failed: {e}")
|
@@ -1042,7 +1084,8 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
|
|
1042
1084
|
logger.error(f"\n{help_message}")
|
1043
1085
|
|
1044
1086
|
logger.info("Falling back to fixed duration recording")
|
1045
|
-
|
1087
|
+
# For fallback, assume speech is present since we can't detect
|
1088
|
+
return (record_audio(max_duration), True)
|
1046
1089
|
|
1047
1090
|
finally:
|
1048
1091
|
# Restore stdio
|
@@ -1056,7 +1099,8 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
|
|
1056
1099
|
except Exception as e:
|
1057
1100
|
logger.error(f"VAD initialization failed: {e}")
|
1058
1101
|
logger.info("Falling back to fixed duration recording")
|
1059
|
-
|
1102
|
+
# For fallback, assume speech is present since we can't detect
|
1103
|
+
return (record_audio(max_duration), True)
|
1060
1104
|
|
1061
1105
|
|
1062
1106
|
async def check_livekit_available() -> bool:
|
@@ -1713,7 +1757,7 @@ async def converse(
|
|
1713
1757
|
|
1714
1758
|
record_start = time.perf_counter()
|
1715
1759
|
logger.debug(f"About to call record_audio_with_silence_detection with duration={listen_duration}, disable_silence_detection={disable_silence_detection}, min_duration={min_listen_duration}, vad_aggressiveness={vad_aggressiveness}")
|
1716
|
-
audio_data = await asyncio.get_event_loop().run_in_executor(
|
1760
|
+
audio_data, speech_detected = await asyncio.get_event_loop().run_in_executor(
|
1717
1761
|
None, record_audio_with_silence_detection, listen_duration, disable_silence_detection, min_listen_duration, vad_aggressiveness
|
1718
1762
|
)
|
1719
1763
|
timings['record'] = time.perf_counter() - record_start
|
@@ -1736,14 +1780,27 @@ async def converse(
|
|
1736
1780
|
result = "Error: Could not record audio"
|
1737
1781
|
return result
|
1738
1782
|
|
1739
|
-
#
|
1740
|
-
|
1741
|
-
|
1742
|
-
|
1743
|
-
|
1744
|
-
|
1745
|
-
|
1746
|
-
|
1783
|
+
# Check if no speech was detected
|
1784
|
+
if not speech_detected:
|
1785
|
+
logger.info("No speech detected during recording - skipping STT processing")
|
1786
|
+
response_text = None
|
1787
|
+
timings['stt'] = 0.0
|
1788
|
+
|
1789
|
+
# Still save the audio if configured
|
1790
|
+
if SAVE_AUDIO and AUDIO_DIR:
|
1791
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
1792
|
+
audio_path = os.path.join(AUDIO_DIR, f"no_speech_{timestamp}.wav")
|
1793
|
+
write(audio_path, SAMPLE_RATE, audio_data)
|
1794
|
+
logger.debug(f"Saved no-speech audio to: {audio_path}")
|
1795
|
+
else:
|
1796
|
+
# Convert to text
|
1797
|
+
# Log STT start
|
1798
|
+
if event_logger:
|
1799
|
+
event_logger.log_event(event_logger.STT_START)
|
1800
|
+
|
1801
|
+
stt_start = time.perf_counter()
|
1802
|
+
response_text = await speech_to_text(audio_data, SAVE_AUDIO, AUDIO_DIR if SAVE_AUDIO else None, transport)
|
1803
|
+
timings['stt'] = time.perf_counter() - stt_start
|
1747
1804
|
|
1748
1805
|
# Log STT complete
|
1749
1806
|
if event_logger:
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/app/api/connection-details/route.ts
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/components/NoAgentNotification.tsx
RENAMED
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/components/TranscriptionView.tsx
RENAMED
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.frontend.plist
RENAMED
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.kokoro.plist
RENAMED
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.livekit.plist
RENAMED
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/launchd/com.voicemode.whisper.plist
RENAMED
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-frontend.service
RENAMED
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-kokoro.service
RENAMED
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-livekit.service
RENAMED
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/templates/systemd/voicemode-whisper.service
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{voice_mode-2.23.0 → voice_mode-2.25.0}/voice_mode/tools/services/livekit/production_server.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|