voice-mode 3.34.3__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voice_mode/__version__.py +1 -1
- voice_mode/cli.py +8 -0
- voice_mode/cli_commands/pronounce_commands.py +223 -0
- voice_mode/cli_commands/transcribe.py +141 -0
- voice_mode/config.py +139 -37
- voice_mode/data/default_pronunciation.yaml +268 -0
- voice_mode/frontend/.next/BUILD_ID +1 -0
- voice_mode/frontend/.next/app-build-manifest.json +28 -0
- voice_mode/frontend/.next/app-path-routes-manifest.json +1 -0
- voice_mode/frontend/.next/build-manifest.json +32 -0
- voice_mode/frontend/.next/export-marker.json +1 -0
- voice_mode/frontend/.next/images-manifest.json +1 -0
- voice_mode/frontend/.next/next-minimal-server.js.nft.json +1 -0
- voice_mode/frontend/.next/next-server.js.nft.json +1 -0
- voice_mode/frontend/.next/package.json +1 -0
- voice_mode/frontend/.next/prerender-manifest.json +1 -0
- voice_mode/frontend/.next/react-loadable-manifest.json +1 -0
- voice_mode/frontend/.next/required-server-files.json +1 -0
- voice_mode/frontend/.next/routes-manifest.json +1 -0
- voice_mode/frontend/.next/server/app/_not-found/page.js +1 -0
- voice_mode/frontend/.next/server/app/_not-found/page.js.nft.json +1 -0
- voice_mode/frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -0
- voice_mode/frontend/.next/server/app/_not-found.html +1 -0
- voice_mode/frontend/.next/server/app/_not-found.meta +6 -0
- voice_mode/frontend/.next/server/app/_not-found.rsc +9 -0
- voice_mode/frontend/.next/server/app/api/connection-details/route.js +12 -0
- voice_mode/frontend/.next/server/app/api/connection-details/route.js.nft.json +1 -0
- voice_mode/frontend/.next/server/app/favicon.ico/route.js +12 -0
- voice_mode/frontend/.next/server/app/favicon.ico/route.js.nft.json +1 -0
- voice_mode/frontend/.next/server/app/favicon.ico.body +0 -0
- voice_mode/frontend/.next/server/app/favicon.ico.meta +1 -0
- voice_mode/frontend/.next/server/app/index.html +1 -0
- voice_mode/frontend/.next/server/app/index.meta +5 -0
- voice_mode/frontend/.next/server/app/index.rsc +7 -0
- voice_mode/frontend/.next/server/app/page.js +11 -0
- voice_mode/frontend/.next/server/app/page.js.nft.json +1 -0
- voice_mode/frontend/.next/server/app/page_client-reference-manifest.js +1 -0
- voice_mode/frontend/.next/server/app-paths-manifest.json +6 -0
- voice_mode/frontend/.next/server/chunks/463.js +1 -0
- voice_mode/frontend/.next/server/chunks/682.js +6 -0
- voice_mode/frontend/.next/server/chunks/948.js +2 -0
- voice_mode/frontend/.next/server/chunks/994.js +2 -0
- voice_mode/frontend/.next/server/chunks/font-manifest.json +1 -0
- voice_mode/frontend/.next/server/font-manifest.json +1 -0
- voice_mode/frontend/.next/server/functions-config-manifest.json +1 -0
- voice_mode/frontend/.next/server/interception-route-rewrite-manifest.js +1 -0
- voice_mode/frontend/.next/server/middleware-build-manifest.js +1 -0
- voice_mode/frontend/.next/server/middleware-manifest.json +6 -0
- voice_mode/frontend/.next/server/middleware-react-loadable-manifest.js +1 -0
- voice_mode/frontend/.next/server/next-font-manifest.js +1 -0
- voice_mode/frontend/.next/server/next-font-manifest.json +1 -0
- voice_mode/frontend/.next/server/pages/404.html +1 -0
- voice_mode/frontend/.next/server/pages/500.html +1 -0
- voice_mode/frontend/.next/server/pages/_app.js +1 -0
- voice_mode/frontend/.next/server/pages/_app.js.nft.json +1 -0
- voice_mode/frontend/.next/server/pages/_document.js +1 -0
- voice_mode/frontend/.next/server/pages/_document.js.nft.json +1 -0
- voice_mode/frontend/.next/server/pages/_error.js +1 -0
- voice_mode/frontend/.next/server/pages/_error.js.nft.json +1 -0
- voice_mode/frontend/.next/server/pages-manifest.json +1 -0
- voice_mode/frontend/.next/server/server-reference-manifest.js +1 -0
- voice_mode/frontend/.next/server/server-reference-manifest.json +1 -0
- voice_mode/frontend/.next/server/webpack-runtime.js +1 -0
- voice_mode/frontend/.next/standalone/.next/BUILD_ID +1 -0
- voice_mode/frontend/.next/standalone/.next/app-build-manifest.json +28 -0
- voice_mode/frontend/.next/standalone/.next/app-path-routes-manifest.json +1 -0
- voice_mode/frontend/.next/standalone/.next/build-manifest.json +32 -0
- voice_mode/frontend/.next/standalone/.next/package.json +1 -0
- voice_mode/frontend/.next/standalone/.next/prerender-manifest.json +1 -0
- voice_mode/frontend/.next/standalone/.next/react-loadable-manifest.json +1 -0
- voice_mode/frontend/.next/standalone/.next/required-server-files.json +1 -0
- voice_mode/frontend/.next/standalone/.next/routes-manifest.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js +1 -0
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js.nft.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -0
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found.html +1 -0
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found.meta +6 -0
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found.rsc +9 -0
- voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js +12 -0
- voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js.nft.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js +12 -0
- voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js.nft.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico.body +0 -0
- voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico.meta +1 -0
- voice_mode/frontend/.next/standalone/.next/server/app/index.html +1 -0
- voice_mode/frontend/.next/standalone/.next/server/app/index.meta +5 -0
- voice_mode/frontend/.next/standalone/.next/server/app/index.rsc +7 -0
- voice_mode/frontend/.next/standalone/.next/server/app/page.js +11 -0
- voice_mode/frontend/.next/standalone/.next/server/app/page.js.nft.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -0
- voice_mode/frontend/.next/standalone/.next/server/app-paths-manifest.json +6 -0
- voice_mode/frontend/.next/standalone/.next/server/chunks/463.js +1 -0
- voice_mode/frontend/.next/standalone/.next/server/chunks/682.js +6 -0
- voice_mode/frontend/.next/standalone/.next/server/chunks/948.js +2 -0
- voice_mode/frontend/.next/standalone/.next/server/chunks/994.js +2 -0
- voice_mode/frontend/.next/standalone/.next/server/font-manifest.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/middleware-build-manifest.js +1 -0
- voice_mode/frontend/.next/standalone/.next/server/middleware-manifest.json +6 -0
- voice_mode/frontend/.next/standalone/.next/server/middleware-react-loadable-manifest.js +1 -0
- voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.js +1 -0
- voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/pages/404.html +1 -0
- voice_mode/frontend/.next/standalone/.next/server/pages/500.html +1 -0
- voice_mode/frontend/.next/standalone/.next/server/pages/_app.js +1 -0
- voice_mode/frontend/.next/standalone/.next/server/pages/_app.js.nft.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/pages/_document.js +1 -0
- voice_mode/frontend/.next/standalone/.next/server/pages/_document.js.nft.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/pages/_error.js +1 -0
- voice_mode/frontend/.next/standalone/.next/server/pages/_error.js.nft.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/pages-manifest.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.js +1 -0
- voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.json +1 -0
- voice_mode/frontend/.next/standalone/.next/server/webpack-runtime.js +1 -0
- voice_mode/frontend/.next/standalone/package.json +40 -0
- voice_mode/frontend/.next/standalone/server.js +38 -0
- voice_mode/frontend/.next/static/chunks/117-40bc79a2b97edb21.js +2 -0
- voice_mode/frontend/.next/static/chunks/144d3bae-2d5f122b82426d88.js +1 -0
- voice_mode/frontend/.next/static/chunks/471-bd4b96a33883dfa2.js +3 -0
- voice_mode/frontend/.next/static/chunks/app/_not-found/page-5011050e402ab9c8.js +1 -0
- voice_mode/frontend/.next/static/chunks/app/layout-fcb9b9ba5b72c7fc.js +1 -0
- voice_mode/frontend/.next/static/chunks/app/page-7c7ec2ad413ace39.js +1 -0
- voice_mode/frontend/.next/static/chunks/fd9d1056-af324d327b243cf1.js +1 -0
- voice_mode/frontend/.next/static/chunks/framework-f66176bb897dc684.js +1 -0
- voice_mode/frontend/.next/static/chunks/main-3163eca598b76a9f.js +1 -0
- voice_mode/frontend/.next/static/chunks/main-app-d02bd38ac01adb8a.js +1 -0
- voice_mode/frontend/.next/static/chunks/pages/_app-72b849fbd24ac258.js +1 -0
- voice_mode/frontend/.next/static/chunks/pages/_error-7ba65e1336b92748.js +1 -0
- voice_mode/frontend/.next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- voice_mode/frontend/.next/static/chunks/webpack-0ea9b80f19935b70.js +1 -0
- voice_mode/frontend/.next/static/css/a2f49a47752b5010.css +3 -0
- voice_mode/frontend/.next/static/media/01099be941da1820-s.woff2 +0 -0
- voice_mode/frontend/.next/static/media/39883d31a7792467-s.p.woff2 +0 -0
- voice_mode/frontend/.next/static/media/6368404d2e8d66fe-s.woff2 +0 -0
- voice_mode/frontend/.next/static/pbDjheefW1LwCua_8mPoZ/_buildManifest.js +1 -0
- voice_mode/frontend/.next/static/pbDjheefW1LwCua_8mPoZ/_ssgManifest.js +1 -0
- voice_mode/frontend/.next/trace +43 -0
- voice_mode/frontend/.next/types/app/api/connection-details/route.ts +343 -0
- voice_mode/frontend/.next/types/app/layout.ts +79 -0
- voice_mode/frontend/.next/types/app/page.ts +79 -0
- voice_mode/frontend/.next/types/package.json +1 -0
- voice_mode/frontend/package-lock.json +154 -1
- voice_mode/pronounce.py +397 -0
- voice_mode/providers.py +7 -8
- voice_mode/resources/configuration.py +2 -2
- voice_mode/tools/configuration_management.py +106 -5
- voice_mode/tools/converse.py +109 -0
- voice_mode/tools/pronounce.py +245 -0
- voice_mode/tools/transcription/__init__.py +14 -0
- voice_mode/tools/transcription/backends.py +287 -0
- voice_mode/tools/transcription/core.py +136 -0
- voice_mode/tools/transcription/formats.py +144 -0
- voice_mode/tools/transcription/types.py +52 -0
- {voice_mode-3.34.3.dist-info → voice_mode-4.1.0.dist-info}/METADATA +5 -2
- voice_mode-4.1.0.dist-info/RECORD +259 -0
- voice_mode/voice_preferences.py +0 -125
- voice_mode-3.34.3.dist-info/RECORD +0 -116
- {voice_mode-3.34.3.dist-info → voice_mode-4.1.0.dist-info}/WHEEL +0 -0
- {voice_mode-3.34.3.dist-info → voice_mode-4.1.0.dist-info}/entry_points.txt +0 -0
voice_mode/tools/configuration_management.py
CHANGED
@@ -5,7 +5,7 @@ import re
|
|
5
5
|
from pathlib import Path
|
6
6
|
from typing import Dict, Optional, List
|
7
7
|
from voice_mode.server import mcp
|
8
|
-
from voice_mode.config import BASE_DIR
|
8
|
+
from voice_mode.config import BASE_DIR, reload_configuration, find_voicemode_env_files
|
9
9
|
import logging
|
10
10
|
|
11
11
|
logger = logging.getLogger("voice-mode")
|
@@ -109,7 +109,7 @@ async def update_config(key: str, value: str) -> str:
|
|
109
109
|
"""Update a configuration value in the voicemode.env file.
|
110
110
|
|
111
111
|
Args:
|
112
|
-
key: The configuration key to update (e.g., '
|
112
|
+
key: The configuration key to update (e.g., 'VOICEMODE_VOICES')
|
113
113
|
value: The new value for the configuration
|
114
114
|
|
115
115
|
Returns:
|
@@ -175,7 +175,7 @@ async def list_config_keys() -> str:
|
|
175
175
|
("Provider Configuration", [
|
176
176
|
("VOICEMODE_TTS_BASE_URLS", "Comma-separated list of TTS endpoints"),
|
177
177
|
("VOICEMODE_STT_BASE_URLS", "Comma-separated list of STT endpoints"),
|
178
|
-
("
|
178
|
+
("VOICEMODE_VOICES", "Comma-separated list of preferred voices"),
|
179
179
|
("VOICEMODE_TTS_MODELS", "Comma-separated list of preferred models"),
|
180
180
|
("VOICEMODE_PREFER_LOCAL", "Prefer local providers over cloud (true/false)"),
|
181
181
|
("VOICEMODE_ALWAYS_TRY_LOCAL", "Always attempt local providers (true/false)"),
|
@@ -211,6 +211,107 @@ async def list_config_keys() -> str:
|
|
211
211
|
lines.append(f" {description}")
|
212
212
|
lines.append("")
|
213
213
|
|
214
|
-
lines.append("💡 Usage: update_config(key='
|
214
|
+
lines.append("💡 Usage: update_config(key='VOICEMODE_VOICES', value='af_sky,nova')")
|
215
215
|
|
216
|
-
return "\n".join(lines)
|
216
|
+
return "\n".join(lines)
|
217
|
+
|
218
|
+
|
219
|
+
@mcp.tool()
async def config_reload() -> str:
    """Reload configuration from .voicemode.env files and clear all caches.

    This tool reloads configuration from:
    1. Global ~/.voicemode/voicemode.env file
    2. Project-specific .voicemode.env files (searched up directory tree)
    3. Environment variables (highest priority)

    Returns:
        Status message listing the configuration files found after the reload
        and, when the set of discovered files changed across the reload, which
        files appeared or disappeared. On failure an error message is returned
        instead of raising.
    """
    try:
        # Snapshot file discovery on both sides of the reload so that any
        # difference can be reported back to the caller.
        old_files = find_voicemode_env_files()
        reload_configuration()
        new_files = find_voicemode_env_files()

        lines = ["✅ Configuration reloaded successfully!", ""]

        if new_files:
            lines.append("📁 Configuration files loaded (in order):")
            lines.extend(
                f" {i}. {config_file}"
                for i, config_file in enumerate(new_files, 1)
            )
        else:
            lines.append("📁 No configuration files found - using defaults")

        # Report files that appeared or disappeared; order of discovery is kept.
        added = [f for f in new_files if f not in old_files]
        removed = [f for f in old_files if f not in new_files]
        if added or removed:
            lines.append("")
            lines.append("📝 Changes since previous load:")
            lines.extend(f" + {config_file}" for config_file in added)
            lines.extend(f" - {config_file}" for config_file in removed)

        lines.append("")
        lines.append("🔄 All caches have been cleared")
        lines.append("📊 Voice preferences and provider settings updated")

        logger.info("Configuration reloaded from %d files", len(new_files))

        return "\n".join(lines)

    except Exception as e:
        # Tool boundary: surface the failure as text rather than raising.
        logger.error("Failed to reload configuration: %s", e)
        return f"❌ Failed to reload configuration: {str(e)}"
|
261
|
+
|
262
|
+
|
263
|
+
@mcp.tool()
async def show_config_files() -> str:
    """Show which .voicemode.env files are being used for configuration.

    This shows the current configuration file discovery and loading order:
    - Global configuration from ~/.voicemode/voicemode.env
    - Project-specific configuration (searched up directory tree)
    - Current working directory for context

    Returns:
        Formatted list of configuration files and their status. On failure an
        error message is returned instead of raising.
    """
    try:
        config_files = find_voicemode_env_files()
        cwd = Path.cwd()

        lines = ["📋 Voice Mode Configuration Files", "=" * 40, ""]
        lines.append(f"🗂️ Current directory: {cwd}")
        lines.append("")

        if config_files:
            lines.append("📁 Configuration files (loading order):")
            lines.append("")

            # Resolved once; used to tell the global file from project files.
            global_dir = Path.home() / ".voicemode"

            for i, config_file in enumerate(config_files, 1):
                status = "✅ EXISTS" if config_file.exists() else "❌ MISSING"
                file_type = ""

                if config_file.name == "voicemode.env" and config_file.parent.name == ".voicemode":
                    if config_file.parent == global_dir:
                        file_type = " (Global)"
                    else:
                        file_type = " (Project - in .voicemode dir)"
                elif config_file.name == ".voicemode.env":
                    if config_file.parent == cwd:
                        file_type = " (Project - current dir)"
                    else:
                        file_type = " (Project - parent dir)"

                lines.append(f" {i}. {config_file}{file_type}")
                lines.append(f" {status}")
                lines.append("")
        else:
            lines.append("❌ No configuration files found")
            lines.append("")
            lines.append("💡 Tip: Create ~/.voicemode/voicemode.env for global configuration")
            lines.append("💡 Tip: Create .voicemode.env in project directories for project-specific settings")

        lines.append("")
        # The reload tool defined in this module is named config_reload.
        lines.append("🔄 Use config_reload() to reload after making changes")

        return "\n".join(lines)

    except Exception as e:
        # Tool boundary: surface the failure as text rather than raising.
        logger.error(f"Failed to show config files: {e}")
        return f"❌ Failed to show config files: {str(e)}"
|
voice_mode/tools/converse.py
CHANGED
@@ -85,6 +85,7 @@ from voice_mode.utils import (
|
|
85
85
|
log_tool_request_start,
|
86
86
|
log_tool_request_end
|
87
87
|
)
|
88
|
+
from voice_mode.pronounce import get_manager as get_pronounce_manager, is_enabled as pronounce_enabled
|
88
89
|
|
89
90
|
logger = logging.getLogger("voice-mode")
|
90
91
|
|
@@ -255,6 +256,11 @@ async def text_to_speech_with_failover(
|
|
255
256
|
"""
|
256
257
|
from voice_mode.config import SIMPLE_FAILOVER
|
257
258
|
|
259
|
+
# Apply pronunciation rules if enabled
|
260
|
+
if pronounce_enabled():
|
261
|
+
pronounce_mgr = get_pronounce_manager()
|
262
|
+
message = pronounce_mgr.process_tts(message)
|
263
|
+
|
258
264
|
# Use simple failover if enabled
|
259
265
|
if SIMPLE_FAILOVER:
|
260
266
|
from voice_mode.simple_failover import simple_tts_failover
|
@@ -695,6 +701,11 @@ async def _speech_to_text_internal(
|
|
695
701
|
logger.debug(f"STT API response type: {type(transcription)}")
|
696
702
|
text = transcription.strip() if isinstance(transcription, str) else transcription.text.strip()
|
697
703
|
|
704
|
+
# Apply pronunciation rules if enabled
|
705
|
+
if text and pronounce_enabled():
|
706
|
+
pronounce_mgr = get_pronounce_manager()
|
707
|
+
text = pronounce_mgr.process_stt(text)
|
708
|
+
|
698
709
|
if text:
|
699
710
|
logger.info(f"✓ STT result: '{text}'")
|
700
711
|
|
@@ -875,6 +886,45 @@ def record_audio(duration: float) -> np.ndarray:
|
|
875
886
|
logger.error(f"Recording failed: {e}")
|
876
887
|
logger.error(f"Audio config when error occurred - Sample rate: {SAMPLE_RATE}, Channels: {CHANNELS}")
|
877
888
|
|
889
|
+
# Check if this is a device error that might be recoverable
|
890
|
+
error_str = str(e).lower()
|
891
|
+
if any(err in error_str for err in ['device unavailable', 'device disconnected',
|
892
|
+
'invalid device', 'unanticipated host error',
|
893
|
+
'portaudio error']):
|
894
|
+
logger.info("Audio device error detected - attempting to reinitialize audio system")
|
895
|
+
|
896
|
+
# Try to reinitialize sounddevice
|
897
|
+
try:
|
898
|
+
# Get current default device info before reinit
|
899
|
+
try:
|
900
|
+
old_device = sd.query_devices(kind='input')
|
901
|
+
old_device_name = old_device.get('name', 'Unknown')
|
902
|
+
except:
|
903
|
+
old_device_name = 'Previous device'
|
904
|
+
|
905
|
+
sd._terminate()
|
906
|
+
sd._initialize()
|
907
|
+
|
908
|
+
# Get new default device info
|
909
|
+
try:
|
910
|
+
new_device = sd.query_devices(kind='input')
|
911
|
+
new_device_name = new_device.get('name', 'Unknown')
|
912
|
+
logger.info(f"Audio system reinitialized - switched from '{old_device_name}' to '{new_device_name}'")
|
913
|
+
except:
|
914
|
+
logger.info("Audio system reinitialized - retrying with new default device")
|
915
|
+
|
916
|
+
# Wait a moment for the system to stabilize
|
917
|
+
import time as time_module
|
918
|
+
time_module.sleep(0.5)
|
919
|
+
|
920
|
+
# Try recording again with the new device (recursive call)
|
921
|
+
logger.info("Retrying recording with new audio device...")
|
922
|
+
return record_audio(duration)
|
923
|
+
|
924
|
+
except Exception as reinit_error:
|
925
|
+
logger.error(f"Failed to reinitialize audio: {reinit_error}")
|
926
|
+
# Fall through to normal error handling
|
927
|
+
|
878
928
|
# Import here to avoid circular imports
|
879
929
|
from voice_mode.utils.audio_diagnostics import get_audio_error_help
|
880
930
|
|
@@ -989,6 +1039,14 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
|
|
989
1039
|
"""Callback for continuous audio stream"""
|
990
1040
|
if status:
|
991
1041
|
logger.warning(f"Audio stream status: {status}")
|
1042
|
+
# Check for device-related errors
|
1043
|
+
status_str = str(status).lower()
|
1044
|
+
if any(err in status_str for err in ['device unavailable', 'device disconnected',
|
1045
|
+
'invalid device', 'unanticipated host error',
|
1046
|
+
'stream is stopped', 'portaudio error']):
|
1047
|
+
# Signal that we should stop recording due to device error
|
1048
|
+
audio_queue.put(None) # Sentinel value to indicate error
|
1049
|
+
return
|
992
1050
|
# Put the audio data in the queue for processing
|
993
1051
|
audio_queue.put(indata.copy())
|
994
1052
|
|
@@ -1007,6 +1065,12 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
|
|
1007
1065
|
# Get audio chunk from queue with timeout
|
1008
1066
|
chunk = audio_queue.get(timeout=0.1)
|
1009
1067
|
|
1068
|
+
# Check for error sentinel
|
1069
|
+
if chunk is None:
|
1070
|
+
logger.error("Audio device error detected - stopping recording")
|
1071
|
+
# Raise an exception to trigger recovery logic
|
1072
|
+
raise sd.PortAudioError("Audio device disconnected or unavailable")
|
1073
|
+
|
1010
1074
|
# Flatten for consistency
|
1011
1075
|
chunk_flat = chunk.flatten()
|
1012
1076
|
chunks.append(chunk_flat)
|
@@ -1109,6 +1173,45 @@ def record_audio_with_silence_detection(max_duration: float, disable_silence_det
|
|
1109
1173
|
# Import here to avoid circular imports
|
1110
1174
|
from voice_mode.utils.audio_diagnostics import get_audio_error_help
|
1111
1175
|
|
1176
|
+
# Check if this is a device error that might be recoverable
|
1177
|
+
error_str = str(e).lower()
|
1178
|
+
if any(err in error_str for err in ['device unavailable', 'device disconnected',
|
1179
|
+
'invalid device', 'unanticipated host error',
|
1180
|
+
'portaudio error']):
|
1181
|
+
logger.info("Audio device error detected - attempting to reinitialize audio system")
|
1182
|
+
|
1183
|
+
# Try to reinitialize sounddevice
|
1184
|
+
try:
|
1185
|
+
# Get current default device info before reinit
|
1186
|
+
try:
|
1187
|
+
old_device = sd.query_devices(kind='input')
|
1188
|
+
old_device_name = old_device.get('name', 'Unknown')
|
1189
|
+
except:
|
1190
|
+
old_device_name = 'Previous device'
|
1191
|
+
|
1192
|
+
sd._terminate()
|
1193
|
+
sd._initialize()
|
1194
|
+
|
1195
|
+
# Get new default device info
|
1196
|
+
try:
|
1197
|
+
new_device = sd.query_devices(kind='input')
|
1198
|
+
new_device_name = new_device.get('name', 'Unknown')
|
1199
|
+
logger.info(f"Audio system reinitialized - switched from '{old_device_name}' to '{new_device_name}'")
|
1200
|
+
except:
|
1201
|
+
logger.info("Audio system reinitialized - retrying with new default device")
|
1202
|
+
|
1203
|
+
# Wait a moment for the system to stabilize
|
1204
|
+
import time as time_module
|
1205
|
+
time_module.sleep(0.5)
|
1206
|
+
|
1207
|
+
# Try recording again with the new device (recursive call in sync context)
|
1208
|
+
logger.info("Retrying recording with new audio device...")
|
1209
|
+
return record_audio_with_silence_detection(max_duration, disable_silence_detection, min_duration, vad_aggressiveness)
|
1210
|
+
|
1211
|
+
except Exception as reinit_error:
|
1212
|
+
logger.error(f"Failed to reinitialize audio: {reinit_error}")
|
1213
|
+
# Fall through to normal error handling
|
1214
|
+
|
1112
1215
|
# Get helpful error message
|
1113
1216
|
help_message = get_audio_error_help(e)
|
1114
1217
|
logger.error(f"\n{help_message}")
|
@@ -1555,6 +1658,12 @@ async def converse(
|
|
1555
1658
|
# Run startup initialization if needed
|
1556
1659
|
await startup_initialization()
|
1557
1660
|
|
1661
|
+
# Refresh audio device cache to pick up any device changes (AirPods, etc.)
|
1662
|
+
# This takes ~1ms and ensures we use the current default device
|
1663
|
+
import sounddevice as sd
|
1664
|
+
sd._terminate()
|
1665
|
+
sd._initialize()
|
1666
|
+
|
1558
1667
|
# Get event logger and start session
|
1559
1668
|
event_logger = get_event_logger()
|
1560
1669
|
session_id = None
|
voice_mode/tools/pronounce.py
ADDED
@@ -0,0 +1,245 @@
|
|
1
|
+
"""MCP tools for managing pronunciation rules."""
|
2
|
+
|
3
|
+
import json
|
4
|
+
import yaml
|
5
|
+
from typing import Optional, Literal, List, Dict
|
6
|
+
|
7
|
+
from voice_mode.server import mcp
|
8
|
+
from voice_mode.pronounce import get_manager, is_enabled
|
9
|
+
|
10
|
+
|
11
|
+
@mcp.tool()
async def pronounce(
    action: Literal["list", "add", "remove", "enable", "disable", "test", "reload"],
    pattern: Optional[str] = None,
    replacement: Optional[str] = None,
    rule_type: Literal["tts", "stt"] = "tts",
    description: Optional[str] = None,
    name: Optional[str] = None,
    test_text: Optional[str] = None
) -> str:
    r"""
    Manage pronunciation rules for TTS/STT text processing.

    This tool allows managing pronunciation rules that improve TTS pronunciation
    and correct STT transcription errors. Rules are applied automatically when
    text is processed.

    Actions:
    - list: Show all non-private rules (returns count of private rules)
    - add: Add a new rule (requires pattern, replacement, rule_type)
    - remove: Remove a rule by name (requires name, rule_type)
    - enable: Enable a disabled rule (requires name, rule_type)
    - disable: Disable an enabled rule (requires name, rule_type)
    - test: Test rules on text (requires test_text, rule_type)
    - reload: Reload rules from configuration files

    Examples:
    - List all TTS rules:
      pronounce(action="list", rule_type="tts")

    - Add a rule to pronounce "3M" correctly:
      pronounce(
          action="add",
          pattern=r"\b3M\b",
          replacement="three em",
          rule_type="tts",
          description="Pronounce 3M company name"
      )

    - Test how text would be pronounced:
      pronounce(
          action="test",
          test_text="I work at 3M",
          rule_type="tts"
      )

    - Correct common Whisper mishearing:
      pronounce(
          action="add",
          pattern="me tool",
          replacement="metool",
          rule_type="stt",
          description="Correct 'me tool' to 'metool'"
      )

    Args:
        action: The action to perform
        pattern: Regex pattern for add action
        replacement: Replacement text for add action
        rule_type: Type of rule (tts for text-to-speech, stt for speech-to-text)
        description: Human-readable description for add action
        name: Rule name for remove/enable/disable actions
        test_text: Text to test for test action

    Returns:
        Result of the action as a formatted string
    """
    # NOTE: the docstring is a raw string on purpose. It contains the regex
    # example "\b3M\b"; in a normal string literal "\b" is a backspace escape
    # and would corrupt the text exposed through __doc__.
    manager = get_manager()

    if action == "list":
        # Fetch both views so the number of hidden (private) rules can be shown.
        all_rules = manager.list_rules(include_private=True)
        public_rules = manager.list_rules(include_private=False)

        # Filter by type if specified
        if rule_type:
            public_rules = [r for r in public_rules if r['direction'] == rule_type]
            all_rules = [r for r in all_rules if r['direction'] == rule_type]

        # Private rules are the ones present only in the unfiltered listing.
        private_count = len(all_rules) - len(public_rules)

        if not public_rules:
            if private_count > 0:
                return f"No public {rule_type} rules found. ({private_count} private rules hidden)"
            return f"No {rule_type} rules found."

        # Collect fragments and join once instead of repeated string +=.
        parts = [f"Pronunciation Rules ({rule_type.upper()}):\n\n"]

        for rule in public_rules:
            status = "✓" if rule['enabled'] else "✗"
            parts.append(f"{status} {rule['name']}: \n")
            parts.append(f" Pattern: {rule['pattern']}\n")
            parts.append(f" Replace: {rule['replacement']}\n")
            if rule['description']:
                parts.append(f" Desc: {rule['description']}\n")
            parts.append("\n")

        if private_count > 0:
            parts.append(f"({private_count} private rules hidden from view)\n")

        return "".join(parts)

    elif action == "add":
        if not pattern or not replacement:
            return "Error: 'add' action requires pattern and replacement"

        success = manager.add_rule(
            direction=rule_type,
            pattern=pattern,
            replacement=replacement,
            name=name,
            description=description or "",
            enabled=True,
            private=False  # MCP-created rules are public
        )

        if success:
            return f"✓ Rule added successfully for {rule_type.upper()}"
        else:
            return "✗ Failed to add rule. Check if the regex pattern is valid."

    elif action == "remove":
        if not name:
            return "Error: 'remove' action requires rule name"

        success = manager.remove_rule(rule_type, name)

        if success:
            return f"✓ Rule '{name}' removed from {rule_type.upper()}"
        else:
            return f"✗ Rule '{name}' not found in {rule_type.upper()} rules (may be private)"

    elif action == "enable":
        if not name:
            return "Error: 'enable' action requires rule name"

        success = manager.enable_rule(rule_type, name)

        if success:
            return f"✓ Rule '{name}' enabled in {rule_type.upper()}"
        else:
            return f"✗ Failed to enable rule '{name}' (not found or private)"

    elif action == "disable":
        if not name:
            return "Error: 'disable' action requires rule name"

        success = manager.disable_rule(rule_type, name)

        if success:
            return f"✓ Rule '{name}' disabled in {rule_type.upper()}"
        else:
            return f"✗ Failed to disable rule '{name}' (not found or private)"

    elif action == "test":
        if not test_text:
            return "Error: 'test' action requires test_text"

        result = manager.test_rule(test_text, rule_type)

        if test_text != result:
            return f"Original: {test_text}\nModified: {result}\n\nRules were applied to transform the text."
        else:
            return f"No changes: {test_text}\n\nNo rules matched or all rules are disabled."

    elif action == "reload":
        manager.reload_rules()

        # Get counts
        all_rules = manager.list_rules(include_private=True)
        tts_count = len([r for r in all_rules if r['direction'] == 'tts'])
        stt_count = len([r for r in all_rules if r['direction'] == 'stt'])

        return f"✓ Pronunciation rules reloaded\nLoaded {tts_count} TTS rules and {stt_count} STT rules"

    else:
        return f"Error: Unknown action '{action}'. Use: list, add, remove, enable, disable, test, reload"
|
192
|
+
|
193
|
+
|
194
|
+
@mcp.tool()
async def pronounce_status() -> str:
    """
    Report the current state of the pronunciation middleware.

    The report states whether pronunciation processing is enabled, gives
    per-direction rule counts (total, public, private, enabled), shows the two
    environment-driven options, and lists the manager's configuration paths.

    Returns:
        Status information as a formatted string
    """
    enabled = is_enabled()
    manager = get_manager()

    every_rule = manager.list_rules(include_private=True)
    visible_rules = manager.list_rules(include_private=False)

    def tally(rules, direction, only_enabled=False):
        # Count rules for one direction, optionally restricted to enabled ones.
        return sum(
            1
            for rule in rules
            if rule['direction'] == direction
            and (not only_enabled or rule['enabled'])
        )

    tts_all = tally(every_rule, 'tts')
    tts_public = tally(visible_rules, 'tts')
    tts_enabled = tally(every_rule, 'tts', only_enabled=True)

    stt_all = tally(every_rule, 'stt')
    stt_public = tally(visible_rules, 'stt')
    stt_enabled = tally(every_rule, 'stt', only_enabled=True)

    import os
    env = os.environ
    log_enabled = env.get('VOICEMODE_PRONUNCIATION_LOG_SUBSTITUTIONS', '').lower() == 'true'
    private_mode = env.get('VOICEMODE_PRONUNCIATION_PRIVATE_MODE', '').lower() == 'true'

    # Each fragment carries its own trailing newline(s); joined once at the end.
    report = [
        "Pronunciation Middleware Status:\n",
        f"{'=' * 40}\n",
        f"Enabled: {'✓ Yes' if enabled else '✗ No'}\n\n",
        "TTS Rules:\n",
        f" Total: {tts_all} ({tts_public} public, {tts_all - tts_public} private)\n",
        f" Enabled: {tts_enabled}\n\n",
        "STT Rules:\n",
        f" Total: {stt_all} ({stt_public} public, {stt_all - stt_public} private)\n",
        f" Enabled: {stt_enabled}\n\n",
        "Configuration:\n",
        f" Logging: {'✓ Enabled' if log_enabled else '✗ Disabled'}\n",
        f" Private Mode: {'✓ All rules private' if private_mode else '✗ Normal'}\n",
        "\nConfiguration Files:\n",
    ]
    report.extend(f" - {path}\n" for path in manager.config_paths)

    return "".join(report)
|
voice_mode/tools/transcription/__init__.py
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
"""Audio transcription with word-level timestamps."""
|
2
|
+
|
3
|
+
from .types import TranscriptionBackend, OutputFormat, TranscriptionResult, WordData, SegmentData
|
4
|
+
from .core import transcribe_audio, transcribe_audio_sync
|
5
|
+
|
6
|
+
__all__ = [
|
7
|
+
'transcribe_audio',
|
8
|
+
'transcribe_audio_sync',
|
9
|
+
'TranscriptionBackend',
|
10
|
+
'OutputFormat',
|
11
|
+
'TranscriptionResult',
|
12
|
+
'WordData',
|
13
|
+
'SegmentData',
|
14
|
+
]
|