voice-mode 2.28.2__tar.gz → 2.29.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. {voice_mode-2.28.2 → voice_mode-2.29.0}/CHANGELOG.md +51 -5
  2. {voice_mode-2.28.2 → voice_mode-2.29.0}/PKG-INFO +6 -1
  3. {voice_mode-2.28.2 → voice_mode-2.29.0}/pyproject.toml +6 -0
  4. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/__version__.py +1 -1
  5. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/converse.py +17 -9
  6. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/service.py +10 -2
  7. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/kokoro/install.py +9 -1
  8. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/livekit/install.py +9 -1
  9. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/whisper/model_install.py +99 -3
  10. {voice_mode-2.28.2 → voice_mode-2.29.0}/.gitignore +0 -0
  11. {voice_mode-2.28.2 → voice_mode-2.29.0}/README.md +0 -0
  12. {voice_mode-2.28.2 → voice_mode-2.29.0}/build_hooks.py +0 -0
  13. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/__init__.py +0 -0
  14. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/__main__.py +0 -0
  15. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/cli.py +0 -0
  16. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/cli_commands/__init__.py +0 -0
  17. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/cli_commands/exchanges.py +0 -0
  18. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/config.py +0 -0
  19. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/conversation_logger.py +0 -0
  20. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/core.py +0 -0
  21. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/data/versions.json +0 -0
  22. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/exchanges/__init__.py +0 -0
  23. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/exchanges/conversations.py +0 -0
  24. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/exchanges/filters.py +0 -0
  25. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/exchanges/formatters.py +0 -0
  26. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/exchanges/models.py +0 -0
  27. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/exchanges/reader.py +0 -0
  28. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/exchanges/stats.py +0 -0
  29. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/README.md +0 -0
  30. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/app/api/connection-details/route.ts +0 -0
  31. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/app/favicon.ico +0 -0
  32. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/app/globals.css +0 -0
  33. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/app/layout.tsx +0 -0
  34. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/app/page.tsx +0 -0
  35. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/components/CloseIcon.tsx +0 -0
  36. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/components/NoAgentNotification.tsx +0 -0
  37. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/components/TranscriptionView.tsx +0 -0
  38. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/hooks/useCombinedTranscriptions.ts +0 -0
  39. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/hooks/useLocalMicTrack.ts +0 -0
  40. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/next-env.d.ts +0 -0
  41. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/next.config.mjs +0 -0
  42. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/package-lock.json +0 -0
  43. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/package.json +0 -0
  44. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/pnpm-lock.yaml +0 -0
  45. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/postcss.config.mjs +0 -0
  46. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/tailwind.config.ts +0 -0
  47. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/frontend/tsconfig.json +0 -0
  48. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/prompts/README.md +0 -0
  49. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/prompts/__init__.py +0 -0
  50. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/prompts/converse.py +0 -0
  51. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/prompts/release_notes.py +0 -0
  52. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/prompts/services.py +0 -0
  53. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/provider_discovery.py +0 -0
  54. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/providers.py +0 -0
  55. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/resources/__init__.py +0 -0
  56. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/resources/audio_files.py +0 -0
  57. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/resources/changelog.py +0 -0
  58. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/resources/configuration.py +0 -0
  59. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/resources/statistics.py +0 -0
  60. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/resources/version.py +0 -0
  61. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/resources/whisper_models.py +0 -0
  62. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/server.py +0 -0
  63. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/shared.py +0 -0
  64. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/simple_failover.py +0 -0
  65. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/statistics.py +0 -0
  66. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/streaming.py +0 -0
  67. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/templates/launchd/com.voicemode.frontend.plist +0 -0
  68. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/templates/launchd/com.voicemode.kokoro.plist +0 -0
  69. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/templates/launchd/com.voicemode.livekit.plist +0 -0
  70. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/templates/launchd/com.voicemode.whisper.plist +0 -0
  71. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/templates/launchd/start-kokoro-with-health-check.sh +0 -0
  72. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/templates/launchd/start-whisper-with-health-check.sh +0 -0
  73. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/templates/systemd/voicemode-frontend.service +0 -0
  74. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/templates/systemd/voicemode-kokoro.service +0 -0
  75. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/templates/systemd/voicemode-livekit.service +0 -0
  76. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/templates/systemd/voicemode-whisper.service +0 -0
  77. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/__init__.py +0 -0
  78. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/configuration_management.py +0 -0
  79. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/dependencies.py +0 -0
  80. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/devices.py +0 -0
  81. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/diagnostics.py +0 -0
  82. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/providers.py +0 -0
  83. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/kokoro/uninstall.py +0 -0
  84. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/list_versions.py +0 -0
  85. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/livekit/__init__.py +0 -0
  86. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/livekit/frontend.py +0 -0
  87. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/livekit/production_server.py +0 -0
  88. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/livekit/uninstall.py +0 -0
  89. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/version_info.py +0 -0
  90. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/whisper/__init__.py +0 -0
  91. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/whisper/install.py +0 -0
  92. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/whisper/list_models.py +0 -0
  93. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/whisper/model_active.py +0 -0
  94. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/whisper/model_benchmark.py +0 -0
  95. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/whisper/model_remove.py +0 -0
  96. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/whisper/models.py +0 -0
  97. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/services/whisper/uninstall.py +0 -0
  98. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/statistics.py +0 -0
  99. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/tools/voice_registry.py +0 -0
  100. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/__init__.py +0 -0
  101. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/audio_diagnostics.py +0 -0
  102. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/event_logger.py +0 -0
  103. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/ffmpeg_check.py +0 -0
  104. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/format_migration.py +0 -0
  105. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/gpu_detection.py +0 -0
  106. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/migration_helpers.py +0 -0
  107. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/services/common.py +0 -0
  108. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/services/kokoro_helpers.py +0 -0
  109. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/services/livekit_helpers.py +0 -0
  110. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/services/whisper_helpers.py +0 -0
  111. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/services/whisper_version.py +0 -0
  112. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/utils/version_helpers.py +0 -0
  113. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/version.py +0 -0
  114. {voice_mode-2.28.2 → voice_mode-2.29.0}/voice_mode/voice_preferences.py +0 -0
@@ -7,14 +7,60 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.29.0] - 2025-08-25
11
+
12
+ ### Added
13
+ - **CoreML acceleration support for Whisper on Apple Silicon**
14
+ - Added optional dependency group 'coreml' with PyTorch and CoreMLTools
15
+ - Enhanced whisper_model_install tool with install_torch and auto_confirm parameters
16
+ - Automatic detection of Apple Silicon Macs with CoreML acceleration offer
17
+ - User-friendly confirmation prompts for large (~2.5GB) PyTorch download
18
+ - Graceful fallback to Metal acceleration if CoreML requirements not met
19
+ - Clear instructions for enabling CoreML later if initially skipped
20
+
21
+ - **Beautiful installer experience**
22
+ - Added Voice Mode ASCII art in Claude Code orange color
23
+ - Enhanced preamble with clear value proposition and privacy messaging
24
+ - Early system detection with special recognition for Apple Silicon
25
+ - Professional presentation with centered text and visual hierarchy
26
+
27
+ ### Fixed
28
+ - **Improved converse tool documentation**
29
+ - Simplified listen_duration parameter documentation
30
+ - Removed confusing duration recommendations that led to unnecessary overrides
31
+ - Clarified that silence detection handles timing well with sensible defaults
32
+ - Reduces cognitive load and prevents token waste from explicit duration settings
33
+
34
+ ## [2.28.3] - 2025-08-24
35
+
36
+ ### Fixed
37
+ - **Parameter type handling for MCP tools**
38
+ - Fixed vad_aggressiveness parameter to accept string values from LLMs
39
+ - Fixed port parameters in kokoro_install and livekit_install
40
+ - Fixed lines parameter in service management tool
41
+ - All numeric parameters now properly convert strings to integers
42
+ - Addresses systemic issue where Claude Code MCP client passes strings
43
+
44
+ - **Installer script uvx command corrections**
45
+ - Fixed MCP configuration to use correct command `uvx voice-mode` (without --refresh)
46
+ - Installer now always refreshes to latest version at start
47
+ - Removed unnecessary --refresh flags from runtime commands
48
+ - Updated user-facing command examples to show correct usage
49
+
10
50
  ## [2.28.2] - 2025-08-24
11
51
 
52
+ ### Added
53
+ - **Configurable audio feedback pip delays**
54
+ - Added VOICEMODE_PIP_LEADING_SILENCE and VOICEMODE_PIP_TRAILING_SILENCE environment variables
55
+ - Allows customization of silence before and after audio feedback chimes
56
+ - Configurable via converse tool parameters pip_leading_silence and pip_trailing_silence
57
+ - Helps prevent audio cutoff on Bluetooth devices and other audio systems with delay
58
+
12
59
  ### Fixed
13
- - **Improved noise filtering in VAD (Voice Activity Detection)**
14
- - Fixed issue where nose blowing and similar non-speech sounds were detected as speech
15
- - Adjusted VAD aggressiveness handling to better filter continuous non-speech audio
16
- - Prevents false positives from breathing sounds, sniffles, and ambient noise
17
- - Improves overall speech detection accuracy
60
+ - **Audio feedback for Bluetooth devices**
61
+ - Added silence buffer before chimes to prevent Bluetooth audio cutoff
62
+ - Improved compatibility with devices that have audio activation delay
63
+ - Better audio feedback experience across different output devices
18
64
 
19
65
  ## [2.28.1] - 2025-08-24
20
66
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: voice-mode
3
- Version: 2.28.2
3
+ Version: 2.29.0
4
4
  Summary: VoiceMode - Voice interaction capabilities for AI assistants (formerly voice-mcp)
5
5
  Project-URL: Homepage, https://github.com/mbailey/voicemode
6
6
  Project-URL: Repository, https://github.com/mbailey/voicemode
@@ -39,6 +39,11 @@ Requires-Dist: simpleaudio
39
39
  Requires-Dist: sounddevice
40
40
  Requires-Dist: uv>=0.4.0
41
41
  Requires-Dist: webrtcvad>=2.0.10
42
+ Provides-Extra: coreml
43
+ Requires-Dist: ane-transformers; extra == 'coreml'
44
+ Requires-Dist: coremltools>=7.0; extra == 'coreml'
45
+ Requires-Dist: torch>=2.0.0; extra == 'coreml'
46
+ Requires-Dist: transformers; extra == 'coreml'
42
47
  Provides-Extra: dev
43
48
  Requires-Dist: build>=1.0.0; extra == 'dev'
44
49
  Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
@@ -50,6 +50,12 @@ dependencies = [
50
50
  ]
51
51
 
52
52
  [project.optional-dependencies]
53
+ coreml = [
54
+ "torch>=2.0.0",
55
+ "coremltools>=7.0",
56
+ "transformers",
57
+ "ane-transformers",
58
+ ]
53
59
  dev = [
54
60
  "build>=1.0.0",
55
61
  "twine>=4.0.0",
@@ -1,3 +1,3 @@
1
1
  # This file is automatically updated by 'make release'
2
2
  # Do not edit manually
3
- __version__ = "2.28.2"
3
+ __version__ = "2.29.0"
@@ -1323,7 +1323,7 @@ async def converse(
1323
1323
  audio_format: Optional[str] = None,
1324
1324
  disable_silence_detection: Union[bool, str] = False,
1325
1325
  speed: Optional[float] = None,
1326
- vad_aggressiveness: Optional[int] = None,
1326
+ vad_aggressiveness: Optional[Union[int, str]] = None,
1327
1327
  skip_tts: Optional[Union[bool, str]] = None,
1328
1328
  pip_leading_silence: Optional[float] = None,
1329
1329
  pip_trailing_silence: Optional[float] = None
@@ -1351,14 +1351,13 @@ async def converse(
1351
1351
  message: The message to speak
1352
1352
  wait_for_response: Whether to listen for a response after speaking (default: True)
1353
1353
  listen_duration: How long to listen for response in seconds (default: 120.0)
1354
- Recommended durations based on expected response:
1355
- - Simple yes/no questions: 10 seconds
1356
- - Normal conversational responses: 30 seconds
1357
- - Open-ended questions: 60 seconds
1358
- - Detailed explanations: 120 seconds (default)
1359
- - Stories or long explanations: 300 seconds
1360
- Always err on the side of longer duration - it's better to have
1361
- silence at the end than to cut off the user mid-sentence.
1354
+ The tool handles silence detection well and uses a sensible default.
1355
+ It's unusual to need to set the duration - only override if you have
1356
+ specific requirements such as:
1357
+ - Silence detection is disabled and you need a specific timeout
1358
+ - You know the response will be exceptionally long (>120s)
1359
+ - You're in a special mode that requires different timing
1360
+ In most cases, just let the default and silence detection handle it.
1362
1361
  min_listen_duration: Minimum time to record before silence detection can stop (default: 2.0)
1363
1362
  Useful for preventing premature cutoffs when users need thinking time.
1364
1363
  Examples:
@@ -1495,6 +1494,15 @@ async def converse(
1495
1494
  if skip_tts is not None and isinstance(skip_tts, str):
1496
1495
  skip_tts = skip_tts.lower() in ('true', '1', 'yes', 'on')
1497
1496
 
1497
+ # Convert vad_aggressiveness to integer if provided as string
1498
+ if vad_aggressiveness is not None and isinstance(vad_aggressiveness, str):
1499
+ try:
1500
+ vad_aggressiveness = int(vad_aggressiveness)
1501
+ # Validation will happen later in the function
1502
+ except ValueError:
1503
+ logger.warning(f"Invalid VAD aggressiveness value '{vad_aggressiveness}', using default")
1504
+ vad_aggressiveness = None
1505
+
1498
1506
  # Determine whether to skip TTS
1499
1507
  if skip_tts is not None:
1500
1508
  # Parameter explicitly set, use it
@@ -8,7 +8,7 @@ import platform
8
8
  import subprocess
9
9
  import time
10
10
  from pathlib import Path
11
- from typing import Literal, Optional, Dict, Any
11
+ from typing import Literal, Optional, Dict, Any, Union
12
12
 
13
13
  import psutil
14
14
 
@@ -980,7 +980,7 @@ async def view_logs(service_name: str, lines: Optional[int] = None) -> str:
980
980
  async def service(
981
981
  service_name: Literal["whisper", "kokoro", "livekit", "frontend"],
982
982
  action: Literal["status", "start", "stop", "restart", "enable", "disable", "logs", "update-service-files"] = "status",
983
- lines: Optional[int] = None
983
+ lines: Optional[Union[int, str]] = None
984
984
  ) -> str:
985
985
  """Unified service management tool for voice mode services.
986
986
 
@@ -1007,6 +1007,14 @@ async def service(
1007
1007
  service("kokoro", "start") # Start Kokoro service
1008
1008
  service("whisper", "logs", 100) # View last 100 lines of Whisper logs
1009
1009
  """
1010
+ # Convert lines to integer if provided as string
1011
+ if lines is not None and isinstance(lines, str):
1012
+ try:
1013
+ lines = int(lines)
1014
+ except ValueError:
1015
+ logger.warning(f"Invalid lines value '{lines}', using default 50")
1016
+ lines = 50
1017
+
1010
1018
  # Route to appropriate handler
1011
1019
  if action == "status":
1012
1020
  return await status_service(service_name)
@@ -26,7 +26,7 @@ logger = logging.getLogger("voice-mode")
26
26
  async def kokoro_install(
27
27
  install_dir: Optional[str] = None,
28
28
  models_dir: Optional[str] = None,
29
- port: int = 8880,
29
+ port: Union[int, str] = 8880,
30
30
  auto_start: Union[bool, str] = True,
31
31
  install_models: Union[bool, str] = True,
32
32
  force_reinstall: Union[bool, str] = False,
@@ -54,6 +54,14 @@ async def kokoro_install(
54
54
  Installation status with service configuration details
55
55
  """
56
56
  try:
57
+ # Convert port to integer if provided as string
58
+ if isinstance(port, str):
59
+ try:
60
+ port = int(port)
61
+ except ValueError:
62
+ logger.warning(f"Invalid port value '{port}', using default 8880")
63
+ port = 8880
64
+
57
65
  # Check for and migrate old installations
58
66
  migration_msg = auto_migrate_if_needed("kokoro")
59
67
 
@@ -124,7 +124,7 @@ room:
124
124
  @mcp.tool()
125
125
  async def livekit_install(
126
126
  install_dir: Optional[str] = None,
127
- port: int = 7880,
127
+ port: Union[int, str] = 7880,
128
128
  force_reinstall: Union[bool, str] = False,
129
129
  auto_enable: Optional[Union[bool, str]] = None,
130
130
  version: str = "latest"
@@ -159,6 +159,14 @@ async def livekit_install(
159
159
  # Check system
160
160
  system = platform.system()
161
161
 
162
+ # Convert port to integer if provided as string
163
+ if isinstance(port, str):
164
+ try:
165
+ port = int(port)
166
+ except ValueError:
167
+ logger.warning(f"Invalid port value '{port}', using default 7880")
168
+ port = 7880
169
+
162
170
  # Handle string boolean conversions
163
171
  if isinstance(force_reinstall, str):
164
172
  force_reinstall = force_reinstall.lower() in ("true", "1", "yes", "on")
@@ -1,10 +1,13 @@
1
1
  """Download Whisper models with Core ML support."""
2
2
 
3
3
  import os
4
+ import sys
4
5
  import json
5
6
  import logging
7
+ import platform
8
+ import subprocess
6
9
  from pathlib import Path
7
- from typing import Union, List
10
+ from typing import Union, List, Dict, Any
8
11
 
9
12
  from voice_mode.server import mcp
10
13
  from voice_mode.config import logger, MODELS_DIR
@@ -17,7 +20,9 @@ logger = logging.getLogger("voice-mode")
17
20
  async def whisper_model_install(
18
21
  model: Union[str, List[str]] = "large-v2",
19
22
  force_download: Union[bool, str] = False,
20
- skip_core_ml: Union[bool, str] = False
23
+ skip_core_ml: Union[bool, str] = False,
24
+ install_torch: Union[bool, str] = False,
25
+ auto_confirm: Union[bool, str] = False
21
26
  ) -> str:
22
27
  """Download Whisper model(s) with optional Core ML conversion.
23
28
 
@@ -31,6 +36,8 @@ async def whisper_model_install(
31
36
  - "all" to download all available models
32
37
  force_download: Re-download even if model exists (default: False)
33
38
  skip_core_ml: Skip Core ML conversion on Apple Silicon (default: False)
39
+ install_torch: Install PyTorch for CoreML (adds ~2.5GB) (default: False)
40
+ auto_confirm: Skip all confirmation prompts (default: False)
34
41
 
35
42
  Available models:
36
43
  - tiny, tiny.en
@@ -77,6 +84,20 @@ async def whisper_model_install(
77
84
  "error": "Whisper.cpp not installed. Please run whisper_install first."
78
85
  }, indent=2)
79
86
 
87
+ # Handle CoreML dependencies if needed
88
+ coreml_status = await _handle_coreml_dependencies(
89
+ install_torch=install_torch,
90
+ auto_confirm=auto_confirm,
91
+ skip_core_ml=skip_core_ml
92
+ )
93
+
94
+ if not coreml_status["continue"]:
95
+ return json.dumps(coreml_status, indent=2)
96
+
97
+ # If CoreML deps were installed, skip_core_ml may have been updated
98
+ if coreml_status.get("coreml_deps_failed"):
99
+ skip_core_ml = True
100
+
80
101
  # Parse model input
81
102
  available_models = get_available_models()
82
103
 
@@ -200,4 +221,79 @@ async def whisper_model_install(
200
221
  return json.dumps({
201
222
  "success": False,
202
223
  "error": str(e)
203
- }, indent=2)
224
+ }, indent=2)
225
+
226
+
227
+ async def _handle_coreml_dependencies(
228
+ install_torch: bool = False,
229
+ auto_confirm: bool = False,
230
+ skip_core_ml: bool = False
231
+ ) -> Dict[str, Any]:
232
+ """Handle CoreML dependency installation for Apple Silicon Macs.
233
+
234
+ Returns:
235
+ Dict with 'continue' key indicating whether to proceed with model download
236
+ """
237
+ # Check if we're on Apple Silicon Mac
238
+ if platform.system() != "Darwin" or platform.machine() != "arm64":
239
+ return {"continue": True}
240
+
241
+ # If skipping CoreML, no need to check dependencies
242
+ if skip_core_ml:
243
+ return {"continue": True}
244
+
245
+ # Check if torch is already installed
246
+ try:
247
+ import torch
248
+ logger.info("PyTorch already installed for CoreML support")
249
+ return {"continue": True}
250
+ except ImportError:
251
+ pass
252
+
253
+ # Check if user wants to install torch
254
+ if not install_torch and not auto_confirm:
255
+ return {
256
+ "continue": False,
257
+ "success": False,
258
+ "requires_confirmation": True,
259
+ "message": "CoreML requires PyTorch (~2.5GB). Rerun with install_torch=True to confirm.",
260
+ "recommendation": "Set install_torch=True for CoreML acceleration (2-3x faster)"
261
+ }
262
+
263
+ # Install CoreML dependencies
264
+ logger.info("Installing CoreML dependencies...")
265
+
266
+ try:
267
+ # Detect environment and install appropriately
268
+ packages = ["torch>=2.0.0", "coremltools>=7.0", "transformers", "ane-transformers"]
269
+
270
+ # Try UV first (most common)
271
+ if subprocess.run(["which", "uv"], capture_output=True).returncode == 0:
272
+ cmd = ["uv", "pip", "install"] + packages
273
+ logger.info("Installing via UV...")
274
+ else:
275
+ # Fallback to pip
276
+ cmd = [sys.executable, "-m", "pip", "install"] + packages
277
+ logger.info("Installing via pip...")
278
+
279
+ # Run installation
280
+ result = subprocess.run(cmd, capture_output=True, text=True)
281
+
282
+ if result.returncode == 0:
283
+ logger.info("CoreML dependencies installed successfully")
284
+ return {"continue": True, "coreml_deps_installed": True}
285
+ else:
286
+ logger.warning(f"Failed to install CoreML dependencies: {result.stderr}")
287
+ return {
288
+ "continue": True,
289
+ "coreml_deps_failed": True,
290
+ "warning": "CoreML dependencies installation failed. Models will use Metal acceleration."
291
+ }
292
+
293
+ except Exception as e:
294
+ logger.warning(f"Error installing CoreML dependencies: {e}")
295
+ return {
296
+ "continue": True,
297
+ "coreml_deps_failed": True,
298
+ "warning": f"CoreML setup error: {str(e)}. Models will use Metal acceleration."
299
+ }
File without changes
File without changes
File without changes