voice-mode 2.26.0__py3-none-any.whl → 2.27.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voice_mode/__version__.py +1 -1
- voice_mode/cli.py +611 -3
- voice_mode/config.py +11 -3
- voice_mode/frontend/.next/BUILD_ID +1 -1
- voice_mode/frontend/.next/app-build-manifest.json +5 -5
- voice_mode/frontend/.next/app-path-routes-manifest.json +1 -1
- voice_mode/frontend/.next/build-manifest.json +3 -3
- voice_mode/frontend/.next/next-minimal-server.js.nft.json +1 -1
- voice_mode/frontend/.next/next-server.js.nft.json +1 -1
- voice_mode/frontend/.next/prerender-manifest.json +1 -1
- voice_mode/frontend/.next/required-server-files.json +1 -1
- voice_mode/frontend/.next/server/app/_not-found/page.js +1 -1
- voice_mode/frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/server/app/_not-found.html +1 -1
- voice_mode/frontend/.next/server/app/_not-found.rsc +1 -1
- voice_mode/frontend/.next/server/app/api/connection-details/route.js +2 -2
- voice_mode/frontend/.next/server/app/favicon.ico/route.js +2 -2
- voice_mode/frontend/.next/server/app/index.html +1 -1
- voice_mode/frontend/.next/server/app/index.rsc +2 -2
- voice_mode/frontend/.next/server/app/page.js +2 -2
- voice_mode/frontend/.next/server/app/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/server/app-paths-manifest.json +1 -1
- voice_mode/frontend/.next/server/chunks/994.js +1 -1
- voice_mode/frontend/.next/server/middleware-build-manifest.js +1 -1
- voice_mode/frontend/.next/server/next-font-manifest.js +1 -1
- voice_mode/frontend/.next/server/next-font-manifest.json +1 -1
- voice_mode/frontend/.next/server/pages/404.html +1 -1
- voice_mode/frontend/.next/server/pages/500.html +1 -1
- voice_mode/frontend/.next/server/server-reference-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/.next/BUILD_ID +1 -1
- voice_mode/frontend/.next/standalone/.next/app-build-manifest.json +5 -5
- voice_mode/frontend/.next/standalone/.next/app-path-routes-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/.next/build-manifest.json +3 -3
- voice_mode/frontend/.next/standalone/.next/prerender-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/.next/required-server-files.json +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found.rsc +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/index.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/index.rsc +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/page.js +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app-paths-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/.next/server/chunks/994.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/middleware-build-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/.next/server/pages/404.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/pages/500.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/server.js +1 -1
- voice_mode/frontend/.next/static/chunks/app/{layout-0e969b20634a3137.js → layout-08be62ed6e344292.js} +1 -1
- voice_mode/frontend/.next/static/chunks/app/page-80fc72669f25298f.js +1 -0
- voice_mode/frontend/.next/static/chunks/{main-app-b9e128659aafd50e.js → main-app-413f77c1f2c53e3f.js} +1 -1
- voice_mode/frontend/.next/trace +43 -43
- voice_mode/frontend/.next/types/app/api/connection-details/route.ts +1 -1
- voice_mode/frontend/.next/types/app/layout.ts +1 -1
- voice_mode/frontend/.next/types/app/page.ts +1 -1
- voice_mode/frontend/package-lock.json +8 -8
- voice_mode/resources/configuration.py +8 -4
- voice_mode/resources/whisper_models.py +10 -13
- voice_mode/templates/systemd/voicemode-frontend.service +1 -1
- voice_mode/tools/configuration_management.py +7 -2
- voice_mode/tools/converse.py +31 -0
- voice_mode/tools/services/kokoro/install.py +3 -2
- voice_mode/tools/services/whisper/__init__.py +13 -0
- voice_mode/tools/services/whisper/install.py +3 -2
- voice_mode/tools/services/whisper/list_models.py +70 -0
- voice_mode/tools/services/whisper/list_models_tool.py +65 -0
- voice_mode/tools/services/whisper/models.py +274 -0
- {voice_mode-2.26.0.dist-info → voice_mode-2.27.0.dist-info}/METADATA +1 -1
- {voice_mode-2.26.0.dist-info → voice_mode-2.27.0.dist-info}/RECORD +79 -75
- voice_mode/frontend/.next/static/chunks/app/page-db597c111ebcc19f.js +0 -1
- /voice_mode/frontend/.next/static/{uvJyMdD1IAhgbf_LCTQE6 → wQ5pxzPmwjlzdUfJwSjMg}/_buildManifest.js +0 -0
- /voice_mode/frontend/.next/static/{uvJyMdD1IAhgbf_LCTQE6 → wQ5pxzPmwjlzdUfJwSjMg}/_ssgManifest.js +0 -0
- {voice_mode-2.26.0.dist-info → voice_mode-2.27.0.dist-info}/WHEEL +0 -0
- {voice_mode-2.26.0.dist-info → voice_mode-2.27.0.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
// File: /tmp/build-via-sdist-
|
1
|
+
// File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/api/connection-details/route.ts
|
2
2
|
import * as entry from '../../../../../app/api/connection-details/route.js'
|
3
3
|
import type { NextRequest } from 'next/server.js'
|
4
4
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
// File: /tmp/build-via-sdist-
|
1
|
+
// File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/layout.tsx
|
2
2
|
import * as entry from '../../../app/layout.js'
|
3
3
|
import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
|
4
4
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
// File: /tmp/build-via-sdist-
|
1
|
+
// File: /tmp/build-via-sdist-qw720py5/voice_mode-2.27.0/voice_mode/frontend/app/page.tsx
|
2
2
|
import * as entry from '../../../app/page.js'
|
3
3
|
import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
|
4
4
|
|
@@ -1360,9 +1360,9 @@
|
|
1360
1360
|
}
|
1361
1361
|
},
|
1362
1362
|
"node_modules/browserslist": {
|
1363
|
-
"version": "4.25.
|
1364
|
-
"resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.25.
|
1365
|
-
"integrity": "sha512-
|
1363
|
+
"version": "4.25.3",
|
1364
|
+
"resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.25.3.tgz",
|
1365
|
+
"integrity": "sha512-cDGv1kkDI4/0e5yON9yM5G/0A5u8sf5TnmdX5C9qHzI9PPu++sQ9zjm1k9NiOrf3riY4OkK0zSGqfvJyJsgCBQ==",
|
1366
1366
|
"dev": true,
|
1367
1367
|
"funding": [
|
1368
1368
|
{
|
@@ -1380,8 +1380,8 @@
|
|
1380
1380
|
],
|
1381
1381
|
"license": "MIT",
|
1382
1382
|
"dependencies": {
|
1383
|
-
"caniuse-lite": "^1.0.
|
1384
|
-
"electron-to-chromium": "^1.5.
|
1383
|
+
"caniuse-lite": "^1.0.30001735",
|
1384
|
+
"electron-to-chromium": "^1.5.204",
|
1385
1385
|
"node-releases": "^2.0.19",
|
1386
1386
|
"update-browserslist-db": "^1.1.3"
|
1387
1387
|
},
|
@@ -1774,9 +1774,9 @@
|
|
1774
1774
|
"license": "MIT"
|
1775
1775
|
},
|
1776
1776
|
"node_modules/electron-to-chromium": {
|
1777
|
-
"version": "1.5.
|
1778
|
-
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.
|
1779
|
-
"integrity": "sha512-
|
1777
|
+
"version": "1.5.207",
|
1778
|
+
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.207.tgz",
|
1779
|
+
"integrity": "sha512-mryFrrL/GXDTmAtIVMVf+eIXM09BBPlO5IQ7lUyKmK8d+A4VpRGG+M3ofoVef6qyF8s60rJei8ymlJxjUA8Faw==",
|
1780
1780
|
"dev": true,
|
1781
1781
|
"license": "ISC"
|
1782
1782
|
},
|
@@ -238,13 +238,17 @@ async def environment_variables() -> str:
|
|
238
238
|
Shows each configuration variable with:
|
239
239
|
- Name: The environment variable name
|
240
240
|
- Environment Value: Current value from environment
|
241
|
-
- Config File Value: Value from ~/.voicemode.env (if exists)
|
241
|
+
- Config File Value: Value from ~/.voicemode/voicemode.env (if exists)
|
242
242
|
- Description: What the variable controls
|
243
243
|
|
244
244
|
This helps identify configuration sources and troubleshoot settings.
|
245
245
|
"""
|
246
|
-
# Parse config file
|
247
|
-
user_config_path = Path.home() / ".voicemode.env"
|
246
|
+
# Parse config file - try new path first, fall back to old
|
247
|
+
user_config_path = Path.home() / ".voicemode" / "voicemode.env"
|
248
|
+
if not user_config_path.exists():
|
249
|
+
old_path = Path.home() / ".voicemode" / ".voicemode.env"
|
250
|
+
if old_path.exists():
|
251
|
+
user_config_path = old_path
|
248
252
|
file_config = parse_env_file(user_config_path)
|
249
253
|
|
250
254
|
# Define all configuration variables with descriptions
|
@@ -330,7 +334,7 @@ async def environment_template() -> str:
|
|
330
334
|
Environment variable template for voice mode configuration.
|
331
335
|
|
332
336
|
Provides a ready-to-use template of all available environment variables
|
333
|
-
with their current values. This can be saved to ~/.voicemode.env and
|
337
|
+
with their current values. This can be saved to ~/.voicemode/voicemode.env and
|
334
338
|
customized as needed.
|
335
339
|
|
336
340
|
Sensitive values like API keys are masked for security.
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
6
6
|
from typing import Dict, Any, List
|
7
7
|
|
8
8
|
from ..server import mcp
|
9
|
-
from ..config import logger
|
9
|
+
from ..config import logger, WHISPER_MODEL_PATH, WHISPER_MODEL
|
10
10
|
|
11
11
|
|
12
12
|
@mcp.resource("whisper://models")
|
@@ -24,17 +24,14 @@ async def list_whisper_models() -> str:
|
|
24
24
|
and which one is currently being used by the whisper server.
|
25
25
|
"""
|
26
26
|
try:
|
27
|
-
# Get whisper models directory
|
28
|
-
|
29
|
-
Path.home() / ".voicemode/services/whisper/models",
|
30
|
-
Path.home() / ".voicemode/whisper.cpp/models" # legacy
|
31
|
-
]
|
27
|
+
# Get whisper models directory from config
|
28
|
+
models_dir = Path(WHISPER_MODEL_PATH)
|
32
29
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
30
|
+
# If config path doesn't exist, check service installation
|
31
|
+
if not models_dir.exists():
|
32
|
+
service_models = Path.home() / ".voicemode/services/whisper/models"
|
33
|
+
if service_models.exists():
|
34
|
+
models_dir = service_models
|
38
35
|
|
39
36
|
# List all model files
|
40
37
|
models: List[Dict[str, Any]] = []
|
@@ -55,8 +52,8 @@ async def list_whisper_models() -> str:
|
|
55
52
|
# Sort models by name
|
56
53
|
models.sort(key=lambda x: x["name"])
|
57
54
|
|
58
|
-
# Get current configuration
|
59
|
-
current_model =
|
55
|
+
# Get current configuration from config
|
56
|
+
current_model = WHISPER_MODEL
|
60
57
|
|
61
58
|
# Build response
|
62
59
|
data = {
|
@@ -7,7 +7,7 @@ Wants=network.target
|
|
7
7
|
[Service]
|
8
8
|
Type=simple
|
9
9
|
WorkingDirectory={FRONTEND_DIR}
|
10
|
-
EnvironmentFile=%h/.voicemode
|
10
|
+
EnvironmentFile=%h/.voicemode/voicemode.env
|
11
11
|
Environment=NODE_ENV=development
|
12
12
|
Environment=PORT={PORT}
|
13
13
|
Environment=HOST={HOST}
|
@@ -11,7 +11,9 @@ import logging
|
|
11
11
|
logger = logging.getLogger("voice-mode")
|
12
12
|
|
13
13
|
# Configuration file path (user-level only for security)
|
14
|
-
USER_CONFIG_PATH = Path.home() / ".voicemode" / "
|
14
|
+
USER_CONFIG_PATH = Path.home() / ".voicemode" / "voicemode.env"
|
15
|
+
# Legacy path for backwards compatibility
|
16
|
+
LEGACY_CONFIG_PATH = Path.home() / ".voicemode" / ".voicemode.env"
|
15
17
|
|
16
18
|
|
17
19
|
def parse_env_file(file_path: Path) -> Dict[str, str]:
|
@@ -117,8 +119,11 @@ async def update_config(key: str, value: str) -> str:
|
|
117
119
|
if not re.match(r'^[A-Z_]+$', key):
|
118
120
|
return f"❌ Invalid key format: {key}. Keys must be uppercase with underscores only."
|
119
121
|
|
120
|
-
# Use user config path
|
122
|
+
# Use user config path, check for legacy if new doesn't exist
|
121
123
|
config_path = USER_CONFIG_PATH
|
124
|
+
if not config_path.exists() and LEGACY_CONFIG_PATH.exists():
|
125
|
+
config_path = LEGACY_CONFIG_PATH
|
126
|
+
logger.warning(f"Using deprecated .voicemode.env - please rename to voicemode.env")
|
122
127
|
|
123
128
|
try:
|
124
129
|
# Read existing configuration
|
voice_mode/tools/converse.py
CHANGED
@@ -1412,6 +1412,37 @@ async def converse(
|
|
1412
1412
|
Remember: Lower values (0-1) = more permissive, may detect non-speech as speech
|
1413
1413
|
Higher values (2-3) = more strict, may miss soft speech or whispers
|
1414
1414
|
|
1415
|
+
Parallel Operations Pattern (RECOMMENDED):
|
1416
|
+
When performing actions that don't require user confirmation, use wait_for_response=False
|
1417
|
+
to speak while simultaneously executing other tools. This creates natural, flowing conversations.
|
1418
|
+
|
1419
|
+
Pattern: converse("Status update", wait_for_response=False) then immediately run other tools.
|
1420
|
+
The speech plays while your actions execute in parallel.
|
1421
|
+
|
1422
|
+
Examples:
|
1423
|
+
- Search narration: converse("Searching for that file", wait_for_response=False) + Grep(...)
|
1424
|
+
- Processing update: converse("Analyzing the screenshot", wait_for_response=False) + analyze_screenshot(...)
|
1425
|
+
- Creation status: converse("Creating that document now", wait_for_response=False) + Write(...)
|
1426
|
+
- Quick confirmation: converse("Done! The file is saved", wait_for_response=False)
|
1427
|
+
|
1428
|
+
Benefits:
|
1429
|
+
- No dead air during operations
|
1430
|
+
- User knows what's happening
|
1431
|
+
- More natural conversation flow
|
1432
|
+
- Better user experience
|
1433
|
+
|
1434
|
+
When to use parallel pattern:
|
1435
|
+
- File operations (reading, writing, searching)
|
1436
|
+
- Data processing (analysis, computation)
|
1437
|
+
- Status updates during long operations
|
1438
|
+
- Confirmations that don't need response
|
1439
|
+
|
1440
|
+
When NOT to use parallel pattern:
|
1441
|
+
- Questions requiring answers
|
1442
|
+
- Confirmations needing user approval
|
1443
|
+
- Error messages needing acknowledgment
|
1444
|
+
- End of conversation farewells (unless doing cleanup)
|
1445
|
+
|
1415
1446
|
Skip TTS Examples:
|
1416
1447
|
- Fast iteration mode: converse("Processing your request", skip_tts=True) # Text only, no voice
|
1417
1448
|
- Important announcement: converse("Warning: System will restart", skip_tts=False) # Always use voice
|
@@ -251,13 +251,14 @@ async def kokoro_install(
|
|
251
251
|
with open(plist_path, 'w') as f:
|
252
252
|
f.write(plist_content)
|
253
253
|
|
254
|
-
#
|
254
|
+
# Unload if already loaded (ignore errors)
|
255
255
|
try:
|
256
256
|
subprocess.run(["launchctl", "unload", plist_path], capture_output=True)
|
257
257
|
except:
|
258
258
|
pass # Ignore if not loaded
|
259
259
|
|
260
|
-
|
260
|
+
# Don't load here - let enable_service handle it with the -w flag
|
261
|
+
# This prevents the "already loaded" error when enable_service runs
|
261
262
|
result["launchagent"] = plist_path
|
262
263
|
result["message"] += f"\nLaunchAgent installed: {plist_name}"
|
263
264
|
|
@@ -0,0 +1,13 @@
|
|
1
|
+
"""Whisper service tools."""
|
2
|
+
|
3
|
+
from voice_mode.tools.services.whisper.install import whisper_install
|
4
|
+
from voice_mode.tools.services.whisper.uninstall import whisper_uninstall
|
5
|
+
from voice_mode.tools.services.whisper.download_model import download_model
|
6
|
+
from voice_mode.tools.services.whisper.list_models_tool import whisper_list_models
|
7
|
+
|
8
|
+
__all__ = [
|
9
|
+
'whisper_install',
|
10
|
+
'whisper_uninstall',
|
11
|
+
'download_model',
|
12
|
+
'whisper_list_models'
|
13
|
+
]
|
@@ -369,13 +369,14 @@ exec "$SERVER_BIN" \\
|
|
369
369
|
with open(plist_path, 'w') as f:
|
370
370
|
f.write(plist_content)
|
371
371
|
|
372
|
-
#
|
372
|
+
# Unload if already loaded (ignore errors)
|
373
373
|
try:
|
374
374
|
subprocess.run(["launchctl", "unload", plist_path], capture_output=True)
|
375
375
|
except:
|
376
376
|
pass # Ignore if not loaded
|
377
377
|
|
378
|
-
|
378
|
+
# Don't load here - let enable_service handle it with the -w flag
|
379
|
+
# This prevents the "already loaded" error when enable_service runs
|
379
380
|
|
380
381
|
# Handle auto_enable
|
381
382
|
enable_message = ""
|
@@ -0,0 +1,70 @@
|
|
1
|
+
"""MCP tool for listing Whisper models and their status."""
|
2
|
+
|
3
|
+
from typing import Dict, Any
|
4
|
+
from voice_mode.tools.services.whisper.models import (
|
5
|
+
WHISPER_MODELS,
|
6
|
+
get_model_directory,
|
7
|
+
get_current_model,
|
8
|
+
is_model_installed,
|
9
|
+
get_installed_models,
|
10
|
+
format_size,
|
11
|
+
has_coreml_model,
|
12
|
+
is_apple_silicon
|
13
|
+
)
|
14
|
+
|
15
|
+
|
16
|
+
async def list_whisper_models() -> Dict[str, Any]:
    """Collect the status of every registered Whisper model.

    Returns:
        On success, a dictionary with ``success=True``, one entry per
        registered model under ``models``, the current model name, the model
        directory, and installed/available counts and sizes. If anything
        raises, a dictionary with ``success=False``, the error text and an
        empty ``models`` list.
    """
    try:
        model_dir = get_model_directory()
        current_model = get_current_model()
        installed_models = get_installed_models()

        # Core ML availability is only probed on Apple Silicon.
        show_coreml = is_apple_silicon()

        models = [
            {
                "name": model_name,
                "size_mb": info["size_mb"],
                "size": format_size(info["size_mb"]),
                "languages": info["languages"],
                "description": info["description"],
                "installed": is_model_installed(model_name),
                "current": model_name == current_model,
                "has_coreml": has_coreml_model(model_name) if show_coreml else False,
            }
            for model_name, info in WHISPER_MODELS.items()
        ]

        # Sizes are the registry's nominal download sizes, not on-disk sizes.
        total_installed_size = sum(
            WHISPER_MODELS[name]["size_mb"] for name in installed_models
        )
        total_available_size = sum(
            info["size_mb"] for info in WHISPER_MODELS.values()
        )

        return {
            "success": True,
            "models": models,
            "current_model": current_model,
            "model_directory": str(model_dir),
            "installed_count": len(installed_models),
            "total_count": len(WHISPER_MODELS),
            "installed_size_mb": total_installed_size,
            "installed_size": format_size(total_installed_size),
            "available_size_mb": total_available_size,
            "available_size": format_size(total_available_size),
        }
    except Exception as e:
        # Report the failure in-band so callers can render it.
        return {
            "success": False,
            "error": str(e),
            "models": [],
        }
|
@@ -0,0 +1,65 @@
|
|
1
|
+
"""MCP tool for listing Whisper models."""
|
2
|
+
|
3
|
+
from voice_mode.server import mcp
|
4
|
+
from voice_mode.tools.services.whisper.list_models import list_whisper_models
|
5
|
+
|
6
|
+
|
7
|
+
@mcp.tool()
async def whisper_list_models() -> str:
    """List available Whisper models and their installation status.

    Shows all available Whisper models with:
    - Installation status (installed/not installed)
    - Model sizes
    - Language support
    - Currently selected model

    Returns:
        Formatted string showing model status and information
    """
    result = await list_whisper_models()
    if not result["success"]:
        return f"Error listing models: {result.get('error', 'Unknown error')}"

    entries = result["models"]

    # The Core ML column is shown only if at least one model reports it.
    show_coreml = any(entry.get("has_coreml", False) for entry in entries)

    output = ["Whisper Models:", ""]
    for model in entries:
        current = "→ " if model["current"] else " "
        installed = "[✓ Installed]" if model["installed"] else "[ Download ]"

        if show_coreml:
            coreml = "[ML]" if model.get("has_coreml", False) else " "
        else:
            coreml = ""

        line = f"{current}{model['name']:15} {installed:14} {coreml} {model['size']:>8} {model['languages']:20}"
        if model["current"]:
            line += " (Currently selected)"
        output.append(line)

    # Footer: where the models live and summary totals.
    output.extend([
        "",
        f"Models directory: {result['model_directory']}",
        f"Total size: {result['installed_size']} installed / {result['available_size']} available",
        "",
        f"Installed models: {result['installed_count']}/{result['total_count']}",
        f"Current model: {result['current_model']}",
    ])

    if show_coreml:
        output.extend([
            "",
            "[ML] = Core ML model available for faster inference on Apple Silicon",
        ])

    return "\n".join(output)
|
@@ -0,0 +1,274 @@
|
|
1
|
+
"""Whisper model registry and utilities."""
|
2
|
+
|
3
|
+
import os
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Dict, List, Optional, TypedDict
|
6
|
+
from voice_mode.config import WHISPER_MODEL_PATH, WHISPER_MODEL
|
7
|
+
|
8
|
+
|
9
|
+
class ModelInfo(TypedDict):
    """Information about a Whisper model."""

    # Download size in MB
    size_mb: int
    # Language support description
    languages: str
    # Brief description
    description: str
    # Expected filename when downloaded
    filename: str


# Registry of all available Whisper models.
# Each row is (name, size in MB, languages, description); every model's
# filename follows the ``ggml-<name>.bin`` pattern.
WHISPER_MODELS: Dict[str, ModelInfo] = {
    name: ModelInfo(
        size_mb=size_mb,
        languages=languages,
        description=description,
        filename=f"ggml-{name}.bin",
    )
    for name, size_mb, languages, description in (
        ("tiny", 39, "Multilingual", "Fastest, least accurate"),
        ("tiny.en", 39, "English only", "Fastest English model"),
        ("base", 142, "Multilingual", "Good balance of speed and accuracy"),
        ("base.en", 142, "English only", "Good English model"),
        ("small", 466, "Multilingual", "Better accuracy, slower"),
        ("small.en", 466, "English only", "Better English accuracy"),
        ("medium", 1500, "Multilingual", "High accuracy, slow"),
        ("medium.en", 1500, "English only", "High English accuracy"),
        ("large-v1", 2900, "Multilingual", "Original large model"),
        ("large-v2", 2900, "Multilingual", "Improved large model (recommended)"),
        ("large-v3", 3100, "Multilingual", "Latest large model"),
        ("large-v3-turbo", 1600, "Multilingual", "Faster large model with good accuracy"),
    )
}
|
92
|
+
|
93
|
+
|
94
|
+
def get_model_directory() -> Path:
    """Return the directory Whisper model files are looked up in.

    The configured ``WHISPER_MODEL_PATH`` wins whenever it exists on disk.
    Otherwise the service installation directory
    ``~/.voicemode/services/whisper/models`` is returned if it exists; if
    neither exists, the configured path is returned unchanged.
    """
    configured = Path(WHISPER_MODEL_PATH)
    if configured.exists():
        return configured

    service_dir = Path.home() / ".voicemode" / "services" / "whisper" / "models"
    return service_dir if service_dir.exists() else configured
|
106
|
+
|
107
|
+
|
108
|
+
def get_current_model() -> str:
    """Return the configured Whisper model name.

    ``WHISPER_MODEL`` from the config is returned when it is a key of
    ``WHISPER_MODELS``; any other value falls back to ``"large-v2"``.
    """
    return WHISPER_MODEL if WHISPER_MODEL in WHISPER_MODELS else "large-v2"
|
118
|
+
|
119
|
+
|
120
|
+
def is_model_installed(model_name: str) -> bool:
    """Report whether the model's file is present in the model directory.

    Names that are not in ``WHISPER_MODELS`` are reported as not installed.
    """
    info = WHISPER_MODELS.get(model_name)
    if info is None:
        return False
    return (get_model_directory() / info["filename"]).exists()
|
130
|
+
|
131
|
+
|
132
|
+
def has_coreml_model(model_name: str) -> bool:
    """Check whether a Core ML encoder exists for the given model.

    Returns False on any platform other than macOS and for names that are
    not in ``WHISPER_MODELS``. Otherwise reports whether
    ``ggml-<model_name>-encoder.mlmodelc`` exists in the model directory.

    Args:
        model_name: Registry name of the model, e.g. ``"large-v2"``.
    """
    import platform

    # Core ML is only relevant on macOS.
    if platform.system() != "Darwin":
        return False

    if model_name not in WHISPER_MODELS:
        return False

    # Core ML model would be named like ggml-large-v2-encoder.mlmodelc
    coreml_path = get_model_directory() / f"ggml-{model_name}-encoder.mlmodelc"
    return coreml_path.exists()
|
154
|
+
|
155
|
+
|
156
|
+
def get_installed_models() -> List[str]:
    """Return the registry names of installed models, in registry order."""
    return [name for name in WHISPER_MODELS if is_model_installed(name)]
|
163
|
+
|
164
|
+
|
165
|
+
def get_total_size(models: Optional[List[str]] = None) -> int:
    """Sum the registry sizes, in MB, of the given models.

    Args:
        models: Model names to total. ``None`` means every registered model.
            Names that are not in the registry contribute nothing.

    Returns:
        Total size in MB.
    """
    names = WHISPER_MODELS.keys() if models is None else models
    return sum(
        WHISPER_MODELS[name]["size_mb"]
        for name in names
        if name in WHISPER_MODELS
    )
|
183
|
+
|
184
|
+
|
185
|
+
def format_size(size_mb: int) -> str:
    """Render a size given in MB as ``"<n> MB"`` or ``"<x.y> GB"``.

    Values below 1000 stay in MB; anything else is divided by 1000 and shown
    in GB with one decimal place.
    """
    if size_mb < 1000:
        return f"{size_mb} MB"
    return f"{size_mb / 1000:.1f} GB"
|
192
|
+
|
193
|
+
|
194
|
+
def is_macos() -> bool:
    """Return True when ``platform.system()`` reports ``"Darwin"``."""
    import platform

    system_name = platform.system()
    return system_name == "Darwin"
|
198
|
+
|
199
|
+
|
200
|
+
def is_apple_silicon() -> bool:
    """Return True on macOS (``Darwin``) running on an ``arm64`` machine."""
    import platform

    if platform.system() != "Darwin":
        return False
    return platform.machine() == "arm64"
|
204
|
+
|
205
|
+
|
206
|
+
def set_current_model(model_name: str) -> None:
    """Persist *model_name* as the active Whisper model.

    Sets ``VOICEMODE_WHISPER_MODEL`` in ``~/.voicemode/voicemode.env``. When
    that file does not exist but the legacy ``~/.voicemode/.voicemode.env``
    does, the legacy file is updated instead so existing setups keep working.

    Every other line of the file (comments, blank lines, other settings and
    their quoting) is written back unchanged. If the key is not present yet,
    it is appended under a ``# Whisper Configuration`` comment.

    Args:
        model_name: Name of the model to set as active. It is written as-is;
            no check against the model registry is made here.
    """
    from pathlib import Path

    config_dir = Path.home() / ".voicemode"
    config_path = config_dir / "voicemode.env"
    legacy_path = config_dir / ".voicemode.env"
    # Prefer the current file name; only touch the legacy file when it is the
    # sole configuration present.
    if not config_path.exists() and legacy_path.exists():
        config_path = legacy_path

    # Ensure directory exists
    config_dir.mkdir(parents=True, exist_ok=True)

    entry = f"VOICEMODE_WHISPER_MODEL={model_name}\n"
    lines = []
    replaced = False

    if config_path.exists():
        with open(config_path, 'r') as f:
            for line in f:
                # Commented-out assignments start with '#' and never match.
                if line.strip().startswith("VOICEMODE_WHISPER_MODEL="):
                    lines.append(entry)
                    replaced = True
                else:
                    # Keep unrelated lines byte-for-byte.
                    lines.append(line)

    # Add VOICEMODE_WHISPER_MODEL if it wasn't in the file
    if not replaced:
        if lines and not lines[-1].endswith("\n"):
            # Terminate an unterminated last line so the new block starts
            # on its own line.
            lines[-1] += "\n"
        if lines and lines[-1].strip():
            lines.append("\n")
        lines.append("# Whisper Configuration\n")
        lines.append(entry)

    # Write the updated configuration
    with open(config_path, 'w') as f:
        f.writelines(lines)
|