voice-mode 2.27.0-py3-none-any.whl → 2.28.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voice_mode/__version__.py +1 -1
- voice_mode/cli.py +152 -37
- voice_mode/cli_commands/exchanges.py +6 -0
- voice_mode/frontend/.next/BUILD_ID +1 -1
- voice_mode/frontend/.next/app-build-manifest.json +5 -5
- voice_mode/frontend/.next/build-manifest.json +3 -3
- voice_mode/frontend/.next/next-minimal-server.js.nft.json +1 -1
- voice_mode/frontend/.next/next-server.js.nft.json +1 -1
- voice_mode/frontend/.next/prerender-manifest.json +1 -1
- voice_mode/frontend/.next/required-server-files.json +1 -1
- voice_mode/frontend/.next/server/app/_not-found/page.js +1 -1
- voice_mode/frontend/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/server/app/_not-found.html +1 -1
- voice_mode/frontend/.next/server/app/_not-found.rsc +1 -1
- voice_mode/frontend/.next/server/app/api/connection-details/route.js +2 -2
- voice_mode/frontend/.next/server/app/favicon.ico/route.js +2 -2
- voice_mode/frontend/.next/server/app/index.html +1 -1
- voice_mode/frontend/.next/server/app/index.rsc +2 -2
- voice_mode/frontend/.next/server/app/page.js +2 -2
- voice_mode/frontend/.next/server/app/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/server/chunks/994.js +1 -1
- voice_mode/frontend/.next/server/middleware-build-manifest.js +1 -1
- voice_mode/frontend/.next/server/next-font-manifest.js +1 -1
- voice_mode/frontend/.next/server/next-font-manifest.json +1 -1
- voice_mode/frontend/.next/server/pages/404.html +1 -1
- voice_mode/frontend/.next/server/pages/500.html +1 -1
- voice_mode/frontend/.next/server/server-reference-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/.next/BUILD_ID +1 -1
- voice_mode/frontend/.next/standalone/.next/app-build-manifest.json +5 -5
- voice_mode/frontend/.next/standalone/.next/build-manifest.json +3 -3
- voice_mode/frontend/.next/standalone/.next/prerender-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/.next/required-server-files.json +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/_not-found.rsc +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/api/connection-details/route.js +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/favicon.ico/route.js +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/index.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/app/index.rsc +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/page.js +2 -2
- voice_mode/frontend/.next/standalone/.next/server/app/page_client-reference-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/chunks/994.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/middleware-build-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.js +1 -1
- voice_mode/frontend/.next/standalone/.next/server/next-font-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/.next/server/pages/404.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/pages/500.html +1 -1
- voice_mode/frontend/.next/standalone/.next/server/server-reference-manifest.json +1 -1
- voice_mode/frontend/.next/standalone/server.js +1 -1
- voice_mode/frontend/.next/static/chunks/app/{layout-08be62ed6e344292.js → layout-2a1721553cbe58e4.js} +1 -1
- voice_mode/frontend/.next/static/chunks/app/page-fe35d9da20297c85.js +1 -0
- voice_mode/frontend/.next/static/chunks/{main-app-413f77c1f2c53e3f.js → main-app-c17195caa4e269d6.js} +1 -1
- voice_mode/frontend/.next/trace +43 -43
- voice_mode/frontend/.next/types/app/api/connection-details/route.ts +1 -1
- voice_mode/frontend/.next/types/app/layout.ts +1 -1
- voice_mode/frontend/.next/types/app/page.ts +1 -1
- voice_mode/frontend/package-lock.json +6 -6
- voice_mode/tools/converse.py +44 -24
- voice_mode/tools/service.py +30 -3
- voice_mode/tools/services/kokoro/install.py +1 -1
- voice_mode/tools/services/whisper/__init__.py +15 -5
- voice_mode/tools/services/whisper/install.py +41 -9
- voice_mode/tools/services/whisper/list_models.py +14 -14
- voice_mode/tools/services/whisper/model_active.py +54 -0
- voice_mode/tools/services/whisper/model_benchmark.py +159 -0
- voice_mode/tools/services/whisper/{download_model.py → model_install.py} +72 -11
- voice_mode/tools/services/whisper/model_remove.py +36 -0
- voice_mode/tools/services/whisper/models.py +225 -26
- voice_mode/utils/services/whisper_helpers.py +206 -19
- voice_mode/utils/services/whisper_version.py +138 -0
- {voice_mode-2.27.0.dist-info → voice_mode-2.28.1.dist-info}/METADATA +5 -1
- {voice_mode-2.27.0.dist-info → voice_mode-2.28.1.dist-info}/RECORD +77 -74
- voice_mode/frontend/.next/static/chunks/app/page-80fc72669f25298f.js +0 -1
- voice_mode/tools/services/whisper/list_models_tool.py +0 -65
- /voice_mode/frontend/.next/static/{wQ5pxzPmwjlzdUfJwSjMg → LhJalgfazyY_l3L_v0_Kw}/_buildManifest.js +0 -0
- /voice_mode/frontend/.next/static/{wQ5pxzPmwjlzdUfJwSjMg → LhJalgfazyY_l3L_v0_Kw}/_ssgManifest.js +0 -0
- {voice_mode-2.27.0.dist-info → voice_mode-2.28.1.dist-info}/WHEEL +0 -0
- {voice_mode-2.27.0.dist-info → voice_mode-2.28.1.dist-info}/entry_points.txt +0 -0
voice_mode/frontend/.next/types/app/api/connection-details/route.ts
CHANGED
@@ -1,4 +1,4 @@
-// File: /tmp/build-via-sdist-…
+// File: /tmp/build-via-sdist-disrgmhp/voice_mode-2.28.1/voice_mode/frontend/app/api/connection-details/route.ts
 import * as entry from '../../../../../app/api/connection-details/route.js'
 import type { NextRequest } from 'next/server.js'
 
voice_mode/frontend/.next/types/app/layout.ts
CHANGED
@@ -1,4 +1,4 @@
-// File: /tmp/build-via-sdist-…
+// File: /tmp/build-via-sdist-disrgmhp/voice_mode-2.28.1/voice_mode/frontend/app/layout.tsx
 import * as entry from '../../../app/layout.js'
 import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
 
voice_mode/frontend/.next/types/app/page.ts
CHANGED
@@ -1,4 +1,4 @@
-// File: /tmp/build-via-sdist-…
+// File: /tmp/build-via-sdist-disrgmhp/voice_mode-2.28.1/voice_mode/frontend/app/page.tsx
 import * as entry from '../../../app/page.js'
 import type { ResolvingMetadata, ResolvingViewport } from 'next/dist/lib/metadata/types/metadata-interface.js'
 
voice_mode/frontend/package-lock.json
CHANGED
@@ -1489,9 +1489,9 @@
       }
     },
     "node_modules/caniuse-lite": {
-      "version": "1.0.…
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.…
-      "integrity": "sha512-…
+      "version": "1.0.30001737",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001737.tgz",
+      "integrity": "sha512-BiloLiXtQNrY5UyF0+1nSJLXUENuhka2pzy2Fx5pGxqavdrxSCW4U6Pn/PoG3Efspi2frRbHpBV2XsrPE6EDlw==",
       "dev": true,
       "funding": [
         {
@@ -1774,9 +1774,9 @@
       "license": "MIT"
     },
     "node_modules/electron-to-chromium": {
-      "version": "1.5.…
-      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.…
-      "integrity": "sha512-…
+      "version": "1.5.208",
+      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.208.tgz",
+      "integrity": "sha512-ozZyibehoe7tOhNaf16lKmljVf+3npZcJIEbJRVftVsmAg5TeA1mGS9dVCZzOwr2xT7xK15V0p7+GZqSPgkuPg==",
       "dev": true,
       "license": "ISC"
     },
voice_mode/tools/converse.py
CHANGED
@@ -613,31 +613,51 @@ async def _speech_to_text_internal(
     if stt_config.get('base_url') and ("127.0.0.1" in stt_config['base_url'] or "localhost" in stt_config['base_url']):
         provider = "whisper-local"
 
-    # … [old lines 616-626 truncated in source]
+    # Check if we can skip conversion for local whisper
+    skip_conversion = False
+    if provider == "whisper-local":
+        # Check if whisper is truly local (not SSH-forwarded)
+        from voice_mode.utils.services.common import check_service_status
+        from voice_mode.config import WHISPER_PORT
+        status, _ = check_service_status(WHISPER_PORT)
+        if status == "local":
+            skip_conversion = True
+            logger.info("Detected truly local whisper - skipping audio conversion, using WAV directly")
+
+    if skip_conversion:
+        # Use WAV directly for local whisper
+        upload_file = wav_file
+        export_format = "wav"
+        logger.debug("Using WAV file directly for local whisper upload")
+    else:
+        # Validate format for provider
+        export_format = validate_audio_format(STT_AUDIO_FORMAT, provider, "stt")
 
-    … [old lines 628-640 truncated in source]
+        # Convert WAV to target format for upload
+        logger.debug(f"Converting WAV to {export_format.upper()} for upload...")
+        conversion_start = time.perf_counter()
+        try:
+            audio = AudioSegment.from_wav(wav_file)
+            logger.debug(f"Audio loaded - Duration: {len(audio)}ms, Channels: {audio.channels}, Frame rate: {audio.frame_rate}")
+
+            # Get export parameters for the format
+            export_params = get_format_export_params(export_format)
+
+            with tempfile.NamedTemporaryFile(suffix=f'.{export_format}', delete=False) as export_file_obj:
+                export_file = export_file_obj.name
+            audio.export(export_file, **export_params)
+            upload_file = export_file
+            conversion_time = time.perf_counter() - conversion_start
+            logger.info(f"Audio conversion: WAV → {export_format.upper()} took {conversion_time:.3f}s")
+            logger.debug(f"{export_format.upper()} created for STT upload: {upload_file}")
+        except Exception as e:
+            if "ffmpeg" in str(e).lower() or "avconv" in str(e).lower():
+                logger.error(f"Audio conversion failed - FFmpeg may not be installed: {e}")
+                from voice_mode.utils.ffmpeg_check import get_install_instructions
+                logger.error(f"\n{get_install_instructions()}")
+                raise RuntimeError("FFmpeg is required but not found. Please install FFmpeg and try again.") from e
+            else:
+                raise
 
     # Save debug file for upload version
     if DEBUG:
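The skip-conversion path hinges on `check_service_status(WHISPER_PORT)` distinguishing a genuinely local whisper server from an SSH-forwarded port that merely looks like localhost. The package's actual helper is not shown in this diff; below is a minimal, hypothetical sketch of one way to make that distinction with `psutil` (the function name and return values are ours, not the real API):

```python
# Hypothetical sketch only -- the real check_service_status() in
# voice_mode.utils.services.common may work differently.
import psutil

def port_owner_status(port: int) -> str:
    """Classify the listener on a localhost TCP port.

    Returns 'local' for an ordinary local process, 'forwarded' when the
    listener is an ssh process (i.e. a tunnel), 'not_running' otherwise.
    """
    for conn in psutil.net_connections(kind="tcp"):
        if conn.laddr and conn.laddr.port == port and conn.status == psutil.CONN_LISTEN:
            if conn.pid is None:  # may need elevated privileges to resolve
                continue
            name = psutil.Process(conn.pid).name().lower()
            return "forwarded" if "ssh" in name else "local"
    return "not_running"
```

Converting WAV to a compressed format only saves upload bandwidth, which is irrelevant for a listener on the same machine, so the new code sends WAV as-is when the check returns "local".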
voice_mode/tools/service.py
CHANGED
@@ -233,18 +233,45 @@ async def status_service(service_name: str) -> str:
     if service_name == "whisper":
         # Get model info
         model = "unknown"
+        model_name = None
         for i, arg in enumerate(cmdline):
             if arg == "--model" and i + 1 < len(cmdline):
                 model = Path(cmdline[i + 1]).name
+                # Extract model name from filename (e.g., ggml-large-v3-turbo.bin -> large-v3-turbo)
+                if model.startswith("ggml-") and model.endswith(".bin"):
+                    model_name = model[5:-4]
                 break
         extra_info_parts.append(f"Model: {model}")
 
-        # … [truncated in source]
+        # Get version and capability info
         try:
-            from voice_mode.… [truncated in source]
-            version_info = … [truncated in source]
+            from voice_mode.utils.services.whisper_version import get_whisper_version_info, check_coreml_model_exists
+            version_info = get_whisper_version_info()
+
             if version_info.get("version"):
                 extra_info_parts.append(f"Version: {version_info['version']}")
+            elif version_info.get("commit"):
+                extra_info_parts.append(f"Commit: {version_info['commit']}")
+
+            # Show Core ML status on Apple Silicon
+            if platform.machine() == "arm64" and platform.system() == "Darwin":
+                if version_info.get("coreml_supported"):
+                    # Check if the current model has Core ML
+                    if model_name and check_coreml_model_exists(model_name):
+                        extra_info_parts.append("Core ML: ✓ Enabled & Active")
+                    else:
+                        extra_info_parts.append("Core ML: ✓ Supported (model not converted)")
+                else:
+                    extra_info_parts.append("Core ML: ✗ Not compiled in")
+
+            # Show GPU support
+            gpu_support = []
+            if version_info.get("metal_supported"):
+                gpu_support.append("Metal")
+            if version_info.get("cuda_supported"):
+                gpu_support.append("CUDA")
+            if gpu_support:
+                extra_info_parts.append(f"GPU: {', '.join(gpu_support)}")
         except:
             pass
 
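For reference, the capability formatting added above can be exercised in isolation. This sketch reproduces the same branching with an illustrative `version_info` dict; real values come from `get_whisper_version_info()`, which this diff does not show:

```python
# Illustrative: mirrors the status-formatting branches in the hunk above.
# The version_info values here are made up for demonstration.
def format_whisper_info(version_info, model_name, coreml_model_exists):
    parts = []
    if version_info.get("version"):
        parts.append(f"Version: {version_info['version']}")
    elif version_info.get("commit"):
        parts.append(f"Commit: {version_info['commit']}")
    if version_info.get("coreml_supported"):
        if model_name and coreml_model_exists:
            parts.append("Core ML: ✓ Enabled & Active")
        else:
            parts.append("Core ML: ✓ Supported (model not converted)")
    else:
        parts.append("Core ML: ✗ Not compiled in")
    gpu = [label for key, label in [("metal_supported", "Metal"), ("cuda_supported", "CUDA")]
           if version_info.get(key)]
    if gpu:
        parts.append(f"GPU: {', '.join(gpu)}")
    return parts

print(format_whisper_info({"commit": "abc1234", "coreml_supported": True,
                           "metal_supported": True}, "large-v3-turbo", True))
# ['Commit: abc1234', 'Core ML: ✓ Enabled & Active', 'GPU: Metal']
```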
voice_mode/tools/services/kokoro/install.py
CHANGED
@@ -243,7 +243,7 @@ async def kokoro_install(
     <key>EnvironmentVariables</key>
     <dict>
         <key>PATH</key>
-        <string…
+        <string>{os.path.expanduser("~/.local/bin")}:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/opt/homebrew/bin</string>
     </dict>
 </dict>
 </plist>"""
voice_mode/tools/services/whisper/__init__.py
CHANGED
@@ -2,12 +2,22 @@
 
 from voice_mode.tools.services.whisper.install import whisper_install
 from voice_mode.tools.services.whisper.uninstall import whisper_uninstall
-from voice_mode.tools.services.whisper.…
-from voice_mode.tools.services.whisper.…
+from voice_mode.tools.services.whisper.model_install import whisper_model_install
+from voice_mode.tools.services.whisper.list_models import whisper_models
+from voice_mode.tools.services.whisper.model_active import whisper_model_active
+from voice_mode.tools.services.whisper.model_remove import whisper_model_remove
+from voice_mode.tools.services.whisper.model_benchmark import whisper_model_benchmark
 
 __all__ = [
     'whisper_install',
     'whisper_uninstall',
-    '…
-    '…
-… [truncated in source]
+    'whisper_model_install',
+    'whisper_models',
+    'whisper_model_active',
+    'whisper_model_remove',
+    'whisper_model_benchmark'
+]
+
+# Backwards compatibility aliases
+download_model = whisper_model_install  # Deprecated alias
+whisper_list_models = whisper_models  # Deprecated alias
voice_mode/tools/services/whisper/install.py
CHANGED
@@ -206,13 +206,21 @@ async def whisper_install(
     except subprocess.CalledProcessError:
         logger.warning("Make clean failed, continuing anyway...")
 
-    # Build with …
+    # Build with CMake for better control and Core ML support
     build_env = os.environ.copy()
+    cmake_flags = []
 
-    … [old lines 212-213 truncated in source]
+    # Enable GPU support based on platform
+    if is_macos:
+        # On macOS, always enable Metal
+        cmake_flags.append("-DGGML_METAL=ON")
+        # On Apple Silicon, also enable Core ML for better performance
+        if platform.machine() == "arm64":
+            cmake_flags.append("-DWHISPER_COREML=ON")
+            cmake_flags.append("-DWHISPER_COREML_ALLOW_FALLBACK=ON")
+            logger.info("Enabling Core ML support with fallback for Apple Silicon")
     elif is_linux and use_gpu:
-        …
+        cmake_flags.append("-DGGML_CUDA=ON")
 
     # Get number of CPU cores for parallel build
     cpu_count = os.cpu_count() or 4
@@ -220,13 +228,31 @@ async def whisper_install(
     # Determine if we should show build output
     debug_mode = os.environ.get("VOICEMODE_DEBUG", "").lower() in ("true", "1", "yes")
 
+    # Configure with CMake
+    logger.info("Configuring whisper.cpp build...")
+    cmake_cmd = ["cmake", "-B", "build"] + cmake_flags
+
     if debug_mode:
-        subprocess.run(…
+        subprocess.run(cmake_cmd, env=build_env, check=True)
     else:
-        # Suppress output unless there's an error
-        logger.info("Building whisper.cpp (this may take a few minutes)...")
         try:
-            result = subprocess.run(…
+            result = subprocess.run(cmake_cmd, env=build_env,
+                                    capture_output=True, text=True, check=True)
+        except subprocess.CalledProcessError as e:
+            logger.error(f"Configuration failed: {e}")
+            if e.stderr:
+                logger.error(f"Configuration errors:\n{e.stderr}")
+            raise
+
+    # Build with CMake
+    logger.info("Building whisper.cpp (this may take a few minutes)...")
+    build_cmd = ["cmake", "--build", "build", "-j", str(cpu_count), "--config", "Release"]
+
+    if debug_mode:
+        subprocess.run(build_cmd, env=build_env, check=True)
+    else:
+        try:
+            result = subprocess.run(build_cmd, env=build_env,
                                     capture_output=True, text=True, check=True)
             logger.info("Build completed successfully")
         except subprocess.CalledProcessError as e:
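Taken together, the configure and build hunks replace the old make invocation with two CMake steps. A sketch of the effective commands per platform, mirroring the flag selection above (`is_macos`, `is_linux`, and `use_gpu` are the surrounding install-time variables):

```python
import os
import platform

def whisper_cmake_commands(is_macos, is_linux, use_gpu):
    """Mirror the flag selection from the diff above (illustrative helper)."""
    cmake_flags = []
    if is_macos:
        cmake_flags.append("-DGGML_METAL=ON")
        if platform.machine() == "arm64":
            cmake_flags += ["-DWHISPER_COREML=ON", "-DWHISPER_COREML_ALLOW_FALLBACK=ON"]
    elif is_linux and use_gpu:
        cmake_flags.append("-DGGML_CUDA=ON")
    cpu_count = os.cpu_count() or 4
    configure = ["cmake", "-B", "build"] + cmake_flags
    build = ["cmake", "--build", "build", "-j", str(cpu_count), "--config", "Release"]
    return configure, build

# On an Apple Silicon Mac this yields:
#   cmake -B build -DGGML_METAL=ON -DWHISPER_COREML=ON -DWHISPER_COREML_ALLOW_FALLBACK=ON
#   cmake --build build -j <cores> --config Release
```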
@@ -258,7 +284,8 @@ async def whisper_install(
     model_path = download_result["path"]
 
     # Test whisper with sample if available
-    … [truncated in source]
+    # With CMake build, binaries are in build/bin/
+    main_path = os.path.join(install_dir, "build", "bin", "whisper-cli")
     sample_path = os.path.join(install_dir, "samples", "jfk.wav")
     if os.path.exists(sample_path) and os.path.exists(main_path):
         try:
@@ -283,6 +310,11 @@ async def whisper_install(
 WHISPER_DIR="{install_dir}"
 LOG_FILE="{os.path.join(voicemode_dir, 'whisper-server.log')}"
 
+# Source voicemode configuration if it exists
+if [ -f "{voicemode_dir}/voicemode.env" ]; then
+    source "{voicemode_dir}/voicemode.env"
+fi
+
 # Model selection with environment variable support
 MODEL_NAME="${{VOICEMODE_WHISPER_MODEL:-{model}}}"
 MODEL_PATH="$WHISPER_DIR/models/ggml-$MODEL_NAME.bin"
voice_mode/tools/services/whisper/list_models.py
CHANGED
@@ -2,18 +2,18 @@
 
 from typing import Dict, Any
 from voice_mode.tools.services.whisper.models import (
-    …
+    WHISPER_MODEL_REGISTRY,
     get_model_directory,
-    …
-    …
-    …
+    get_active_model,
+    is_whisper_model_installed,
+    get_installed_whisper_models,
     format_size,
-    …
+    has_whisper_coreml_model,
     is_apple_silicon
 )
 
 
-async def …
+async def whisper_models() -> Dict[str, Any]:
     """List available Whisper models and their installation status.
 
     Returns:
@@ -21,32 +21,32 @@ async def list_whisper_models() -> Dict[str, Any]:
     """
     try:
         model_dir = get_model_directory()
-        current_model = …
-        installed_models = …
+        current_model = get_active_model()
+        installed_models = get_installed_whisper_models()
 
         # Build models list with status
         models = []
         show_coreml = is_apple_silicon()  # Only show Core ML on Apple Silicon
 
-        for model_name, info in …
+        for model_name, info in WHISPER_MODEL_REGISTRY.items():
             model_status = {
                 "name": model_name,
                 "size_mb": info["size_mb"],
                 "size": format_size(info["size_mb"]),
                 "languages": info["languages"],
                 "description": info["description"],
-                "installed": …
+                "installed": is_whisper_model_installed(model_name),
                 "current": model_name == current_model,
-                "has_coreml": …
+                "has_coreml": has_whisper_coreml_model(model_name) if show_coreml else False
             }
             models.append(model_status)
 
         # Calculate totals
         total_installed_size = sum(
-            …
+            WHISPER_MODEL_REGISTRY[m]["size_mb"] for m in installed_models
         )
         total_available_size = sum(
-            m["size_mb"] for m in …
+            m["size_mb"] for m in WHISPER_MODEL_REGISTRY.values()
         )
 
         return {
@@ -55,7 +55,7 @@ async def list_whisper_models() -> Dict[str, Any]:
             "current_model": current_model,
             "model_directory": str(model_dir),
             "installed_count": len(installed_models),
-            "total_count": len(…
+            "total_count": len(WHISPER_MODEL_REGISTRY),
             "installed_size_mb": total_installed_size,
             "installed_size": format_size(total_installed_size),
             "available_size_mb": total_available_size,
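A usage sketch for the renamed listing tool. The keys used are the ones visible in this diff; that the full return also carries the `models` list built above is our assumption:

```python
import asyncio
from voice_mode.tools.services.whisper.list_models import whisper_models

info = asyncio.run(whisper_models())
print(f"{info['installed_count']}/{info['total_count']} models installed, "
      f"active: {info['current_model']}")
# Assumption: the return dict also includes the "models" list built above.
for m in info.get("models", []):
    mark = "*" if m["current"] else " "
    print(f"{mark} {m['name']:<15} {m['size']:>8}  installed={m['installed']}")
```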
voice_mode/tools/services/whisper/model_active.py
ADDED
@@ -0,0 +1,54 @@
+"""MCP tool for showing/setting active Whisper model."""
+
+from typing import Optional, Dict, Any
+from voice_mode.tools.services.whisper.models import (
+    get_active_model,
+    set_active_model,
+    is_whisper_model_installed,
+    WHISPER_MODEL_REGISTRY
+)
+
+
+async def whisper_model_active(model_name: Optional[str] = None) -> Dict[str, Any]:
+    """Show or set the active Whisper model.
+
+    Args:
+        model_name: Model to set as active (None to just show current)
+
+    Returns:
+        Dict with current/new active model info
+    """
+    if model_name is None:
+        # Just show current
+        current = get_active_model()
+        return {
+            "success": True,
+            "active_model": current,
+            "installed": is_whisper_model_installed(current),
+            "message": f"Current active model: {current}"
+        }
+
+    # Validate model exists in registry
+    if model_name not in WHISPER_MODEL_REGISTRY:
+        return {
+            "success": False,
+            "error": f"Model {model_name} is not a valid Whisper model",
+            "available_models": list(WHISPER_MODEL_REGISTRY.keys())
+        }
+
+    # Check if model is installed
+    if not is_whisper_model_installed(model_name):
+        return {
+            "success": False,
+            "error": f"Model {model_name} is not installed. Install it first with whisper_model_install()",
+            "model": model_name
+        }
+
+    # Set new active model
+    set_active_model(model_name)
+
+    return {
+        "success": True,
+        "active_model": model_name,
+        "message": f"Active model set to {model_name}. Restart whisper service for changes to take effect."
+    }
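Usage sketch for the new tool (model name illustrative):

```python
import asyncio
from voice_mode.tools.services.whisper.model_active import whisper_model_active

print(asyncio.run(whisper_model_active()))        # show the current model
print(asyncio.run(whisper_model_active("base")))  # switch, if "base" is installed
```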
voice_mode/tools/services/whisper/model_benchmark.py
ADDED
@@ -0,0 +1,159 @@
+"""MCP tool for benchmarking Whisper models."""
+
+from typing import Union, List, Dict, Any, Optional
+from voice_mode.tools.services.whisper.models import (
+    get_installed_whisper_models,
+    benchmark_whisper_model,
+    is_whisper_model_installed,
+    WHISPER_MODEL_REGISTRY
+)
+
+
+async def whisper_model_benchmark(
+    models: Union[str, List[str]] = "installed",
+    sample_file: Optional[str] = None,
+    runs: int = 1
+) -> Dict[str, Any]:
+    """Benchmark Whisper model performance.
+
+    Args:
+        models: 'installed' (default), 'all', specific model name, or list of models
+        sample_file: Optional audio file for testing (uses default JFK sample if None)
+        runs: Number of benchmark runs per model (default: 1)
+
+    Returns:
+        Dict with benchmark results and recommendations
+    """
+    # Determine which models to benchmark
+    if models == "installed":
+        model_list = get_installed_whisper_models()
+        if not model_list:
+            return {
+                "success": False,
+                "error": "No Whisper models are installed. Install models first with whisper_model_install()"
+            }
+    elif models == "all":
+        # Only benchmark installed models from the full list
+        all_models = list(WHISPER_MODEL_REGISTRY.keys())
+        model_list = [m for m in all_models if is_whisper_model_installed(m)]
+        if not model_list:
+            return {
+                "success": False,
+                "error": "No Whisper models are installed"
+            }
+    elif isinstance(models, str):
+        # Single model specified
+        if not is_whisper_model_installed(models):
+            return {
+                "success": False,
+                "error": f"Model {models} is not installed"
+            }
+        model_list = [models]
+    elif isinstance(models, list):
+        # List of models specified
+        model_list = []
+        for model in models:
+            if is_whisper_model_installed(model):
+                model_list.append(model)
+            else:
+                # Model not installed, skip silently or could use logger.warning
+                pass
+        if not model_list:
+            return {
+                "success": False,
+                "error": "None of the specified models are installed"
+            }
+    else:
+        return {
+            "success": False,
+            "error": f"Invalid models parameter: {models}"
+        }
+
+    # Run benchmarks
+    results = []
+    failed = []
+
+    for model in model_list:
+        best_result = None
+
+        for run_num in range(runs):
+            result = benchmark_whisper_model(model, sample_file)
+
+            if result.get("success"):
+                # Keep the best (fastest) result from multiple runs
+                if best_result is None or result["total_time_ms"] < best_result["total_time_ms"]:
+                    best_result = result
+            else:
+                # If any run fails, record the failure
+                if model not in failed:
+                    failed.append(model)
+                    results.append({
+                        "model": model,
+                        "success": False,
+                        "error": result.get("error", "Benchmark failed")
+                    })
+                break
+
+        if best_result:
+            results.append(best_result)
+
+    if not results:
+        return {
+            "success": False,
+            "error": "No benchmarks completed successfully"
+        }
+
+    # Find successful results for analysis
+    successful_results = [r for r in results if r.get("success")]
+
+    if successful_results:
+        # Find fastest model
+        fastest = min(successful_results, key=lambda x: x["total_time_ms"])
+
+        # Generate recommendations based on results
+        recommendations = []
+
+        # Categorize by speed
+        for result in successful_results:
+            rtf = result.get("real_time_factor", 0)
+            if rtf > 20:
+                category = "Ultra-fast (good for real-time)"
+            elif rtf > 5:
+                category = "Fast (good for interactive use)"
+            elif rtf > 1:
+                category = "Moderate (good balance)"
+            else:
+                category = "Slow (best accuracy)"
+
+            result["category"] = category
+
+        # Generate specific recommendations
+        if fastest["real_time_factor"] > 10:
+            recommendations.append(f"Use {fastest['model']} for real-time applications")
+
+        # Find best balance (medium or base if available)
+        balance_models = [r for r in successful_results if r["model"] in ["base", "medium"]]
+        if balance_models:
+            best_balance = min(balance_models, key=lambda x: x["total_time_ms"])
+            recommendations.append(f"Use {best_balance['model']} for balanced speed/accuracy")
+
+        # Recommend large models for accuracy
+        large_models = [r for r in successful_results if "large" in r["model"]]
+        if large_models:
+            best_large = min(large_models, key=lambda x: x["total_time_ms"])
+            recommendations.append(f"Use {best_large['model']} for best accuracy")
+    else:
+        fastest = None
+        recommendations = ["Unable to generate recommendations - no successful benchmarks"]
+
+    return {
+        "success": True,
+        "benchmarks": results,
+        "models_tested": len(model_list),
+        "models_failed": len(failed),
+        "fastest_model": fastest["model"] if fastest else None,
+        "fastest_time_ms": fastest["total_time_ms"] if fastest else None,
+        "recommendations": recommendations,
+        "sample_file": sample_file or "default JFK sample",
+        "runs_per_model": runs
+    }
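The speed categories read `real_time_factor` as a higher-is-faster ratio, presumably audio duration divided by processing time as computed by `benchmark_whisper_model()` (not shown in this diff). A usage sketch:

```python
import asyncio
from voice_mode.tools.services.whisper.model_benchmark import whisper_model_benchmark

# Benchmark every installed model, keeping the best of three runs each.
report = asyncio.run(whisper_model_benchmark(models="installed", runs=3))
if report["success"]:
    print(f"fastest: {report['fastest_model']} ({report['fastest_time_ms']:.0f} ms)")
    for tip in report["recommendations"]:
        print(f"  - {tip}")
else:
    print(report["error"])
```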