whisper-key-local 0.4.0__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/PKG-INFO +5 -2
  2. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/README.md +2 -0
  3. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/pyproject.toml +3 -2
  4. whisper_key_local-0.5.1/src/whisper_key/assets/version.txt +1 -0
  5. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/clipboard_manager.py +10 -6
  6. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/config.defaults.yaml +83 -52
  7. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/config_manager.py +24 -16
  8. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/main.py +13 -8
  9. whisper_key_local-0.5.1/src/whisper_key/model_registry.py +77 -0
  10. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/state_manager.py +12 -12
  11. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/system_tray.py +66 -26
  12. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/utils.py +6 -1
  13. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/whisper_engine.py +49 -51
  14. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key_local.egg-info/PKG-INFO +5 -2
  15. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key_local.egg-info/SOURCES.txt +1 -0
  16. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key_local.egg-info/requires.txt +2 -1
  17. whisper_key_local-0.4.0/src/whisper_key/assets/version.txt +0 -1
  18. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/setup.cfg +0 -0
  19. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/__init__.py +0 -0
  20. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/assets/portaudio.dll +0 -0
  21. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/assets/sounds/record_cancel.wav +0 -0
  22. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/assets/sounds/record_start.wav +0 -0
  23. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/assets/sounds/record_stop.wav +0 -0
  24. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/assets/tray_idle.png +0 -0
  25. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/assets/tray_processing.png +0 -0
  26. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/assets/tray_recording.png +0 -0
  27. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/audio_feedback.py +0 -0
  28. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/audio_recorder.py +0 -0
  29. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/console_manager.py +0 -0
  30. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/hotkey_listener.py +0 -0
  31. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/instance_manager.py +0 -0
  32. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key/voice_activity_detection.py +0 -0
  33. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key_local.egg-info/dependency_links.txt +0 -0
  34. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key_local.egg-info/entry_points.txt +0 -0
  35. {whisper_key_local-0.4.0 → whisper_key_local-0.5.1}/src/whisper_key_local.egg-info/top_level.txt +0 -0
@@ -1,11 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: whisper-key-local
3
- Version: 0.4.0
3
+ Version: 0.5.1
4
4
  Summary: Local faster-whisper speech-to-text app with global hotkeys for Windows
5
5
  Author-email: Pin Wang <pinwang@gmail.com>
6
6
  Requires-Python: >=3.11
7
7
  Description-Content-Type: text/markdown
8
- Requires-Dist: faster-whisper>=1.1.1
8
+ Requires-Dist: faster-whisper>=1.2.1
9
+ Requires-Dist: ctranslate2>=4.6.3
9
10
  Requires-Dist: numpy>=1.24.0
10
11
  Requires-Dist: scipy>=1.11.0
11
12
  Requires-Dist: sounddevice>=0.4.6
@@ -22,6 +23,8 @@ Requires-Dist: hf-xet>=1.1.5
22
23
 
23
24
  Global hotkey to start/stop recording and auto-paste transcription wherever your cursor is.
24
25
 
26
+ Questions or ideas? [Discord Server](https://discord.gg/uZnXV8snhz)
27
+
25
28
  ## 🎯 Features
26
29
 
27
30
  - **Global Hotkey**: Press `Ctrl+Win` to start recording, `Ctrl` to stop
@@ -2,6 +2,8 @@
2
2
 
3
3
  Global hotkey to start/stop recording and auto-paste transcription wherever your cursor is.
4
4
 
5
+ Questions or ideas? [Discord Server](https://discord.gg/uZnXV8snhz)
6
+
5
7
  ## 🎯 Features
6
8
 
7
9
  - **Global Hotkey**: Press `Ctrl+Win` to start recording, `Ctrl` to stop
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "whisper-key-local"
7
- version = "0.4.0"
7
+ version = "0.5.1"
8
8
  description = "Local faster-whisper speech-to-text app with global hotkeys for Windows"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -12,7 +12,8 @@ authors = [
12
12
  ]
13
13
  requires-python = ">=3.11"
14
14
  dependencies = [
15
- "faster-whisper>=1.1.1",
15
+ "faster-whisper>=1.2.1",
16
+ "ctranslate2>=4.6.3",
16
17
  "numpy>=1.24.0",
17
18
  "scipy>=1.11.0",
18
19
  "sounddevice>=0.4.6",
@@ -6,14 +6,18 @@ import pyperclip
6
6
  import win32gui
7
7
  import pyautogui
8
8
 
9
+ from .utils import parse_hotkey
10
+
9
11
  pyautogui.FAILSAFE = True # Enable "move mouse to corner to abort automation"
10
12
 
11
- class ClipboardManager:
12
- def __init__(self, key_simulation_delay, auto_paste, preserve_clipboard):
13
+ class ClipboardManager:
14
+ def __init__(self, key_simulation_delay, auto_paste, preserve_clipboard, paste_hotkey):
13
15
  self.logger = logging.getLogger(__name__)
14
16
  self.key_simulation_delay = key_simulation_delay
15
17
  self.auto_paste = auto_paste
16
18
  self.preserve_clipboard = preserve_clipboard
19
+ self.paste_hotkey = paste_hotkey
20
+ self.paste_keys = parse_hotkey(paste_hotkey)
17
21
  self._configure_pyautogui_timing()
18
22
  self._test_clipboard_access()
19
23
  self._print_status()
@@ -31,11 +35,11 @@ class ClipboardManager:
31
35
  raise
32
36
 
33
37
  def _print_status(self):
38
+ hotkey_display = self.paste_hotkey.upper()
34
39
  if self.auto_paste:
35
- method_name = "key simulation (CTRL+V)"
36
- print(f" ✓ Auto-paste is ENABLED using {method_name}")
40
+ print(f" ✓ Auto-paste is ENABLED using key simulation ({hotkey_display})")
37
41
  else:
38
- print(" ✗ Auto-paste is DISABLED - paste manually with Ctrl+V")
42
+ print(f" ✗ Auto-paste is DISABLED - paste manually with {hotkey_display}")
39
43
 
40
44
  def copy_text(self, text: str) -> bool:
41
45
  if not text:
@@ -106,7 +110,7 @@ class ClipboardManager:
106
110
  if not self.copy_text(text):
107
111
  return False
108
112
 
109
- pyautogui.hotkey('ctrl', 'v')
113
+ pyautogui.hotkey(*self.paste_keys)
110
114
 
111
115
  print(f" ✓ Auto-pasted via key simulation")
112
116
 
@@ -7,24 +7,11 @@
7
7
  # Personal settings are stored at:
8
8
  # %APPDATA%\Roaming\whisperkey\user_settings.yaml
9
9
 
10
- # =============================================================================
11
- # WHISPER AI MODEL SETTINGS
12
- # =============================================================================
13
- whisper:
14
- # Model size - affects accuracy vs speed tradeoff
15
- # Options: "tiny", "base", "small", "medium", "large", "tiny.en", "base.en", "small.en", "medium.en"
16
- # Multilingual models:
17
- # - tiny: ~39MB, fastest, good for most use cases
18
- # - base: ~74MB, better accuracy, still fast
19
- # - small: ~244MB, high accuracy, slower
20
- # - medium: ~769MB, very high accuracy, much slower
21
- # - large: ~1550MB, best accuracy, very slow (not recommended for real-time)
22
- # English-only models (more accurate, slightly faster):
23
- # - tiny.en: 39MB,
24
- # - base.en: 74MB,
25
- # - small.en: 244MB,
26
- # - medium.en: 769MB
27
- model_size: base
10
+ whisper: # Whisper AI Model Settings
11
+
12
+ # Model selection
13
+ # See "models" below for more information
14
+ model: tiny
28
15
 
29
16
  # Processing device - where the AI runs
30
17
  # Options: "cpu", "cuda" (for NVIDIA GPUs)
@@ -46,10 +33,60 @@ whisper:
46
33
  # Transcription quality settings
47
34
  beam_size: 5 # Higher = more accurate but slower (1-10)
48
35
 
49
- # =============================================================================
50
- # HOTKEY CONFIGURATION
51
- # =============================================================================
52
- hotkey:
36
+ # Available models (set enabled: false to hide from menu)
37
+ # To add custom models (CTranslate2 format), add entry with source (HuggingFace or local path)
38
+ # Example: my-model: {source: "username/repo-ct2", label: "My Model", group: custom}
39
+ models:
40
+ # Official whisper models
41
+ tiny:
42
+ label: "Tiny (76MB, fastest)"
43
+ group: official
44
+ enabled: true
45
+ base:
46
+ label: "Base (145MB, balanced)"
47
+ group: official
48
+ enabled: true
49
+ small:
50
+ label: "Small (484MB, accurate)"
51
+ group: official
52
+ enabled: true
53
+ medium:
54
+ label: "Medium (1.5GB, very accurate)"
55
+ group: official
56
+ enabled: true
57
+ large: # Uses the large-v3 version
58
+ label: "Large (3.1GB, best accuracy)"
59
+ group: official
60
+ enabled: true
61
+ large-v3-turbo: # 8x faster and only slightly less accurate (transcription optimized)
62
+ label: "Large-V3-Turbo (1.6GB, newest)"
63
+ group: official
64
+ enabled: true
65
+
66
+ # Models with native faster-whisper support
67
+ tiny.en:
68
+ label: "Tiny.En (English)"
69
+ group: custom
70
+ enabled: true
71
+ base.en:
72
+ label: "Base.En (English)"
73
+ group: custom
74
+ enabled: true
75
+ small.en:
76
+ label: "Small.En (English)"
77
+ group: custom
78
+ enabled: true
79
+ medium.en:
80
+ label: "Medium.En (English)"
81
+ group: custom
82
+ enabled: true
83
+ distil-large-v3.5: # About 1.5x faster than large-v3-turbo
84
+ source: distil-whisper/distil-large-v3.5-ct2
85
+ label: "Distil-Large-V3.5 (English)"
86
+ group: custom
87
+
88
+ hotkey: # Hotkey Configuration
89
+
53
90
  # Key combination to start/stop recording
54
91
  # Format: modifier+modifier+key (use lowercase)
55
92
  # Common modifiers: ctrl, shift, alt, win
@@ -79,10 +116,8 @@ hotkey:
79
116
  # Examples: "esc", "ctrl+c", "shift+esc"
80
117
  cancel_combination: esc
81
118
 
82
- # =============================================================================
83
- # VOICE ACTIVITY DETECTION (VAD) SETTINGS
84
- # =============================================================================
85
- vad:
119
+ vad: # Voice Activity Detection (VAD)
120
+
86
121
  # Voice Activity Detection (VAD) Pre-check
87
122
  # Uses TEN VAD to detect speech before transcription (prevents hallucinations on silence)
88
123
  # NOTE: This is separate from faster-whisper's built-in VAD
@@ -99,10 +134,8 @@ vad:
99
134
  vad_realtime_enabled: true # Enables continuous silence detection and automatic recording stop
100
135
  vad_silence_timeout_seconds: 30.0 # Auto-stop recording after this many seconds of silence
101
136
 
102
- # =============================================================================
103
- # AUDIO RECORDING SETTINGS
104
- # =============================================================================
105
- audio:
137
+ audio: # Audio Recording Settings
138
+
106
139
  # Sample rate is fixed at 16000 Hz for optimal Whisper and TEN VAD performance
107
140
 
108
141
  # Audio host API selection
@@ -132,28 +165,32 @@ audio:
132
165
  # are plugged/unplugged. Use system tray to select device - it will save the ID here.
133
166
  input_device: "default"
134
167
 
135
- # =============================================================================
136
- # CLIPBOARD BEHAVIOR
137
- # =============================================================================
138
- clipboard:
168
+ clipboard: # Clipboard Behavior
169
+
139
170
  # Automatically paste after transcription
140
171
  # true = paste immediately to active window
141
172
  # false = only copy to clipboard (paste manually with Ctrl+V)
142
173
  auto_paste: true
143
-
174
+
175
+ # Key combination to simulate paste
176
+ # Format: modifier+key (use lowercase)
177
+ # Examples:
178
+ # - "ctrl+v" (standard paste)
179
+ # - "ctrl+shift+v" (plain text paste in some apps)
180
+ # - "shift+insert" (terminal paste)
181
+ paste_hotkey: ctrl+v
182
+
144
183
  # Preserve existing clipboard content when pasting
145
184
  # true = save current clipboard, paste transcription, then restore original clipboard
146
185
  # false = replace clipboard with transcription (default behavior)
147
186
  preserve_clipboard: true
148
-
187
+
149
188
  # Delay for key simulation operations (seconds)
150
189
  # Small delay ensures operations complete reliably
151
190
  key_simulation_delay: 0.05
152
191
 
153
- # =============================================================================
154
- # LOGGING CONFIGURATION
155
- # =============================================================================
156
- logging:
192
+ logging: # Logging Configuration
193
+
157
194
  # Log level - how much detail to log
158
195
  # Options: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
159
196
  # - DEBUG: Very detailed (for troubleshooting)
@@ -173,10 +210,8 @@ logging:
173
210
  level: WARNING # Console log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) - WARNING shows important messages only
174
211
 
175
212
 
176
- # =============================================================================
177
- # AUDIO FEEDBACK SETTINGS
178
- # =============================================================================
179
- audio_feedback:
213
+ audio_feedback: # Audio Feedback Settings
214
+
180
215
  # Enable/disable audio feedback sounds
181
216
  # true = play sounds when recording starts/stops
182
217
  # false = silent operation
@@ -188,10 +223,8 @@ audio_feedback:
188
223
  stop_sound: assets/sounds/record_stop.wav
189
224
  cancel_sound: assets/sounds/record_cancel.wav
190
225
 
191
- # =============================================================================
192
- # SYSTEM TRAY SETTINGS
193
- # =============================================================================
194
- system_tray:
226
+ system_tray: # System Tray Settings
227
+
195
228
  # Enable/disable system tray icon
196
229
  # true = show icon in system tray with status and menu
197
230
  # false = run without system tray (console mode only)
@@ -200,10 +233,8 @@ system_tray:
200
233
  # Tooltip text when hovering over tray icon
201
234
  tooltip: Whisper Key
202
235
 
203
- # =============================================================================
204
- # CONSOLE VISIBILITY SETTINGS
205
- # =============================================================================
206
- console:
236
+ console: # Console Visibility Settings
237
+
207
238
  # Start with console hidden to system tray
208
239
  # Note: This setting is ignored in CLI mode
209
240
  start_hidden: false
@@ -210,6 +210,10 @@ class ConfigManager:
210
210
  def get_console_config(self) -> Dict[str, Any]:
211
211
  return self.config.get('console', {}).copy()
212
212
 
213
+ def get_log_file_path(self) -> str:
214
+ log_filename = self.config['logging']['file']['filename']
215
+ return os.path.join(get_user_app_data_path(), log_filename)
216
+
213
217
  def get_setting(self, section: str, key: str) -> Any:
214
218
  return self.config[section][key]
215
219
 
@@ -217,25 +221,30 @@ class ConfigManager:
217
221
  yaml = YAML()
218
222
  yaml.preserve_quotes = True
219
223
  yaml.indent(mapping=2, sequence=4, offset=2)
220
-
224
+
221
225
  temp_output = StringIO()
222
226
  yaml.dump(config_data, temp_output)
223
227
  lines = temp_output.getvalue().split('\n')
224
-
225
- # Find end of header - first blank line is the cutoff
226
- data_start = 0
228
+
229
+ content_start = 0
227
230
  for i, line in enumerate(lines):
228
- if not line.strip(): # Empty line found
229
- data_start = i
231
+ stripped = line.strip()
232
+ if stripped and not stripped.startswith('#'):
233
+ content_start = i
230
234
  break
231
-
232
- user_config = []
233
- user_config.append("# =============================================================================")
234
- user_config.append("# WHISPER KEY - PERSONAL CONFIGURATION")
235
- user_config.append("# =============================================================================")
236
- user_config.extend(lines[data_start:])
237
-
238
- return '\n'.join(user_config)
235
+
236
+ content_lines = lines[content_start:]
237
+
238
+ header = [
239
+ "# =============================================================================",
240
+ "# WHISPER KEY - PERSONAL CONFIGURATION",
241
+ "# =============================================================================",
242
+ "# Edit this file to customize your settings",
243
+ "# Save and restart Whisper Key for changes to take effect",
244
+ ""
245
+ ]
246
+
247
+ return '\n'.join(header + content_lines)
239
248
 
240
249
  def save_config_to_user_settings_file(self):
241
250
  try:
@@ -339,8 +348,6 @@ class ConfigValidator:
339
348
  self.config = config
340
349
  self.default_config = default_config
341
350
 
342
- self._validate_enum('whisper.model_size',
343
- ['tiny', 'base', 'small', 'medium', 'large', 'tiny.en', 'base.en', 'small.en', 'medium.en'])
344
351
  self._validate_enum('whisper.device', ['cpu', 'cuda'])
345
352
  self._validate_enum('whisper.compute_type', ['int8', 'float16', 'float32'])
346
353
 
@@ -354,6 +361,7 @@ class ConfigValidator:
354
361
  self._validate_boolean('clipboard.auto_paste')
355
362
  self._validate_boolean('clipboard.preserve_clipboard')
356
363
  self._validate_numeric_range('clipboard.key_simulation_delay', min_val=0, description='key simulation delay')
364
+ self._validate_hotkey_string('clipboard.paste_hotkey')
357
365
 
358
366
  self._validate_boolean('hotkey.stop_with_modifier_enabled')
359
367
  self._validate_boolean('hotkey.auto_enter_enabled')
@@ -20,6 +20,7 @@ from .system_tray import SystemTray
20
20
  from .audio_feedback import AudioFeedback
21
21
  from .console_manager import ConsoleManager
22
22
  from .instance_manager import guard_against_multiple_instances
23
+ from .model_registry import ModelRegistry
23
24
  from .utils import beautify_hotkey, get_user_app_data_path, get_version
24
25
 
25
26
  def is_built_executable():
@@ -82,21 +83,23 @@ def setup_vad(vad_config):
82
83
  vad_silence_timeout_seconds=vad_config['vad_silence_timeout_seconds']
83
84
  )
84
85
 
85
- def setup_whisper_engine(whisper_config, vad_manager):
86
+ def setup_whisper_engine(whisper_config, vad_manager, model_registry):
86
87
  return WhisperEngine(
87
- model_size=whisper_config['model_size'],
88
+ model_key=whisper_config['model'],
88
89
  device=whisper_config['device'],
89
90
  compute_type=whisper_config['compute_type'],
90
91
  language=whisper_config['language'],
91
92
  beam_size=whisper_config['beam_size'],
92
- vad_manager=vad_manager
93
+ vad_manager=vad_manager,
94
+ model_registry=model_registry
93
95
  )
94
96
 
95
97
  def setup_clipboard_manager(clipboard_config):
96
98
  return ClipboardManager(
97
99
  key_simulation_delay=clipboard_config['key_simulation_delay'],
98
100
  auto_paste=clipboard_config['auto_paste'],
99
- preserve_clipboard=clipboard_config['preserve_clipboard']
101
+ preserve_clipboard=clipboard_config['preserve_clipboard'],
102
+ paste_hotkey=clipboard_config['paste_hotkey']
100
103
  )
101
104
 
102
105
  def setup_audio_feedback(audio_feedback_config):
@@ -113,11 +116,12 @@ def setup_console_manager(console_config, is_executable_mode):
113
116
  is_executable_mode=is_executable_mode
114
117
  )
115
118
 
116
- def setup_system_tray(tray_config, config_manager, state_manager=None):
119
+ def setup_system_tray(tray_config, config_manager, state_manager, model_registry):
117
120
  return SystemTray(
118
121
  state_manager=state_manager,
119
122
  tray_config=tray_config,
120
- config_manager=config_manager
123
+ config_manager=config_manager,
124
+ model_registry=model_registry
121
125
  )
122
126
 
123
127
  def setup_signal_handlers(shutdown_event):
@@ -179,8 +183,9 @@ def main():
179
183
  is_executable = is_built_executable()
180
184
  console_manager = setup_console_manager(console_config, is_executable)
181
185
 
186
+ model_registry = ModelRegistry(whisper_config.get('models', {}))
182
187
  vad_manager = setup_vad(vad_config)
183
- whisper_engine = setup_whisper_engine(whisper_config, vad_manager)
188
+ whisper_engine = setup_whisper_engine(whisper_config, vad_manager, model_registry)
184
189
  clipboard_manager = setup_clipboard_manager(clipboard_config)
185
190
  audio_feedback = setup_audio_feedback(audio_feedback_config)
186
191
 
@@ -195,7 +200,7 @@ def main():
195
200
  vad_manager=vad_manager
196
201
  )
197
202
  audio_recorder = setup_audio_recorder(audio_config, state_manager, vad_manager)
198
- system_tray = setup_system_tray(tray_config, config_manager, state_manager)
203
+ system_tray = setup_system_tray(tray_config, config_manager, state_manager, model_registry)
199
204
  state_manager.attach_components(audio_recorder, system_tray)
200
205
 
201
206
  hotkey_listener = setup_hotkey_listener(hotkey_config, state_manager)
@@ -0,0 +1,77 @@
1
+ import os
2
+ from faster_whisper.utils import _MODELS
3
+
4
+
5
+ class ModelRegistry:
6
+ DEFAULT_CACHE_PREFIX = "models--Systran--faster-whisper-"
7
+
8
+ def __init__(self, models_config: dict):
9
+ self.models = {}
10
+ for key, config in models_config.items():
11
+ if isinstance(config, dict):
12
+ self.models[key] = ModelDefinition(key, config)
13
+
14
+ def get_model(self, key: str):
15
+ return self.models.get(key)
16
+
17
+ def get_source(self, key: str) -> str:
18
+ model = self.get_model(key)
19
+ return model.source if model else key
20
+
21
+ def get_cache_folder(self, key: str) -> str:
22
+ model = self.get_model(key)
23
+ if not model:
24
+ return f"{self.DEFAULT_CACHE_PREFIX}{key}"
25
+ return model.cache_folder
26
+
27
+ def get_models_by_group(self, group: str) -> list:
28
+ return [m for m in self.models.values() if m.group == group and m.enabled]
29
+
30
+ def get_groups_ordered(self) -> list:
31
+ return ["official", "custom"]
32
+
33
+ def get_hf_cache_path(self) -> str:
34
+ userprofile = os.environ.get('USERPROFILE')
35
+ if userprofile:
36
+ return os.path.join(userprofile, '.cache', 'huggingface', 'hub')
37
+ return os.path.join(os.path.expanduser('~'), '.cache', 'huggingface', 'hub')
38
+
39
+ def is_model_cached(self, key: str) -> bool:
40
+ model = self.get_model(key)
41
+ if model and model.is_local_path:
42
+ return os.path.exists(os.path.join(model.source, 'model.bin'))
43
+ cache_folder = self.get_cache_folder(key)
44
+ if not cache_folder:
45
+ return False
46
+ return os.path.exists(os.path.join(self.get_hf_cache_path(), cache_folder))
47
+
48
+
49
+ class ModelDefinition:
50
+ def __init__(self, key: str, config: dict):
51
+ self.key = key
52
+ self.source = config.get("source", key)
53
+ self.label = config.get("label", key.title())
54
+ self.group = config.get("group", "custom")
55
+ self.enabled = config.get("enabled", True)
56
+ self.is_local_path = self._check_is_local_path()
57
+ self.cache_folder = self._derive_cache_folder()
58
+
59
+ def _check_is_local_path(self) -> bool:
60
+ if self.source.startswith("\\\\") or (len(self.source) > 2 and self.source[1] == ":"):
61
+ return True
62
+ if "/" in self.source:
63
+ return os.path.exists(self.source)
64
+ return False
65
+
66
+ def _derive_cache_folder(self) -> str:
67
+ if self.is_local_path:
68
+ return None
69
+
70
+ if "/" in self.source:
71
+ return "models--" + self.source.replace("/", "--")
72
+
73
+ if self.source in _MODELS:
74
+ repo = _MODELS[self.source]
75
+ return "models--" + repo.replace("/", "--")
76
+
77
+ return f"{ModelRegistry.DEFAULT_CACHE_PREFIX}{self.source}"
@@ -160,7 +160,7 @@ class StateManager:
160
160
 
161
161
  if pending_model:
162
162
  self.logger.info(f"Executing pending model change to: {pending_model}")
163
- print(f"🔄 Processing complete, now switching to {pending_model} model...")
163
+ print(f"🔄 Processing complete, now switching to [{pending_model}] model...")
164
164
  self._execute_model_change(pending_model)
165
165
  self._pending_model_change = None
166
166
 
@@ -226,10 +226,10 @@ class StateManager:
226
226
  else:
227
227
  return "idle"
228
228
 
229
- def request_model_change(self, new_model_size: str) -> bool:
229
+ def request_model_change(self, new_model_key: str) -> bool:
230
230
  current_state = self.get_current_state()
231
231
 
232
- if new_model_size == self.whisper_engine.model_size:
232
+ if new_model_key == self.whisper_engine.model_key:
233
233
  return True
234
234
 
235
235
  if current_state == "model_loading":
@@ -237,18 +237,18 @@ class StateManager:
237
237
  return False
238
238
 
239
239
  if current_state == "recording":
240
- print(f"🎤 Cancelling recording to switch to {new_model_size} model...")
240
+ print(f"🎤 Cancelling recording to switch to [{new_model_key}] model...")
241
241
  self.cancel_active_recording()
242
- self._execute_model_change(new_model_size)
242
+ self._execute_model_change(new_model_key)
243
243
  return True
244
244
 
245
245
  if current_state == "processing":
246
- print(f"⏳ Queueing model change to {new_model_size} until transcription completes...")
247
- self._pending_model_change = new_model_size
246
+ print(f"⏳ Queueing model change to [{new_model_key}] until transcription completes...")
247
+ self._pending_model_change = new_model_key
248
248
  return True
249
249
 
250
250
  if current_state == "idle":
251
- self._execute_model_change(new_model_size)
251
+ self._execute_model_change(new_model_key)
252
252
  return True
253
253
 
254
254
  self.logger.warning(f"Unexpected state for model change: {current_state}")
@@ -261,10 +261,10 @@ class StateManager:
261
261
  def show_console(self):
262
262
  self.console_manager.show_console()
263
263
 
264
- def _execute_model_change(self, new_model_size: str):
264
+ def _execute_model_change(self, new_model_key: str):
265
265
  def progress_callback(message: str):
266
266
  if "ready" in message.lower() or "already loaded" in message.lower():
267
- print(f"✅ Successfully switched to {new_model_size} model")
267
+ print(f"✅ Successfully switched to [{new_model_key}] model")
268
268
  self.set_model_loading(False)
269
269
  elif "failed" in message.lower():
270
270
  print(f"❌ Failed to change model: {message}")
@@ -275,9 +275,9 @@ class StateManager:
275
275
 
276
276
  try:
277
277
  self.set_model_loading(True)
278
- print(f"🔄 Switching to {new_model_size} model...")
278
+ print(f"🔄 Switching to [{new_model_key}] model...")
279
279
 
280
- self.whisper_engine.change_model(new_model_size, progress_callback)
280
+ self.whisper_engine.change_model(new_model_key, progress_callback)
281
281
 
282
282
  except Exception as e:
283
283
  self.logger.error(f"Failed to initiate model change: {e}")
@@ -20,15 +20,17 @@ if TYPE_CHECKING:
20
20
  from .state_manager import StateManager
21
21
  from .config_manager import ConfigManager
22
22
 
23
- class SystemTray:
23
+ class SystemTray:
24
24
  def __init__(self,
25
25
  state_manager: 'StateManager',
26
26
  tray_config: dict = None,
27
- config_manager: Optional['ConfigManager'] = None):
27
+ config_manager: Optional['ConfigManager'] = None,
28
+ model_registry = None):
28
29
 
29
30
  self.state_manager = state_manager
30
31
  self.tray_config = tray_config or {}
31
32
  self.config_manager = config_manager
33
+ self.model_registry = model_registry
32
34
  self.logger = logging.getLogger(__name__)
33
35
 
34
36
  self.icon = None # pystray object, holds menu, state, etc.
@@ -91,13 +93,49 @@ class SystemTray:
91
93
 
92
94
  return icon
93
95
 
96
+ def _build_model_menu_items(self, current_model: str, is_model_loading: bool) -> list:
97
+ items = []
98
+
99
+ if not self.model_registry:
100
+ return items
101
+
102
+ def make_model_selector(model_key):
103
+ return lambda icon, item: self._select_model(model_key)
104
+
105
+ def make_is_current(model_key):
106
+ return lambda item: model_key == current_model
107
+
108
+ def model_selection_enabled(item):
109
+ return not is_model_loading
110
+
111
+ first_group = True
112
+ for group in self.model_registry.get_groups_ordered():
113
+ models = self.model_registry.get_models_by_group(group)
114
+ if not models:
115
+ continue
116
+
117
+ if not first_group:
118
+ items.append(pystray.Menu.SEPARATOR)
119
+ first_group = False
120
+
121
+ for model in models:
122
+ items.append(pystray.MenuItem(
123
+ model.label,
124
+ make_model_selector(model.key),
125
+ radio=True,
126
+ checked=make_is_current(model.key),
127
+ enabled=model_selection_enabled
128
+ ))
129
+
130
+ return items
131
+
94
132
  def _create_menu(self):
95
133
  try:
96
134
  app_state = self.state_manager.get_application_state()
97
135
  is_model_loading = app_state.get('model_loading', False)
98
136
 
99
137
  auto_paste_enabled = self.config_manager.get_setting('clipboard', 'auto_paste')
100
- current_model = self.config_manager.get_setting('whisper', 'model_size')
138
+ current_model = self.config_manager.get_setting('whisper', 'model')
101
139
 
102
140
  available_hosts = self.state_manager.get_available_audio_hosts()
103
141
  current_host = self.state_manager.get_current_audio_host()
@@ -146,26 +184,12 @@ class SystemTray:
146
184
  )
147
185
  )
148
186
 
149
- def is_current_model(model_name):
150
- return model_name == current_model
151
-
152
- def model_selection_enabled():
153
- return not is_model_loading
154
-
155
- model_sub_menu_items = [
156
- pystray.MenuItem("Tiny (75MB, fastest)", lambda icon, item: self._select_model("tiny"), radio=True, checked=lambda item: is_current_model("tiny"), enabled=model_selection_enabled()),
157
- pystray.MenuItem("Base (142MB, balanced)", lambda icon, item: self._select_model("base"), radio=True, checked=lambda item: is_current_model("base"), enabled=model_selection_enabled()),
158
- pystray.MenuItem("Small (466MB, accurate)", lambda icon, item: self._select_model("small"), radio=True, checked=lambda item: is_current_model("small"), enabled=model_selection_enabled()),
159
- pystray.MenuItem("Medium (1.5GB, very accurate)", lambda icon, item: self._select_model("medium"), radio=True, checked=lambda item: is_current_model("medium"), enabled=model_selection_enabled()),
160
- pystray.MenuItem("Large (2.9GB, best accuracy)", lambda icon, item: self._select_model("large"), radio=True, checked=lambda item: is_current_model("large"), enabled=model_selection_enabled()),
161
- pystray.Menu.SEPARATOR,
162
- pystray.MenuItem("Tiny.En (English only)", lambda icon, item: self._select_model("tiny.en"), radio=True, checked=lambda item: is_current_model("tiny.en"), enabled=model_selection_enabled()),
163
- pystray.MenuItem("Base.En (English only)", lambda icon, item: self._select_model("base.en"), radio=True, checked=lambda item: is_current_model("base.en"), enabled=model_selection_enabled()),
164
- pystray.MenuItem("Small.En (English only)", lambda icon, item: self._select_model("small.en"), radio=True, checked=lambda item: is_current_model("small.en"), enabled=model_selection_enabled()),
165
- pystray.MenuItem("Medium.En (English only)", lambda icon, item: self._select_model("medium.en"), radio=True, checked=lambda item: is_current_model("medium.en"), enabled=model_selection_enabled())
166
- ]
187
+ model_sub_menu_items = self._build_model_menu_items(current_model, is_model_loading)
167
188
 
168
189
  menu_items = [
190
+ pystray.MenuItem("View Log", self._view_log_file),
191
+ pystray.MenuItem("Advanced Settings", self._open_config_file),
192
+ pystray.Menu.SEPARATOR,
169
193
  pystray.MenuItem(
170
194
  "Audio Host",
171
195
  pystray.Menu(*audio_host_items)
@@ -207,22 +231,38 @@ class SystemTray:
207
231
  def _show_console(self, icon=None, item=None):
208
232
  self.state_manager.show_console()
209
233
 
234
+ def _view_log_file(self, icon=None, item=None):
235
+ try:
236
+ print("⚙️ Opening log file...")
237
+ log_path = self.config_manager.get_log_file_path()
238
+ os.startfile(log_path)
239
+ except Exception as e:
240
+ self.logger.error(f"Failed to open log file: {e}")
241
+
242
+ def _open_config_file(self, icon=None, item=None):
243
+ try:
244
+ print("⚙️ Opening settings...")
245
+ config_path = self.config_manager.user_settings_path
246
+ os.startfile(config_path)
247
+ except Exception as e:
248
+ self.logger.error(f"Failed to open config file: {e}")
249
+
210
250
  def _set_transcription_mode(self, auto_paste: bool):
211
251
  self.state_manager.update_transcription_mode(auto_paste)
212
252
  self.icon.menu = self._create_menu()
213
253
 
214
- def _select_model(self, model_size: str):
254
+ def _select_model(self, model_key: str):
215
255
  try:
216
- success = self.state_manager.request_model_change(model_size)
256
+ success = self.state_manager.request_model_change(model_key)
217
257
 
218
258
  if success:
219
- self.config_manager.update_user_setting('whisper', 'model_size', model_size)
259
+ self.config_manager.update_user_setting('whisper', 'model', model_key)
220
260
  self.icon.menu = self._create_menu()
221
261
  else:
222
- self.logger.warning(f"Request to change model to {model_size} was not accepted")
262
+ self.logger.warning(f"Request to change model to {model_key} was not accepted")
223
263
 
224
264
  except Exception as e:
225
- self.logger.error(f"Error selecting model {model_size}: {e}")
265
+ self.logger.error(f"Error selecting model {model_key}: {e}")
226
266
 
227
267
  def _select_audio_host(self, host_name: str):
228
268
  try:
@@ -20,9 +20,14 @@ class OptionalComponent:
20
20
  def beautify_hotkey(hotkey_string: str) -> str:
21
21
  if not hotkey_string:
22
22
  return ""
23
-
23
+
24
24
  return hotkey_string.replace('+', '+').upper()
25
25
 
26
+ def parse_hotkey(hotkey_string: str) -> list:
27
+ if not hotkey_string:
28
+ return []
29
+ return hotkey_string.lower().split('+')
30
+
26
31
  def is_installed_package():
27
32
  # Check if running from an installed package
28
33
  return 'site-packages' in __file__
@@ -1,111 +1,109 @@
1
1
  import logging
2
- import os
3
2
  import time
4
3
  import threading
5
4
  from typing import Optional, Callable
6
5
 
7
6
  import numpy as np
8
7
  from faster_whisper import WhisperModel
9
- from .utils import OptionalComponent
10
8
 
11
- class WhisperEngine:
12
- MODEL_CACHE_PREFIX = "models--Systran--faster-whisper-" # file prefix for hugging-face model
13
9
 
10
+ class WhisperEngine:
14
11
  def __init__(self,
15
- model_size: str = "tiny",
12
+ model_key: str = "tiny",
16
13
  device: str = "cpu",
17
14
  compute_type: str = "int8",
18
15
  language: str = None,
19
16
  beam_size: int = 5,
20
- vad_manager = None):
21
-
22
- self.model_size = model_size
17
+ vad_manager = None,
18
+ model_registry = None):
19
+
20
+ self.model_key = model_key
23
21
  self.device = device
24
22
  self.compute_type = compute_type
25
23
  self.language = None if language == 'auto' else language
26
24
  self.beam_size = beam_size
27
25
  self.model = None
28
26
  self.logger = logging.getLogger(__name__)
27
+ self.registry = model_registry
29
28
 
30
29
  self._loading_thread = None
31
30
  self._progress_callback = None
32
31
 
33
32
  self.vad_manager = vad_manager
34
-
33
+
35
34
  self._load_model()
36
35
 
37
- def _get_cache_directory(self):
38
- userprofile = os.getenv('USERPROFILE')
39
- if not userprofile:
40
- home = os.path.expanduser('~')
41
- userprofile = home
42
-
43
- cache_dir = os.path.join(userprofile, '.cache', 'huggingface', 'hub')
44
- return cache_dir
45
-
46
- def _is_model_cached(self, model_size=None):
47
- if model_size is None:
48
- model_size = self.model_size
49
- cache_dir = self._get_cache_directory()
50
- model_folder = f"{self.MODEL_CACHE_PREFIX}{model_size}"
51
- return os.path.exists(os.path.join(cache_dir, model_folder))
52
-
36
+ def _get_model_source(self, model_key: str) -> str:
37
+ if self.registry:
38
+ return self.registry.get_source(model_key)
39
+ return model_key
40
+
41
+ def _is_model_cached(self, model_key: str = None) -> bool:
42
+ if model_key is None:
43
+ model_key = self.model_key
44
+ if self.registry:
45
+ return self.registry.is_model_cached(model_key)
46
+ return False
47
+
53
48
  def _load_model(self):
54
49
  try:
55
- print(f"🧠 Loading Whisper AI model [{self.model_size}]...")
56
-
50
+ print(f"🧠 Loading Whisper AI model [{self.model_key}]...")
51
+
57
52
  was_cached = self._is_model_cached()
58
53
  if not was_cached:
59
54
  print("Downloading model, this may take a few minutes....")
60
-
55
+
56
+ model_source = self._get_model_source(self.model_key)
61
57
  self.model = WhisperModel(
62
- self.model_size,
58
+ model_source,
63
59
  device=self.device,
64
60
  compute_type=self.compute_type
65
61
  )
66
-
62
+
67
63
  if not was_cached:
68
- print("\n") # Workaround for download status bar misplacement
64
+ print("\n") # Workaround for download status bar misplacement
65
+
66
+ print(f" ✓ Whisper model [{self.model_key}] ready!")
69
67
 
70
- print(f" ✓ Whisper model [{self.model_size}] ready!")
71
-
72
68
  except Exception as e:
73
69
  self.logger.error(f"Failed to load Whisper model: {e}")
74
70
  raise
75
71
 
76
72
  def _load_model_async(self,
77
- new_model_size: str,
73
+ new_model_key: str,
78
74
  progress_callback: Optional[Callable[[str], None]] = None):
79
75
  def _background_loader():
80
- try:
76
+ try:
81
77
  if progress_callback:
82
78
  progress_callback("Checking model cache...")
83
-
84
- old_model_size = self.model_size
85
- was_cached = self._is_model_cached(new_model_size)
86
-
79
+
80
+ old_model_key = self.model_key
81
+ was_cached = self._is_model_cached(new_model_key)
82
+
87
83
  if progress_callback:
88
84
  if was_cached:
89
85
  progress_callback("Loading cached model...")
90
86
  else:
91
87
  progress_callback("Downloading model...")
92
-
93
- self.logger.info(f"Loading Whisper model: {new_model_size} (async)")
94
88
 
89
+ self.logger.info(f"Loading Whisper model: {new_model_key} (async)")
90
+
91
+ model_source = self._get_model_source(new_model_key)
95
92
  new_model = WhisperModel(
96
- new_model_size,
93
+ model_source,
97
94
  device=self.device,
98
95
  compute_type=self.compute_type
99
96
  )
100
-
97
+
101
98
  self.model = new_model
102
- self.logger.info(f"Whisper model [{new_model_size}] loaded successfully (async)")
103
-
99
+ self.model_key = new_model_key
100
+ self.logger.info(f"Whisper model [{new_model_key}] loaded successfully (async)")
101
+
104
102
  if progress_callback:
105
103
  progress_callback("Model ready!")
106
-
104
+
107
105
  except Exception as e:
108
- self.model_size = old_model_size
106
+ self.model_key = old_model_key
109
107
  self.logger.error(f"Failed to load Whisper model async: {e}")
110
108
  if progress_callback:
111
109
  progress_callback(f"Failed to load model: {e}")
@@ -113,7 +111,7 @@ class WhisperEngine:
113
111
  finally:
114
112
  self._loading_thread = None
115
113
  self._progress_callback = None
116
-
114
+
117
115
  if self._loading_thread and self._loading_thread.is_alive():
118
116
  self.logger.warning("Model loading already in progress, ignoring new request")
119
117
  return
@@ -188,13 +186,13 @@ class WhisperEngine:
188
186
 
189
187
 
190
188
  def change_model(self,
191
- new_model_size: str,
189
+ new_model_key: str,
192
190
  progress_callback: Optional[Callable[[str], None]] = None):
193
191
 
194
- if new_model_size == self.model_size:
192
+ if new_model_key == self.model_key:
195
193
  if progress_callback:
196
194
  progress_callback("Model already loaded")
197
195
  return
198
196
 
199
- self._load_model_async(new_model_size, progress_callback)
197
+ self._load_model_async(new_model_key, progress_callback)
200
198
 
@@ -1,11 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: whisper-key-local
3
- Version: 0.4.0
3
+ Version: 0.5.1
4
4
  Summary: Local faster-whisper speech-to-text app with global hotkeys for Windows
5
5
  Author-email: Pin Wang <pinwang@gmail.com>
6
6
  Requires-Python: >=3.11
7
7
  Description-Content-Type: text/markdown
8
- Requires-Dist: faster-whisper>=1.1.1
8
+ Requires-Dist: faster-whisper>=1.2.1
9
+ Requires-Dist: ctranslate2>=4.6.3
9
10
  Requires-Dist: numpy>=1.24.0
10
11
  Requires-Dist: scipy>=1.11.0
11
12
  Requires-Dist: sounddevice>=0.4.6
@@ -22,6 +23,8 @@ Requires-Dist: hf-xet>=1.1.5
22
23
 
23
24
  Global hotkey to start/stop recording and auto-paste transcription wherever your cursor is.
24
25
 
26
+ Questions or ideas? [Discord Server](https://discord.gg/uZnXV8snhz)
27
+
25
28
  ## 🎯 Features
26
29
 
27
30
  - **Global Hotkey**: Press `Ctrl+Win` to start recording, `Ctrl` to stop
@@ -10,6 +10,7 @@ src/whisper_key/console_manager.py
10
10
  src/whisper_key/hotkey_listener.py
11
11
  src/whisper_key/instance_manager.py
12
12
  src/whisper_key/main.py
13
+ src/whisper_key/model_registry.py
13
14
  src/whisper_key/state_manager.py
14
15
  src/whisper_key/system_tray.py
15
16
  src/whisper_key/utils.py
@@ -1,4 +1,5 @@
1
- faster-whisper>=1.1.1
1
+ faster-whisper>=1.2.1
2
+ ctranslate2>=4.6.3
2
3
  numpy>=1.24.0
3
4
  scipy>=1.11.0
4
5
  sounddevice>=0.4.6