whisper-key-local 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/PKG-INFO +5 -2
  2. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/README.md +2 -0
  3. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/pyproject.toml +3 -2
  4. whisper_key_local-0.5.0/src/whisper_key/assets/version.txt +1 -0
  5. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/config.defaults.yaml +73 -50
  6. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/config_manager.py +20 -18
  7. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/main.py +11 -7
  8. whisper_key_local-0.5.0/src/whisper_key/model_registry.py +77 -0
  9. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/state_manager.py +12 -12
  10. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/system_tray.py +64 -26
  11. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/whisper_engine.py +49 -51
  12. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key_local.egg-info/PKG-INFO +5 -2
  13. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key_local.egg-info/SOURCES.txt +1 -0
  14. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key_local.egg-info/requires.txt +2 -1
  15. whisper_key_local-0.4.0/src/whisper_key/assets/version.txt +0 -1
  16. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/setup.cfg +0 -0
  17. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/__init__.py +0 -0
  18. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/assets/portaudio.dll +0 -0
  19. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/assets/sounds/record_cancel.wav +0 -0
  20. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/assets/sounds/record_start.wav +0 -0
  21. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/assets/sounds/record_stop.wav +0 -0
  22. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/assets/tray_idle.png +0 -0
  23. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/assets/tray_processing.png +0 -0
  24. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/assets/tray_recording.png +0 -0
  25. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/audio_feedback.py +0 -0
  26. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/audio_recorder.py +0 -0
  27. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/clipboard_manager.py +0 -0
  28. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/console_manager.py +0 -0
  29. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/hotkey_listener.py +0 -0
  30. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/instance_manager.py +0 -0
  31. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/utils.py +0 -0
  32. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key/voice_activity_detection.py +0 -0
  33. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key_local.egg-info/dependency_links.txt +0 -0
  34. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key_local.egg-info/entry_points.txt +0 -0
  35. {whisper_key_local-0.4.0 → whisper_key_local-0.5.0}/src/whisper_key_local.egg-info/top_level.txt +0 -0
@@ -1,11 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: whisper-key-local
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Local faster-whisper speech-to-text app with global hotkeys for Windows
5
5
  Author-email: Pin Wang <pinwang@gmail.com>
6
6
  Requires-Python: >=3.11
7
7
  Description-Content-Type: text/markdown
8
- Requires-Dist: faster-whisper>=1.1.1
8
+ Requires-Dist: faster-whisper>=1.2.1
9
+ Requires-Dist: ctranslate2>=4.6.3
9
10
  Requires-Dist: numpy>=1.24.0
10
11
  Requires-Dist: scipy>=1.11.0
11
12
  Requires-Dist: sounddevice>=0.4.6
@@ -22,6 +23,8 @@ Requires-Dist: hf-xet>=1.1.5
22
23
 
23
24
  Global hotkey to start/stop recording and auto-paste transcription wherever your cursor is.
24
25
 
26
+ Questions or ideas? [Discord Server](https://discord.gg/uZnXV8snhz)
27
+
25
28
  ## 🎯 Features
26
29
 
27
30
  - **Global Hotkey**: Press `Ctrl+Win` to start recording, `Ctrl` to stop
@@ -2,6 +2,8 @@
2
2
 
3
3
  Global hotkey to start/stop recording and auto-paste transcription wherever your cursor is.
4
4
 
5
+ Questions or ideas? [Discord Server](https://discord.gg/uZnXV8snhz)
6
+
5
7
  ## 🎯 Features
6
8
 
7
9
  - **Global Hotkey**: Press `Ctrl+Win` to start recording, `Ctrl` to stop
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "whisper-key-local"
7
- version = "0.4.0"
7
+ version = "0.5.0"
8
8
  description = "Local faster-whisper speech-to-text app with global hotkeys for Windows"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -12,7 +12,8 @@ authors = [
12
12
  ]
13
13
  requires-python = ">=3.11"
14
14
  dependencies = [
15
- "faster-whisper>=1.1.1",
15
+ "faster-whisper>=1.2.1",
16
+ "ctranslate2>=4.6.3",
16
17
  "numpy>=1.24.0",
17
18
  "scipy>=1.11.0",
18
19
  "sounddevice>=0.4.6",
@@ -7,24 +7,11 @@
7
7
  # Personal settings are stored at:
8
8
  # %APPDATA%\whisperkey\user_settings.yaml  (%APPDATA% already expands to ...\AppData\Roaming)
9
9
 
10
- # =============================================================================
11
- # WHISPER AI MODEL SETTINGS
12
- # =============================================================================
13
- whisper:
14
- # Model size - affects accuracy vs speed tradeoff
15
- # Options: "tiny", "base", "small", "medium", "large", "tiny.en", "base.en", "small.en", "medium.en"
16
- # Multilingual models:
17
- # - tiny: ~39MB, fastest, good for most use cases
18
- # - base: ~74MB, better accuracy, still fast
19
- # - small: ~244MB, high accuracy, slower
20
- # - medium: ~769MB, very high accuracy, much slower
21
- # - large: ~1550MB, best accuracy, very slow (not recommended for real-time)
22
- # English-only models (more accurate, slightly faster):
23
- # - tiny.en: 39MB,
24
- # - base.en: 74MB,
25
- # - small.en: 244MB,
26
- # - medium.en: 769MB
27
- model_size: base
10
+ whisper: # Whisper AI Model Settings
11
+
12
+ # Model selection
13
+ # See "models" below for more information
14
+ model: tiny
28
15
 
29
16
  # Processing device - where the AI runs
30
17
  # Options: "cpu", "cuda" (for NVIDIA GPUs)
@@ -46,10 +33,60 @@ whisper:
46
33
  # Transcription quality settings
47
34
  beam_size: 5 # Higher = more accurate but slower (1-10)
48
35
 
49
- # =============================================================================
50
- # HOTKEY CONFIGURATION
51
- # =============================================================================
52
- hotkey:
36
+ # Available models (set enabled: false to hide from menu)
37
+ # To add custom models (CTranslate2 format), add entry with source (HuggingFace or local path)
38
+ # Example: my-model: {source: "username/repo-ct2", label: "My Model", group: custom}
39
+ models:
40
+ # Official whisper models
41
+ tiny:
42
+ label: "Tiny (76MB, fastest)"
43
+ group: official
44
+ enabled: true
45
+ base:
46
+ label: "Base (145MB, balanced)"
47
+ group: official
48
+ enabled: true
49
+ small:
50
+ label: "Small (484MB, accurate)"
51
+ group: official
52
+ enabled: true
53
+ medium:
54
+ label: "Medium (1.5GB, very accurate)"
55
+ group: official
56
+ enabled: true
57
+ large: # Uses the large-v3 version
58
+ label: "Large (3.1GB, best accuracy)"
59
+ group: official
60
+ enabled: true
61
+ large-v3-turbo: # 8x faster and only slightly less accurate (transcription optimized)
62
+ label: "Large-V3-Turbo (1.6GB, newest)"
63
+ group: official
64
+ enabled: true
65
+
66
+ # Models with native faster-whisper support
67
+ tiny.en:
68
+ label: "Tiny.En (English)"
69
+ group: custom
70
+ enabled: true
71
+ base.en:
72
+ label: "Base.En (English)"
73
+ group: custom
74
+ enabled: true
75
+ small.en:
76
+ label: "Small.En (English)"
77
+ group: custom
78
+ enabled: true
79
+ medium.en:
80
+ label: "Medium.En (English)"
81
+ group: custom
82
+ enabled: true
83
+ distil-large-v3.5: # About 1.5x faster than large-v3-turbo
84
+ source: distil-whisper/distil-large-v3.5-ct2
85
+ label: "Distil-Large-V3.5 (English)"
86
+ group: custom
87
+
88
+ hotkey: # Hotkey Configuration
89
+
53
90
  # Key combination to start/stop recording
54
91
  # Format: modifier+modifier+key (use lowercase)
55
92
  # Common modifiers: ctrl, shift, alt, win
@@ -79,10 +116,8 @@ hotkey:
79
116
  # Examples: "esc", "ctrl+c", "shift+esc"
80
117
  cancel_combination: esc
81
118
 
82
- # =============================================================================
83
- # VOICE ACTIVITY DETECTION (VAD) SETTINGS
84
- # =============================================================================
85
- vad:
119
+ vad: # Voice Activity Detection (VAD)
120
+
86
121
  # Voice Activity Detection (VAD) Pre-check
87
122
  # Uses TEN VAD to detect speech before transcription (prevents hallucinations on silence)
88
123
  # NOTE: This is separate from faster-whisper's built-in VAD
@@ -99,10 +134,8 @@ vad:
99
134
  vad_realtime_enabled: true # Enables continuous silence detection and automatic recording stop
100
135
  vad_silence_timeout_seconds: 30.0 # Auto-stop recording after this many seconds of silence
101
136
 
102
- # =============================================================================
103
- # AUDIO RECORDING SETTINGS
104
- # =============================================================================
105
- audio:
137
+ audio: # Audio Recording Settings
138
+
106
139
  # Sample rate is fixed at 16000 Hz for optimal Whisper and TEN VAD performance
107
140
 
108
141
  # Audio host API selection
@@ -132,10 +165,8 @@ audio:
132
165
  # are plugged/unplugged. Use system tray to select device - it will save the ID here.
133
166
  input_device: "default"
134
167
 
135
- # =============================================================================
136
- # CLIPBOARD BEHAVIOR
137
- # =============================================================================
138
- clipboard:
168
+ clipboard: # Clipboard Behavior
169
+
139
170
  # Automatically paste after transcription
140
171
  # true = paste immediately to active window
141
172
  # false = only copy to clipboard (paste manually with Ctrl+V)
@@ -150,10 +181,8 @@ clipboard:
150
181
  # Small delay ensures operations complete reliably
151
182
  key_simulation_delay: 0.05
152
183
 
153
- # =============================================================================
154
- # LOGGING CONFIGURATION
155
- # =============================================================================
156
- logging:
184
+ logging: # Logging Configuration
185
+
157
186
  # Log level - how much detail to log
158
187
  # Options: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
159
188
  # - DEBUG: Very detailed (for troubleshooting)
@@ -173,10 +202,8 @@ logging:
173
202
  level: WARNING # Console log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) - WARNING shows important messages only
174
203
 
175
204
 
176
- # =============================================================================
177
- # AUDIO FEEDBACK SETTINGS
178
- # =============================================================================
179
- audio_feedback:
205
+ audio_feedback: # Audio Feedback Settings
206
+
180
207
  # Enable/disable audio feedback sounds
181
208
  # true = play sounds when recording starts/stops
182
209
  # false = silent operation
@@ -188,10 +215,8 @@ audio_feedback:
188
215
  stop_sound: assets/sounds/record_stop.wav
189
216
  cancel_sound: assets/sounds/record_cancel.wav
190
217
 
191
- # =============================================================================
192
- # SYSTEM TRAY SETTINGS
193
- # =============================================================================
194
- system_tray:
218
+ system_tray: # System Tray Settings
219
+
195
220
  # Enable/disable system tray icon
196
221
  # true = show icon in system tray with status and menu
197
222
  # false = run without system tray (console mode only)
@@ -200,10 +225,8 @@ system_tray:
200
225
  # Tooltip text when hovering over tray icon
201
226
  tooltip: Whisper Key
202
227
 
203
- # =============================================================================
204
- # CONSOLE VISIBILITY SETTINGS
205
- # =============================================================================
206
- console:
228
+ console: # Console Visibility Settings
229
+
207
230
  # Start with console hidden to system tray
208
231
  # Note: This setting is ignored in CLI mode
209
232
  start_hidden: false
@@ -210,6 +210,10 @@ class ConfigManager:
210
210
  def get_console_config(self) -> Dict[str, Any]:
211
211
  return self.config.get('console', {}).copy()
212
212
 
213
+ def get_log_file_path(self) -> str:
214
+ log_filename = self.config['logging']['file']['filename']
215
+ return os.path.join(get_user_app_data_path(), log_filename)
216
+
213
217
  def get_setting(self, section: str, key: str) -> Any:
214
218
  return self.config[section][key]
215
219
 
@@ -217,25 +221,25 @@ class ConfigManager:
217
221
  yaml = YAML()
218
222
  yaml.preserve_quotes = True
219
223
  yaml.indent(mapping=2, sequence=4, offset=2)
220
-
224
+
221
225
  temp_output = StringIO()
222
226
  yaml.dump(config_data, temp_output)
223
227
  lines = temp_output.getvalue().split('\n')
224
-
225
- # Find end of header - first blank line is the cutoff
226
- data_start = 0
227
- for i, line in enumerate(lines):
228
- if not line.strip(): # Empty line found
229
- data_start = i
230
- break
231
-
232
- user_config = []
233
- user_config.append("# =============================================================================")
234
- user_config.append("# WHISPER KEY - PERSONAL CONFIGURATION")
235
- user_config.append("# =============================================================================")
236
- user_config.extend(lines[data_start:])
237
-
238
- return '\n'.join(user_config)
228
+
229
+ filtered_lines = []
230
+ for line in lines:
231
+ if '# ====' in line:
232
+ continue
233
+ if line.strip().startswith('# ') and line.strip()[2:].replace(' ', '').isupper():
234
+ continue
235
+ filtered_lines.append(line)
236
+
237
+ header = ["# ============================================================================="]
238
+ header.append("# WHISPER KEY - PERSONAL CONFIGURATION")
239
+ header.append("# =============================================================================")
240
+ header.append("")
241
+
242
+ return '\n'.join(header + filtered_lines)
239
243
 
240
244
  def save_config_to_user_settings_file(self):
241
245
  try:
@@ -339,8 +343,6 @@ class ConfigValidator:
339
343
  self.config = config
340
344
  self.default_config = default_config
341
345
 
342
- self._validate_enum('whisper.model_size',
343
- ['tiny', 'base', 'small', 'medium', 'large', 'tiny.en', 'base.en', 'small.en', 'medium.en'])
344
346
  self._validate_enum('whisper.device', ['cpu', 'cuda'])
345
347
  self._validate_enum('whisper.compute_type', ['int8', 'float16', 'float32'])
346
348
 
@@ -20,6 +20,7 @@ from .system_tray import SystemTray
20
20
  from .audio_feedback import AudioFeedback
21
21
  from .console_manager import ConsoleManager
22
22
  from .instance_manager import guard_against_multiple_instances
23
+ from .model_registry import ModelRegistry
23
24
  from .utils import beautify_hotkey, get_user_app_data_path, get_version
24
25
 
25
26
  def is_built_executable():
@@ -82,14 +83,15 @@ def setup_vad(vad_config):
82
83
  vad_silence_timeout_seconds=vad_config['vad_silence_timeout_seconds']
83
84
  )
84
85
 
85
- def setup_whisper_engine(whisper_config, vad_manager):
86
+ def setup_whisper_engine(whisper_config, vad_manager, model_registry):
86
87
  return WhisperEngine(
87
- model_size=whisper_config['model_size'],
88
+ model_key=whisper_config['model'],
88
89
  device=whisper_config['device'],
89
90
  compute_type=whisper_config['compute_type'],
90
91
  language=whisper_config['language'],
91
92
  beam_size=whisper_config['beam_size'],
92
- vad_manager=vad_manager
93
+ vad_manager=vad_manager,
94
+ model_registry=model_registry
93
95
  )
94
96
 
95
97
  def setup_clipboard_manager(clipboard_config):
@@ -113,11 +115,12 @@ def setup_console_manager(console_config, is_executable_mode):
113
115
  is_executable_mode=is_executable_mode
114
116
  )
115
117
 
116
- def setup_system_tray(tray_config, config_manager, state_manager=None):
118
+ def setup_system_tray(tray_config, config_manager, state_manager, model_registry):
117
119
  return SystemTray(
118
120
  state_manager=state_manager,
119
121
  tray_config=tray_config,
120
- config_manager=config_manager
122
+ config_manager=config_manager,
123
+ model_registry=model_registry
121
124
  )
122
125
 
123
126
  def setup_signal_handlers(shutdown_event):
@@ -179,8 +182,9 @@ def main():
179
182
  is_executable = is_built_executable()
180
183
  console_manager = setup_console_manager(console_config, is_executable)
181
184
 
185
+ model_registry = ModelRegistry(whisper_config.get('models', {}))
182
186
  vad_manager = setup_vad(vad_config)
183
- whisper_engine = setup_whisper_engine(whisper_config, vad_manager)
187
+ whisper_engine = setup_whisper_engine(whisper_config, vad_manager, model_registry)
184
188
  clipboard_manager = setup_clipboard_manager(clipboard_config)
185
189
  audio_feedback = setup_audio_feedback(audio_feedback_config)
186
190
 
@@ -195,7 +199,7 @@ def main():
195
199
  vad_manager=vad_manager
196
200
  )
197
201
  audio_recorder = setup_audio_recorder(audio_config, state_manager, vad_manager)
198
- system_tray = setup_system_tray(tray_config, config_manager, state_manager)
202
+ system_tray = setup_system_tray(tray_config, config_manager, state_manager, model_registry)
199
203
  state_manager.attach_components(audio_recorder, system_tray)
200
204
 
201
205
  hotkey_listener = setup_hotkey_listener(hotkey_config, state_manager)
@@ -0,0 +1,77 @@
1
+ import os
2
+ from faster_whisper.utils import _MODELS
3
+
4
+
5
+ class ModelRegistry:
6
+ DEFAULT_CACHE_PREFIX = "models--Systran--faster-whisper-"
7
+
8
+ def __init__(self, models_config: dict):
9
+ self.models = {}
10
+ for key, config in models_config.items():
11
+ if isinstance(config, dict):
12
+ self.models[key] = ModelDefinition(key, config)
13
+
14
+ def get_model(self, key: str):
15
+ return self.models.get(key)
16
+
17
+ def get_source(self, key: str) -> str:
18
+ model = self.get_model(key)
19
+ return model.source if model else key
20
+
21
+ def get_cache_folder(self, key: str) -> str:
22
+ model = self.get_model(key)
23
+ if not model:
24
+ return f"{self.DEFAULT_CACHE_PREFIX}{key}"
25
+ return model.cache_folder
26
+
27
+ def get_models_by_group(self, group: str) -> list:
28
+ return [m for m in self.models.values() if m.group == group and m.enabled]
29
+
30
+ def get_groups_ordered(self) -> list:
31
+ return ["official", "custom"]
32
+
33
+ def get_hf_cache_path(self) -> str:
34
+ userprofile = os.environ.get('USERPROFILE')
35
+ if userprofile:
36
+ return os.path.join(userprofile, '.cache', 'huggingface', 'hub')
37
+ return os.path.join(os.path.expanduser('~'), '.cache', 'huggingface', 'hub')
38
+
39
+ def is_model_cached(self, key: str) -> bool:
40
+ model = self.get_model(key)
41
+ if model and model.is_local_path:
42
+ return os.path.exists(os.path.join(model.source, 'model.bin'))
43
+ cache_folder = self.get_cache_folder(key)
44
+ if not cache_folder:
45
+ return False
46
+ return os.path.exists(os.path.join(self.get_hf_cache_path(), cache_folder))
47
+
48
+
49
+ class ModelDefinition:
50
+ def __init__(self, key: str, config: dict):
51
+ self.key = key
52
+ self.source = config.get("source", key)
53
+ self.label = config.get("label", key.title())
54
+ self.group = config.get("group", "custom")
55
+ self.enabled = config.get("enabled", True)
56
+ self.is_local_path = self._check_is_local_path()
57
+ self.cache_folder = self._derive_cache_folder()
58
+
59
+ def _check_is_local_path(self) -> bool:
60
+ if self.source.startswith("\\\\") or (len(self.source) > 2 and self.source[1] == ":"):
61
+ return True
62
+ if "/" in self.source:
63
+ return os.path.exists(self.source)
64
+ return False
65
+
66
+ def _derive_cache_folder(self) -> str:
67
+ if self.is_local_path:
68
+ return None
69
+
70
+ if "/" in self.source:
71
+ return "models--" + self.source.replace("/", "--")
72
+
73
+ if self.source in _MODELS:
74
+ repo = _MODELS[self.source]
75
+ return "models--" + repo.replace("/", "--")
76
+
77
+ return f"{ModelRegistry.DEFAULT_CACHE_PREFIX}{self.source}"
@@ -160,7 +160,7 @@ class StateManager:
160
160
 
161
161
  if pending_model:
162
162
  self.logger.info(f"Executing pending model change to: {pending_model}")
163
- print(f"🔄 Processing complete, now switching to {pending_model} model...")
163
+ print(f"🔄 Processing complete, now switching to [{pending_model}] model...")
164
164
  self._execute_model_change(pending_model)
165
165
  self._pending_model_change = None
166
166
 
@@ -226,10 +226,10 @@ class StateManager:
226
226
  else:
227
227
  return "idle"
228
228
 
229
- def request_model_change(self, new_model_size: str) -> bool:
229
+ def request_model_change(self, new_model_key: str) -> bool:
230
230
  current_state = self.get_current_state()
231
231
 
232
- if new_model_size == self.whisper_engine.model_size:
232
+ if new_model_key == self.whisper_engine.model_key:
233
233
  return True
234
234
 
235
235
  if current_state == "model_loading":
@@ -237,18 +237,18 @@ class StateManager:
237
237
  return False
238
238
 
239
239
  if current_state == "recording":
240
- print(f"🎤 Cancelling recording to switch to {new_model_size} model...")
240
+ print(f"🎤 Cancelling recording to switch to [{new_model_key}] model...")
241
241
  self.cancel_active_recording()
242
- self._execute_model_change(new_model_size)
242
+ self._execute_model_change(new_model_key)
243
243
  return True
244
244
 
245
245
  if current_state == "processing":
246
- print(f"⏳ Queueing model change to {new_model_size} until transcription completes...")
247
- self._pending_model_change = new_model_size
246
+ print(f"⏳ Queueing model change to [{new_model_key}] until transcription completes...")
247
+ self._pending_model_change = new_model_key
248
248
  return True
249
249
 
250
250
  if current_state == "idle":
251
- self._execute_model_change(new_model_size)
251
+ self._execute_model_change(new_model_key)
252
252
  return True
253
253
 
254
254
  self.logger.warning(f"Unexpected state for model change: {current_state}")
@@ -261,10 +261,10 @@ class StateManager:
261
261
  def show_console(self):
262
262
  self.console_manager.show_console()
263
263
 
264
- def _execute_model_change(self, new_model_size: str):
264
+ def _execute_model_change(self, new_model_key: str):
265
265
  def progress_callback(message: str):
266
266
  if "ready" in message.lower() or "already loaded" in message.lower():
267
- print(f"✅ Successfully switched to {new_model_size} model")
267
+ print(f"✅ Successfully switched to [{new_model_key}] model")
268
268
  self.set_model_loading(False)
269
269
  elif "failed" in message.lower():
270
270
  print(f"❌ Failed to change model: {message}")
@@ -275,9 +275,9 @@ class StateManager:
275
275
 
276
276
  try:
277
277
  self.set_model_loading(True)
278
- print(f"🔄 Switching to {new_model_size} model...")
278
+ print(f"🔄 Switching to [{new_model_key}] model...")
279
279
 
280
- self.whisper_engine.change_model(new_model_size, progress_callback)
280
+ self.whisper_engine.change_model(new_model_key, progress_callback)
281
281
 
282
282
  except Exception as e:
283
283
  self.logger.error(f"Failed to initiate model change: {e}")
@@ -20,15 +20,17 @@ if TYPE_CHECKING:
20
20
  from .state_manager import StateManager
21
21
  from .config_manager import ConfigManager
22
22
 
23
- class SystemTray:
23
+ class SystemTray:
24
24
  def __init__(self,
25
25
  state_manager: 'StateManager',
26
26
  tray_config: dict = None,
27
- config_manager: Optional['ConfigManager'] = None):
27
+ config_manager: Optional['ConfigManager'] = None,
28
+ model_registry = None):
28
29
 
29
30
  self.state_manager = state_manager
30
31
  self.tray_config = tray_config or {}
31
32
  self.config_manager = config_manager
33
+ self.model_registry = model_registry
32
34
  self.logger = logging.getLogger(__name__)
33
35
 
34
36
  self.icon = None # pystray object, holds menu, state, etc.
@@ -91,13 +93,49 @@ class SystemTray:
91
93
 
92
94
  return icon
93
95
 
96
+ def _build_model_menu_items(self, current_model: str, is_model_loading: bool) -> list:
97
+ items = []
98
+
99
+ if not self.model_registry:
100
+ return items
101
+
102
+ def make_model_selector(model_key):
103
+ return lambda icon, item: self._select_model(model_key)
104
+
105
+ def make_is_current(model_key):
106
+ return lambda item: model_key == current_model
107
+
108
+ def model_selection_enabled(item):
109
+ return not is_model_loading
110
+
111
+ first_group = True
112
+ for group in self.model_registry.get_groups_ordered():
113
+ models = self.model_registry.get_models_by_group(group)
114
+ if not models:
115
+ continue
116
+
117
+ if not first_group:
118
+ items.append(pystray.Menu.SEPARATOR)
119
+ first_group = False
120
+
121
+ for model in models:
122
+ items.append(pystray.MenuItem(
123
+ model.label,
124
+ make_model_selector(model.key),
125
+ radio=True,
126
+ checked=make_is_current(model.key),
127
+ enabled=model_selection_enabled
128
+ ))
129
+
130
+ return items
131
+
94
132
  def _create_menu(self):
95
133
  try:
96
134
  app_state = self.state_manager.get_application_state()
97
135
  is_model_loading = app_state.get('model_loading', False)
98
136
 
99
137
  auto_paste_enabled = self.config_manager.get_setting('clipboard', 'auto_paste')
100
- current_model = self.config_manager.get_setting('whisper', 'model_size')
138
+ current_model = self.config_manager.get_setting('whisper', 'model')
101
139
 
102
140
  available_hosts = self.state_manager.get_available_audio_hosts()
103
141
  current_host = self.state_manager.get_current_audio_host()
@@ -146,26 +184,12 @@ class SystemTray:
146
184
  )
147
185
  )
148
186
 
149
- def is_current_model(model_name):
150
- return model_name == current_model
151
-
152
- def model_selection_enabled():
153
- return not is_model_loading
154
-
155
- model_sub_menu_items = [
156
- pystray.MenuItem("Tiny (75MB, fastest)", lambda icon, item: self._select_model("tiny"), radio=True, checked=lambda item: is_current_model("tiny"), enabled=model_selection_enabled()),
157
- pystray.MenuItem("Base (142MB, balanced)", lambda icon, item: self._select_model("base"), radio=True, checked=lambda item: is_current_model("base"), enabled=model_selection_enabled()),
158
- pystray.MenuItem("Small (466MB, accurate)", lambda icon, item: self._select_model("small"), radio=True, checked=lambda item: is_current_model("small"), enabled=model_selection_enabled()),
159
- pystray.MenuItem("Medium (1.5GB, very accurate)", lambda icon, item: self._select_model("medium"), radio=True, checked=lambda item: is_current_model("medium"), enabled=model_selection_enabled()),
160
- pystray.MenuItem("Large (2.9GB, best accuracy)", lambda icon, item: self._select_model("large"), radio=True, checked=lambda item: is_current_model("large"), enabled=model_selection_enabled()),
161
- pystray.Menu.SEPARATOR,
162
- pystray.MenuItem("Tiny.En (English only)", lambda icon, item: self._select_model("tiny.en"), radio=True, checked=lambda item: is_current_model("tiny.en"), enabled=model_selection_enabled()),
163
- pystray.MenuItem("Base.En (English only)", lambda icon, item: self._select_model("base.en"), radio=True, checked=lambda item: is_current_model("base.en"), enabled=model_selection_enabled()),
164
- pystray.MenuItem("Small.En (English only)", lambda icon, item: self._select_model("small.en"), radio=True, checked=lambda item: is_current_model("small.en"), enabled=model_selection_enabled()),
165
- pystray.MenuItem("Medium.En (English only)", lambda icon, item: self._select_model("medium.en"), radio=True, checked=lambda item: is_current_model("medium.en"), enabled=model_selection_enabled())
166
- ]
187
+ model_sub_menu_items = self._build_model_menu_items(current_model, is_model_loading)
167
188
 
168
189
  menu_items = [
190
+ pystray.MenuItem("View Log", self._view_log_file),
191
+ pystray.MenuItem("Advanced Settings", self._open_config_file),
192
+ pystray.Menu.SEPARATOR,
169
193
  pystray.MenuItem(
170
194
  "Audio Host",
171
195
  pystray.Menu(*audio_host_items)
@@ -207,22 +231,36 @@ class SystemTray:
207
231
  def _show_console(self, icon=None, item=None):
208
232
  self.state_manager.show_console()
209
233
 
234
+ def _view_log_file(self, icon=None, item=None):
235
+ try:
236
+ log_path = self.config_manager.get_log_file_path()
237
+ os.startfile(log_path)
238
+ except Exception as e:
239
+ self.logger.error(f"Failed to open log file: {e}")
240
+
241
+ def _open_config_file(self, icon=None, item=None):
242
+ try:
243
+ config_path = self.config_manager.user_settings_path
244
+ os.startfile(config_path)
245
+ except Exception as e:
246
+ self.logger.error(f"Failed to open config file: {e}")
247
+
210
248
  def _set_transcription_mode(self, auto_paste: bool):
211
249
  self.state_manager.update_transcription_mode(auto_paste)
212
250
  self.icon.menu = self._create_menu()
213
251
 
214
- def _select_model(self, model_size: str):
252
+ def _select_model(self, model_key: str):
215
253
  try:
216
- success = self.state_manager.request_model_change(model_size)
254
+ success = self.state_manager.request_model_change(model_key)
217
255
 
218
256
  if success:
219
- self.config_manager.update_user_setting('whisper', 'model_size', model_size)
257
+ self.config_manager.update_user_setting('whisper', 'model', model_key)
220
258
  self.icon.menu = self._create_menu()
221
259
  else:
222
- self.logger.warning(f"Request to change model to {model_size} was not accepted")
260
+ self.logger.warning(f"Request to change model to {model_key} was not accepted")
223
261
 
224
262
  except Exception as e:
225
- self.logger.error(f"Error selecting model {model_size}: {e}")
263
+ self.logger.error(f"Error selecting model {model_key}: {e}")
226
264
 
227
265
  def _select_audio_host(self, host_name: str):
228
266
  try:
@@ -1,111 +1,109 @@
1
1
  import logging
2
- import os
3
2
  import time
4
3
  import threading
5
4
  from typing import Optional, Callable
6
5
 
7
6
  import numpy as np
8
7
  from faster_whisper import WhisperModel
9
- from .utils import OptionalComponent
10
8
 
11
- class WhisperEngine:
12
- MODEL_CACHE_PREFIX = "models--Systran--faster-whisper-" # file prefix for hugging-face model
13
9
 
10
+ class WhisperEngine:
14
11
  def __init__(self,
15
- model_size: str = "tiny",
12
+ model_key: str = "tiny",
16
13
  device: str = "cpu",
17
14
  compute_type: str = "int8",
18
15
  language: str = None,
19
16
  beam_size: int = 5,
20
- vad_manager = None):
21
-
22
- self.model_size = model_size
17
+ vad_manager = None,
18
+ model_registry = None):
19
+
20
+ self.model_key = model_key
23
21
  self.device = device
24
22
  self.compute_type = compute_type
25
23
  self.language = None if language == 'auto' else language
26
24
  self.beam_size = beam_size
27
25
  self.model = None
28
26
  self.logger = logging.getLogger(__name__)
27
+ self.registry = model_registry
29
28
 
30
29
  self._loading_thread = None
31
30
  self._progress_callback = None
32
31
 
33
32
  self.vad_manager = vad_manager
34
-
33
+
35
34
  self._load_model()
36
35
 
37
- def _get_cache_directory(self):
38
- userprofile = os.getenv('USERPROFILE')
39
- if not userprofile:
40
- home = os.path.expanduser('~')
41
- userprofile = home
42
-
43
- cache_dir = os.path.join(userprofile, '.cache', 'huggingface', 'hub')
44
- return cache_dir
45
-
46
- def _is_model_cached(self, model_size=None):
47
- if model_size is None:
48
- model_size = self.model_size
49
- cache_dir = self._get_cache_directory()
50
- model_folder = f"{self.MODEL_CACHE_PREFIX}{model_size}"
51
- return os.path.exists(os.path.join(cache_dir, model_folder))
52
-
36
+ def _get_model_source(self, model_key: str) -> str:
37
+ if self.registry:
38
+ return self.registry.get_source(model_key)
39
+ return model_key
40
+
41
+ def _is_model_cached(self, model_key: str = None) -> bool:
42
+ if model_key is None:
43
+ model_key = self.model_key
44
+ if self.registry:
45
+ return self.registry.is_model_cached(model_key)
46
+ return False
47
+
53
48
  def _load_model(self):
54
49
  try:
55
- print(f"🧠 Loading Whisper AI model [{self.model_size}]...")
56
-
50
+ print(f"🧠 Loading Whisper AI model [{self.model_key}]...")
51
+
57
52
  was_cached = self._is_model_cached()
58
53
  if not was_cached:
59
54
  print("Downloading model, this may take a few minutes....")
60
-
55
+
56
+ model_source = self._get_model_source(self.model_key)
61
57
  self.model = WhisperModel(
62
- self.model_size,
58
+ model_source,
63
59
  device=self.device,
64
60
  compute_type=self.compute_type
65
61
  )
66
-
62
+
67
63
  if not was_cached:
68
- print("\n") # Workaround for download status bar misplacement
64
+ print("\n") # Workaround for download status bar misplacement
65
+
66
+ print(f" ✓ Whisper model [{self.model_key}] ready!")
69
67
 
70
- print(f" ✓ Whisper model [{self.model_size}] ready!")
71
-
72
68
  except Exception as e:
73
69
  self.logger.error(f"Failed to load Whisper model: {e}")
74
70
  raise
75
71
 
76
72
  def _load_model_async(self,
77
- new_model_size: str,
73
+ new_model_key: str,
78
74
  progress_callback: Optional[Callable[[str], None]] = None):
79
75
  def _background_loader():
80
- try:
76
+ try:
81
77
  if progress_callback:
82
78
  progress_callback("Checking model cache...")
83
-
84
- old_model_size = self.model_size
85
- was_cached = self._is_model_cached(new_model_size)
86
-
79
+
80
+ old_model_key = self.model_key
81
+ was_cached = self._is_model_cached(new_model_key)
82
+
87
83
  if progress_callback:
88
84
  if was_cached:
89
85
  progress_callback("Loading cached model...")
90
86
  else:
91
87
  progress_callback("Downloading model...")
92
-
93
- self.logger.info(f"Loading Whisper model: {new_model_size} (async)")
94
88
 
89
+ self.logger.info(f"Loading Whisper model: {new_model_key} (async)")
90
+
91
+ model_source = self._get_model_source(new_model_key)
95
92
  new_model = WhisperModel(
96
- new_model_size,
93
+ model_source,
97
94
  device=self.device,
98
95
  compute_type=self.compute_type
99
96
  )
100
-
97
+
101
98
  self.model = new_model
102
- self.logger.info(f"Whisper model [{new_model_size}] loaded successfully (async)")
103
-
99
+ self.model_key = new_model_key
100
+ self.logger.info(f"Whisper model [{new_model_key}] loaded successfully (async)")
101
+
104
102
  if progress_callback:
105
103
  progress_callback("Model ready!")
106
-
104
+
107
105
  except Exception as e:
108
- self.model_size = old_model_size
106
+ self.model_key = old_model_key
109
107
  self.logger.error(f"Failed to load Whisper model async: {e}")
110
108
  if progress_callback:
111
109
  progress_callback(f"Failed to load model: {e}")
@@ -113,7 +111,7 @@ class WhisperEngine:
113
111
  finally:
114
112
  self._loading_thread = None
115
113
  self._progress_callback = None
116
-
114
+
117
115
  if self._loading_thread and self._loading_thread.is_alive():
118
116
  self.logger.warning("Model loading already in progress, ignoring new request")
119
117
  return
@@ -188,13 +186,13 @@ class WhisperEngine:
188
186
 
189
187
 
190
188
  def change_model(self,
191
- new_model_size: str,
189
+ new_model_key: str,
192
190
  progress_callback: Optional[Callable[[str], None]] = None):
193
191
 
194
- if new_model_size == self.model_size:
192
+ if new_model_key == self.model_key:
195
193
  if progress_callback:
196
194
  progress_callback("Model already loaded")
197
195
  return
198
196
 
199
- self._load_model_async(new_model_size, progress_callback)
197
+ self._load_model_async(new_model_key, progress_callback)
200
198
 
@@ -1,11 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: whisper-key-local
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Local faster-whisper speech-to-text app with global hotkeys for Windows
5
5
  Author-email: Pin Wang <pinwang@gmail.com>
6
6
  Requires-Python: >=3.11
7
7
  Description-Content-Type: text/markdown
8
- Requires-Dist: faster-whisper>=1.1.1
8
+ Requires-Dist: faster-whisper>=1.2.1
9
+ Requires-Dist: ctranslate2>=4.6.3
9
10
  Requires-Dist: numpy>=1.24.0
10
11
  Requires-Dist: scipy>=1.11.0
11
12
  Requires-Dist: sounddevice>=0.4.6
@@ -22,6 +23,8 @@ Requires-Dist: hf-xet>=1.1.5
22
23
 
23
24
  Global hotkey to start/stop recording and auto-paste transcription wherever your cursor is.
24
25
 
26
+ Questions or ideas? [Discord Server](https://discord.gg/uZnXV8snhz)
27
+
25
28
  ## 🎯 Features
26
29
 
27
30
  - **Global Hotkey**: Press `Ctrl+Win` to start recording, `Ctrl` to stop
@@ -10,6 +10,7 @@ src/whisper_key/console_manager.py
10
10
  src/whisper_key/hotkey_listener.py
11
11
  src/whisper_key/instance_manager.py
12
12
  src/whisper_key/main.py
13
+ src/whisper_key/model_registry.py
13
14
  src/whisper_key/state_manager.py
14
15
  src/whisper_key/system_tray.py
15
16
  src/whisper_key/utils.py
@@ -1,4 +1,5 @@
1
- faster-whisper>=1.1.1
1
+ faster-whisper>=1.2.1
2
+ ctranslate2>=4.6.3
2
3
  numpy>=1.24.0
3
4
  scipy>=1.11.0
4
5
  sounddevice>=0.4.6