supervertaler-1.9.163-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. Supervertaler.py +48473 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1911 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +351 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1176 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.163.dist-info/METADATA +906 -0
  81. supervertaler-1.9.163.dist-info/RECORD +85 -0
  82. supervertaler-1.9.163.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.163.dist-info/top_level.txt +2 -0
--- /dev/null
+++ b/modules/voice_commands.py
@@ -0,0 +1,920 @@
+ """
+ Voice Commands Module for Supervertaler
+ Talon-style voice command system with 3 tiers:
+ - Tier 1: In-app commands (Python/PyQt6)
+ - Tier 2: System commands (AutoHotkey scripts)
+ - Tier 3: Dictation fallback (insert as text)
+ """
+
+ import json
+ import os
+ import re
+ import subprocess
+ from pathlib import Path
+ from typing import Optional, Dict, List, Callable, Tuple
+ from dataclasses import dataclass, field
+ from difflib import SequenceMatcher
+ from PyQt6.QtCore import QObject, pyqtSignal
+
+
+ @dataclass
+ class VoiceCommand:
+     """Represents a single voice command"""
+     phrase: str  # The spoken phrase (e.g., "confirm segment")
+     aliases: List[str] = field(default_factory=list)  # Alternative phrases
+     action_type: str = "internal"  # "internal", "keystroke", "ahk_script", "ahk_inline"
+     action: str = ""  # Action to execute
+     description: str = ""  # Human-readable description
+     category: str = "general"  # Category for organization
+     enabled: bool = True
+
+     def matches(self, spoken_text: str, threshold: float = 0.85) -> Tuple[bool, float]:
+         """
+         Check if spoken text matches this command.
+         Returns (is_match, confidence_score)
+         """
+         spoken_lower = spoken_text.lower().strip()
+
+         # Check exact matches first
+         all_phrases = [self.phrase.lower()] + [a.lower() for a in self.aliases]
+         for phrase in all_phrases:
+             if spoken_lower == phrase:
+                 return (True, 1.0)
+
+         # Check fuzzy matches
+         best_score = 0.0
+         for phrase in all_phrases:
+             # Use SequenceMatcher for fuzzy matching
+             score = SequenceMatcher(None, spoken_lower, phrase).ratio()
+             best_score = max(best_score, score)
+
+             # Also check if spoken text contains the phrase
+             if phrase in spoken_lower or spoken_lower in phrase:
+                 # Boost score for partial matches
+                 length_ratio = min(len(phrase), len(spoken_lower)) / max(len(phrase), len(spoken_lower))
+                 best_score = max(best_score, 0.9 * length_ratio)
+
+         return (best_score >= threshold, best_score)
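+
+     # A quick illustration of the matcher (hypothetical utterances, not part
+     # of the shipped defaults):
+     #
+     #     cmd = VoiceCommand("confirm segment", aliases=["confirm"])
+     #     cmd.matches("confirm")           # -> (True, 1.0)   exact alias match
+     #     cmd.matches("confirm segments")  # -> (True, ~0.97) fuzzy match
+     #     cmd.matches("copy source")       # -> (False, low score)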
+
+
+ class VoiceCommandManager(QObject):
+     """
+     Manages voice commands - matching spoken text to actions and executing them.
+     """
+
+     # Signals
+     command_executed = pyqtSignal(str, str)  # (command_phrase, result_message)
+     command_not_found = pyqtSignal(str)  # spoken_text that didn't match
+     error_occurred = pyqtSignal(str)  # error message
+
+     # Default commands
+     DEFAULT_COMMANDS = [
+         # Navigation
+         VoiceCommand("next segment", ["next", "down"], "internal", "navigate_next",
+                      "Move to next segment", "navigation"),
+         VoiceCommand("previous segment", ["previous", "back", "up"], "internal", "navigate_previous",
+                      "Move to previous segment", "navigation"),
+         VoiceCommand("first segment", ["go to start", "beginning"], "internal", "navigate_first",
+                      "Jump to first segment", "navigation"),
+         VoiceCommand("last segment", ["go to end", "end"], "internal", "navigate_last",
+                      "Jump to last segment", "navigation"),
+
+         # Segment actions
+         VoiceCommand("confirm", ["confirm segment", "done", "okay"], "internal", "confirm_segment",
+                      "Confirm current segment", "editing"),
+         VoiceCommand("copy source", ["copy from source", "source to target"], "internal", "copy_source_to_target",
+                      "Copy source text to target", "editing"),
+         VoiceCommand("clear target", ["clear", "delete target"], "internal", "clear_target",
+                      "Clear target text", "editing"),
+         VoiceCommand("undo", [], "keystroke", "ctrl+z",
+                      "Undo last action", "editing"),
+         VoiceCommand("redo", [], "keystroke", "ctrl+y",
+                      "Redo last action", "editing"),
+
+         # Translation
+         VoiceCommand("translate", ["translate segment", "translate this"], "internal", "translate_segment",
+                      "AI translate current segment", "translation"),
+         VoiceCommand("translate all", ["batch translate"], "internal", "batch_translate",
+                      "Translate all segments", "translation"),
+
+         # Lookup & Search
+         VoiceCommand("lookup", ["super lookup", "search"], "internal", "open_superlookup",
+                      "Open Superlookup (Ctrl+K)", "lookup"),
+         VoiceCommand("concordance", ["search memory", "search TM"], "internal", "concordance_search",
+                      "Open concordance search", "lookup"),
+
+         # File operations
+         VoiceCommand("save project", ["save"], "keystroke", "ctrl+s",
+                      "Save current project", "file"),
+         VoiceCommand("open project", ["open"], "keystroke", "ctrl+o",
+                      "Open project", "file"),
+
+         # View
+         VoiceCommand("show log", ["open log", "log tab"], "internal", "show_log",
+                      "Show log panel", "view"),
+         VoiceCommand("show editor", ["editor tab", "go to editor"], "internal", "show_editor",
+                      "Show editor panel", "view"),
+
+         # Dictation control
+         VoiceCommand("start dictation", ["dictate", "voice input"], "internal", "start_dictation",
+                      "Start voice dictation mode", "dictation"),
+         VoiceCommand("stop listening", ["stop", "pause"], "internal", "stop_listening",
+                      "Stop voice recognition", "dictation"),
+
+         # memoQ-specific (AHK) - AutoHotkey v2 expression syntax, since the
+         # generated wrapper script declares #Requires AutoHotkey v2.0
+         VoiceCommand("glossary", ["add term", "add to glossary"], "ahk_inline",
+                      'Send "!{Down}"',  # Alt+Down
+                      "Add term pair to memoQ termbase", "memoq"),
+         VoiceCommand("tag next", ["next tag", "insert tag"], "ahk_inline",
+                      'Send "^{PgDn}"\nSleep 100\nSend "{F9}"\nSleep 100\nSend "^{Enter}"',
+                      "Go to end, insert next tag, confirm", "memoq"),
+         VoiceCommand("confirm memoQ", ["confirm memo"], "ahk_inline",
+                      'Send "^{Enter}"',
+                      "Confirm segment in memoQ", "memoq"),
+
+         # Trados-specific (AHK)
+         VoiceCommand("confirm trados", ["confirm studio"], "ahk_inline",
+                      'Send "^{Enter}"',
+                      "Confirm segment in Trados Studio", "trados"),
+     ]
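+
+     # A user-defined entry follows the same shape; an illustrative example
+     # (not one of the defaults above):
+     #
+     #     VoiceCommand("insert tab", ["tab key"], "keystroke", "tab",
+     #                  "Press the Tab key", "editing")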
+
+     def __init__(self, user_data_path: Path, main_window=None):
+         super().__init__()
+         self.user_data_path = user_data_path
+         self.main_window = main_window
+         self.commands: List[VoiceCommand] = []
+         self.commands_file = user_data_path / "voice_commands.json"
+         self.ahk_script_dir = user_data_path / "voice_scripts"
+         self.match_threshold = 0.85  # Minimum similarity for fuzzy matching
+
+         # Internal action handlers (mapped to main_window methods)
+         self.internal_handlers: Dict[str, Callable] = {}
+
+         # Ensure directories exist
+         self.ahk_script_dir.mkdir(parents=True, exist_ok=True)
+
+         # Load commands
+         self.load_commands()
+
+         # Register internal handlers if main_window provided
+         if main_window:
+             self.register_main_window_handlers(main_window)
+
+     def register_main_window_handlers(self, main_window):
+         """Register handlers that call main window methods"""
+         self.main_window = main_window
+
+         self.internal_handlers = {
+             # Navigation - using correct method names from Supervertaler.py
+             "navigate_next": lambda: main_window.go_to_next_segment() if hasattr(main_window, 'go_to_next_segment') else self._log_missing('go_to_next_segment'),
+             "navigate_previous": lambda: main_window.go_to_previous_segment() if hasattr(main_window, 'go_to_previous_segment') else self._log_missing('go_to_previous_segment'),
+             "navigate_first": lambda: main_window.go_to_first_segment() if hasattr(main_window, 'go_to_first_segment') else self._log_missing('go_to_first_segment'),
+             "navigate_last": lambda: main_window.go_to_last_segment() if hasattr(main_window, 'go_to_last_segment') else self._log_missing('go_to_last_segment'),
+
+             # Editing - confirm_and_next_unconfirmed is the Enter key behavior
+             "confirm_segment": lambda: main_window.confirm_and_next_unconfirmed() if hasattr(main_window, 'confirm_and_next_unconfirmed') else self._log_missing('confirm_and_next_unconfirmed'),
+             "copy_source_to_target": lambda: main_window.copy_source_to_grid_target() if hasattr(main_window, 'copy_source_to_grid_target') else self._log_missing('copy_source_to_grid_target'),
+             "clear_target": lambda: main_window.clear_grid_target() if hasattr(main_window, 'clear_grid_target') else self._log_missing('clear_grid_target'),
+
+             # Translation
+             "translate_segment": lambda: main_window.translate_current_segment() if hasattr(main_window, 'translate_current_segment') else self._log_missing('translate_current_segment'),
+             "batch_translate": lambda: main_window.translate_batch() if hasattr(main_window, 'translate_batch') else self._log_missing('translate_batch'),
+
+             # Lookup
+             "open_superlookup": lambda: main_window._go_to_superlookup() if hasattr(main_window, '_go_to_superlookup') else self._log_missing('_go_to_superlookup'),
+             "concordance_search": lambda: main_window.show_concordance_search() if hasattr(main_window, 'show_concordance_search') else self._log_missing('show_concordance_search'),
+
+             # View
+             "show_log": lambda: self._show_tab(main_window, "Log"),
+             "show_editor": lambda: self._show_tab(main_window, "Editor"),
+
+             # Dictation
+             "start_dictation": lambda: main_window.start_voice_dictation() if hasattr(main_window, 'start_voice_dictation') else self._log_missing('start_voice_dictation'),
+             "stop_listening": lambda: self._stop_voice_recognition(),
+         }
+
+     def _log_missing(self, method_name: str):
+         """Log when a method is missing from main_window"""
+         print(f"⚠️ Voice command: Method '{method_name}' not found on main window")
+         if self.main_window and hasattr(self.main_window, 'log'):
+             self.main_window.log(f"⚠️ Voice command: Method '{method_name}' not found")
+
+     def _show_tab(self, main_window, tab_name: str):
+         """Helper to switch to a specific tab"""
+         if hasattr(main_window, 'main_tabs'):
+             for i in range(main_window.main_tabs.count()):
+                 if tab_name.lower() in main_window.main_tabs.tabText(i).lower():
+                     main_window.main_tabs.setCurrentIndex(i)
+                     return
+
+     def _stop_voice_recognition(self):
+         """Stop the voice recognition system"""
+         if self.main_window and hasattr(self.main_window, 'voice_command_listener'):
+             listener = self.main_window.voice_command_listener
+             if listener and hasattr(listener, 'stop'):
+                 listener.stop()
+
+     def load_commands(self):
+         """Load commands from JSON file, or create defaults"""
+         if self.commands_file.exists():
+             try:
+                 with open(self.commands_file, 'r', encoding='utf-8') as f:
+                     data = json.load(f)
+
+                 self.commands = []
+                 self.match_threshold = data.get('match_threshold', 0.85)
+
+                 for cmd_data in data.get('commands', []):
+                     self.commands.append(VoiceCommand(
+                         phrase=cmd_data['phrase'],
+                         aliases=cmd_data.get('aliases', []),
+                         action_type=cmd_data.get('action_type', 'internal'),
+                         action=cmd_data.get('action', ''),
+                         description=cmd_data.get('description', ''),
+                         category=cmd_data.get('category', 'general'),
+                         enabled=cmd_data.get('enabled', True)
+                     ))
+
+                 return
+             except Exception as e:
+                 print(f"Error loading voice commands: {e}")
+
+         # Use defaults
+         self.commands = self.DEFAULT_COMMANDS.copy()
+         self.save_commands()
+
+     def save_commands(self):
+         """Save commands to JSON file"""
+         data = {
+             'version': '1.0',
+             'match_threshold': self.match_threshold,
+             'commands': [
+                 {
+                     'phrase': cmd.phrase,
+                     'aliases': cmd.aliases,
+                     'action_type': cmd.action_type,
+                     'action': cmd.action,
+                     'description': cmd.description,
+                     'category': cmd.category,
+                     'enabled': cmd.enabled
+                 }
+                 for cmd in self.commands
+             ]
+         }
+
+         try:
+             with open(self.commands_file, 'w', encoding='utf-8') as f:
+                 json.dump(data, f, indent=2, ensure_ascii=False)
+         except Exception as e:
+             self.error_occurred.emit(f"Failed to save voice commands: {e}")
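+
+     # On disk, voice_commands.json then looks like this (abridged):
+     #
+     #     {
+     #       "version": "1.0",
+     #       "match_threshold": 0.85,
+     #       "commands": [
+     #         {"phrase": "next segment", "aliases": ["next", "down"],
+     #          "action_type": "internal", "action": "navigate_next",
+     #          "description": "Move to next segment",
+     #          "category": "navigation", "enabled": true}
+     #       ]
+     #     }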
+
+     def find_matching_command(self, spoken_text: str) -> Optional[Tuple[VoiceCommand, float]]:
+         """
+         Find the best matching command for spoken text.
+         Returns (command, confidence) or None if no match.
+         """
+         spoken_text = spoken_text.strip()
+         if not spoken_text:
+             return None
+
+         best_match = None
+         best_score = 0.0
+
+         for cmd in self.commands:
+             if not cmd.enabled:
+                 continue
+
+             is_match, score = cmd.matches(spoken_text, self.match_threshold)
+             if is_match and score > best_score:
+                 best_match = cmd
+                 best_score = score
+
+         if best_match:
+             return (best_match, best_score)
+         return None
+
+     def execute_command(self, command: VoiceCommand) -> bool:
+         """Execute a voice command. Returns True on success."""
+         try:
+             if command.action_type == "internal":
+                 return self._execute_internal(command)
+             elif command.action_type == "keystroke":
+                 return self._execute_keystroke(command)
+             elif command.action_type == "ahk_script":
+                 return self._execute_ahk_script(command)
+             elif command.action_type == "ahk_inline":
+                 return self._execute_ahk_inline(command)
+             else:
+                 self.error_occurred.emit(f"Unknown action type: {command.action_type}")
+                 return False
+         except Exception as e:
+             import traceback
+             self.error_occurred.emit(f"Error executing '{command.phrase}': {e}\n{traceback.format_exc()}")
+             return False
+
+     def _execute_internal(self, command: VoiceCommand) -> bool:
+         """Execute an internal Python action"""
+         handler = self.internal_handlers.get(command.action)
+         if handler:
+             try:
+                 result = handler()
+                 # Log success to main window if available
+                 if self.main_window and hasattr(self.main_window, 'log'):
+                     self.main_window.log(f"✓ Voice command executed: {command.phrase} → {command.action}")
+                 self.command_executed.emit(command.phrase, f"✓ {command.description}")
+                 return True
+             except Exception as e:
+                 import traceback
+                 error_msg = f"Error in handler for '{command.phrase}': {e}"
+                 if self.main_window and hasattr(self.main_window, 'log'):
+                     self.main_window.log(f"❌ {error_msg}")
+                     self.main_window.log(traceback.format_exc())
+                 self.error_occurred.emit(error_msg)
+                 return False
+         else:
+             error_msg = f"No handler for internal action: {command.action}"
+             if self.main_window and hasattr(self.main_window, 'log'):
+                 self.main_window.log(f"❌ {error_msg}")
+                 self.main_window.log(f"   Available handlers: {list(self.internal_handlers.keys())}")
+             self.error_occurred.emit(error_msg)
+             return False
+
+     def _execute_keystroke(self, command: VoiceCommand) -> bool:
+         """Execute a keystroke via AutoHotkey"""
+         # Convert keystroke format (e.g., "ctrl+s") to AHK format
+         ahk_keys = self._convert_to_ahk_keys(command.action)
+         ahk_code = f'Send "{ahk_keys}"'  # AutoHotkey v2 expression syntax
+         return self._run_ahk_code(ahk_code, command)
+
+     def _execute_ahk_script(self, command: VoiceCommand) -> bool:
+         """Execute a saved AHK script file"""
+         script_path = self.ahk_script_dir / f"{command.action}.ahk"
+         if not script_path.exists():
+             self.error_occurred.emit(f"AHK script not found: {script_path}")
+             return False
+
+         try:
+             # Find AutoHotkey executable
+             ahk_exe = self._find_ahk_executable()
+             if not ahk_exe:
+                 self.error_occurred.emit("AutoHotkey not found. Please install AutoHotkey v2.")
+                 return False
+
+             subprocess.Popen([ahk_exe, str(script_path)],
+                              creationflags=subprocess.CREATE_NO_WINDOW)
+             self.command_executed.emit(command.phrase, f"✓ {command.description}")
+             return True
+         except Exception as e:
+             self.error_occurred.emit(f"Failed to run AHK script: {e}")
+             return False
+
+     def _execute_ahk_inline(self, command: VoiceCommand) -> bool:
+         """Execute inline AHK code"""
+         return self._run_ahk_code(command.action, command)
+
+     def _run_ahk_code(self, ahk_code: str, command: VoiceCommand) -> bool:
+         """Run arbitrary AHK code"""
+         try:
+             ahk_exe = self._find_ahk_executable()
+             if not ahk_exe:
+                 self.error_occurred.emit("AutoHotkey not found. Please install AutoHotkey v2.")
+                 return False
+
+             # Create temporary script
+             temp_script = self.ahk_script_dir / "_temp_voice_cmd.ahk"
+
+             # Wrap code in AHK v2 format
+             full_script = f"""#Requires AutoHotkey v2.0
+ #SingleInstance Force
+ {ahk_code}
+ ExitApp
+ """
+
+             with open(temp_script, 'w', encoding='utf-8') as f:
+                 f.write(full_script)
+
+             # Run script
+             subprocess.Popen([ahk_exe, str(temp_script)],
+                              creationflags=subprocess.CREATE_NO_WINDOW)
+
+             self.command_executed.emit(command.phrase, f"✓ {command.description}")
+             return True
+
+         except Exception as e:
+             self.error_occurred.emit(f"Failed to run AHK code: {e}")
+             return False
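+
+     # For the default "save project" command ("keystroke", "ctrl+s"), the
+     # temporary script written to voice_scripts/_temp_voice_cmd.ahk comes out as:
+     #
+     #     #Requires AutoHotkey v2.0
+     #     #SingleInstance Force
+     #     Send "^s"
+     #     ExitApp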
+
+     def _convert_to_ahk_keys(self, keystroke: str) -> str:
+         """Convert keystroke string to AHK Send format"""
+         # Map modifier names to AHK symbols
+         modifiers = {
+             'ctrl': '^',
+             'control': '^',
+             'alt': '!',
+             'shift': '+',
+             'win': '#',
+             'windows': '#'
+         }
+
+         # Special key names
+         special_keys = {
+             'enter': '{Enter}',
+             'return': '{Enter}',
+             'tab': '{Tab}',
+             'escape': '{Esc}',
+             'esc': '{Esc}',
+             'space': '{Space}',
+             'backspace': '{Backspace}',
+             'delete': '{Delete}',
+             'del': '{Delete}',
+             'insert': '{Insert}',
+             'ins': '{Insert}',
+             'home': '{Home}',
+             'end': '{End}',
+             'pageup': '{PgUp}',
+             'pgup': '{PgUp}',
+             'pagedown': '{PgDn}',
+             'pgdn': '{PgDn}',
+             'up': '{Up}',
+             'down': '{Down}',
+             'left': '{Left}',
+             'right': '{Right}',
+             'f1': '{F1}', 'f2': '{F2}', 'f3': '{F3}', 'f4': '{F4}',
+             'f5': '{F5}', 'f6': '{F6}', 'f7': '{F7}', 'f8': '{F8}',
+             'f9': '{F9}', 'f10': '{F10}', 'f11': '{F11}', 'f12': '{F12}',
+         }
+
+         parts = keystroke.lower().replace(' ', '').split('+')
+         result = ''
+
+         for part in parts:
+             if part in modifiers:
+                 result += modifiers[part]
+             elif part in special_keys:
+                 result += special_keys[part]
+             else:
+                 # Regular key
+                 result += part
+
+         return result
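+
+     # Sample conversions produced by the mapping above:
+     #
+     #     "ctrl+s"         -> "^s"
+     #     "ctrl+shift+end" -> "^+{End}"
+     #     "alt+f4"         -> "!{F4}"
+     #     "win+up"         -> "#{Up}"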
+
+     def _find_ahk_executable(self) -> Optional[str]:
+         """Find AutoHotkey v2 executable"""
+         # Common installation paths
+         possible_paths = [
+             r"C:\Program Files\AutoHotkey\v2\AutoHotkey64.exe",
+             r"C:\Program Files\AutoHotkey\v2\AutoHotkey32.exe",
+             r"C:\Program Files\AutoHotkey\AutoHotkey.exe",
+             r"C:\Program Files (x86)\AutoHotkey\AutoHotkey.exe",
+         ]
+
+         # Check PATH first
+         import shutil
+         ahk_in_path = shutil.which("AutoHotkey64") or shutil.which("AutoHotkey")
+         if ahk_in_path:
+             return ahk_in_path
+
+         # Check common locations
+         for path in possible_paths:
+             if os.path.exists(path):
+                 return path
+
+         return None
+
+     def process_spoken_text(self, spoken_text: str) -> Tuple[bool, str]:
+         """
+         Process spoken text - try to match command, return success status and message.
+         Returns (was_command, message_or_text)
+         - If command matched: (True, "Command executed: ...")
+         - If no match: (False, original_spoken_text) for dictation fallback
+         """
+         match_result = self.find_matching_command(spoken_text)
+
+         if match_result:
+             command, confidence = match_result
+             success = self.execute_command(command)
+             if success:
+                 return (True, f"✓ {command.phrase} ({confidence:.0%})")
+             else:
+                 return (True, f"✗ Failed: {command.phrase}")
+
+         # No command matched - return text for dictation
+         self.command_not_found.emit(spoken_text)
+         return (False, spoken_text)
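+
+     # Typical use from the speech layer (sketch):
+     #
+     #     was_command, msg = manager.process_spoken_text("next segment")
+     #     # -> (True, "✓ next segment (100%)"); the editor advances a segment
+     #
+     #     was_command, text = manager.process_spoken_text("good morning")
+     #     # -> (False, "good morning"); the caller inserts it as dictation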
+
+     def add_command(self, command: VoiceCommand):
+         """Add a new command"""
+         self.commands.append(command)
+         self.save_commands()
+
+     def remove_command(self, phrase: str):
+         """Remove a command by phrase"""
+         self.commands = [c for c in self.commands if c.phrase != phrase]
+         self.save_commands()
+
+     def get_commands_by_category(self) -> Dict[str, List[VoiceCommand]]:
+         """Get commands organized by category"""
+         categories: Dict[str, List[VoiceCommand]] = {}
+         for cmd in self.commands:
+             if cmd.category not in categories:
+                 categories[cmd.category] = []
+             categories[cmd.category].append(cmd)
+         return categories
+
+     def export_commands(self, filepath: Path):
+         """Export commands to a file"""
+         data = {
+             'version': '1.0',
+             'match_threshold': self.match_threshold,
+             'commands': [
+                 {
+                     'phrase': cmd.phrase,
+                     'aliases': cmd.aliases,
+                     'action_type': cmd.action_type,
+                     'action': cmd.action,
+                     'description': cmd.description,
+                     'category': cmd.category,
+                     'enabled': cmd.enabled
+                 }
+                 for cmd in self.commands
+             ]
+         }
+         with open(filepath, 'w', encoding='utf-8') as f:
+             json.dump(data, f, indent=2, ensure_ascii=False)
+
+     def import_commands(self, filepath: Path, merge: bool = True):
+         """Import commands from a file"""
+         with open(filepath, 'r', encoding='utf-8') as f:
+             data = json.load(f)
+
+         imported_commands = []
+         for cmd_data in data.get('commands', []):
+             imported_commands.append(VoiceCommand(
+                 phrase=cmd_data['phrase'],
+                 aliases=cmd_data.get('aliases', []),
+                 action_type=cmd_data.get('action_type', 'internal'),
+                 action=cmd_data.get('action', ''),
+                 description=cmd_data.get('description', ''),
+                 category=cmd_data.get('category', 'general'),
+                 enabled=cmd_data.get('enabled', True)
+             ))
+
+         if merge:
+             # Add imported commands, skip duplicates
+             existing_phrases = {c.phrase for c in self.commands}
+             for cmd in imported_commands:
+                 if cmd.phrase not in existing_phrases:
+                     self.commands.append(cmd)
+         else:
+             # Replace all commands
+             self.commands = imported_commands
+
+         self.save_commands()
+
+
+ class ContinuousVoiceListener(QObject):
+     """
+     Continuous voice listening with Voice Activity Detection (VAD).
+
+     How it works:
+     1. Continuously monitors microphone audio levels
+     2. When speech is detected (audio above threshold), starts recording
+     3. When silence is detected (audio below threshold for X ms), stops recording
+     4. Sends recording to Whisper for transcription
+     5. Processes result (command or dictation)
+     6. Repeats
+
+     This eliminates the need to press F9 twice - just speak and it listens.
+     """
+
+     # Signals
+     listening_started = pyqtSignal()
+     listening_stopped = pyqtSignal()
+     speech_detected = pyqtSignal(str)  # Raw transcribed text
+     command_detected = pyqtSignal(str, str)  # (phrase, result)
+     text_for_dictation = pyqtSignal(str)  # Text that didn't match any command
+     status_update = pyqtSignal(str)
+     error_occurred = pyqtSignal(str)
+     vad_status_changed = pyqtSignal(str)  # "listening", "recording", "processing"
+
+     def __init__(self, command_manager: VoiceCommandManager,
+                  model_name: str = "base",
+                  language: str = "auto",
+                  use_api: bool = False,
+                  api_key: Optional[str] = None):
+         super().__init__()
+         self.command_manager = command_manager
+         self.model_name = model_name
+         self.language = None if language == "auto" else language
+         self.use_api = use_api
+         self.api_key = api_key
+
+         # VAD settings
+         self.speech_threshold = 0.02  # RMS threshold to detect speech (adjustable)
+         self.silence_duration = 0.8  # Seconds of silence before stopping recording
+         self.min_speech_duration = 0.3  # Minimum speech duration to process
+         self.max_speech_duration = 15.0  # Maximum recording duration
+         self.is_listening = False
+         self._thread = None
+         self._whisper_model = None  # Cached Whisper model
+
+     def start(self):
+         """Start continuous listening"""
+         if self.is_listening:
+             return
+
+         self.is_listening = True
+         self._thread = _VADListenerThread(self)
+         self._thread.transcription_ready.connect(self._on_transcription)
+         self._thread.status_update.connect(self.status_update.emit)
+         self._thread.error_occurred.connect(self.error_occurred.emit)
+         self._thread.vad_status.connect(self.vad_status_changed.emit)
+         self._thread.start()
+         self.listening_started.emit()
+
+     def stop(self):
+         """Stop continuous listening"""
+         self.is_listening = False
+         if self._thread:
+             self._thread.stop()
+             self._thread = None
+         self.listening_stopped.emit()
+
+     def set_sensitivity(self, level: str):
+         """
+         Set microphone sensitivity level.
+         - "low": Requires loud speech (noisy environment)
+         - "medium": Normal sensitivity
+         - "high": Picks up quiet speech (quiet environment)
+         """
+         thresholds = {
+             "low": 0.04,
+             "medium": 0.02,
+             "high": 0.01
+         }
+         self.speech_threshold = thresholds.get(level, 0.02)
+
+     def _on_transcription(self, text: str):
+         """Handle transcribed speech"""
+         self.speech_detected.emit(text)
+
+         # Try to match as command
+         was_command, result = self.command_manager.process_spoken_text(text)
+
+         if was_command:
+             self.command_detected.emit(text, result)
+         else:
+             # Pass to dictation
+             self.text_for_dictation.emit(text)
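+
+ # Wiring sketch for the listener (illustrative; `main_window` and its
+ # `insert_dictated_text` slot are hypothetical, not defined in this module):
+ #
+ #     manager = VoiceCommandManager(Path.home() / ".supervertaler", main_window)
+ #     listener = ContinuousVoiceListener(manager, model_name="base")
+ #     listener.command_detected.connect(lambda phrase, result: print(result))
+ #     listener.text_for_dictation.connect(main_window.insert_dictated_text)
+ #     listener.start()  # speak: "next segment" navigates, other text dictates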
+
+
+ class _VADListenerThread(QObject):
+     """
+     Voice Activity Detection listener thread.
+     Uses amplitude-based VAD to detect speech start/end.
+     """
+
+     transcription_ready = pyqtSignal(str)
+     status_update = pyqtSignal(str)
+     error_occurred = pyqtSignal(str)
+     vad_status = pyqtSignal(str)  # "waiting", "recording", "processing"
+
+     def __init__(self, listener: ContinuousVoiceListener):
+         super().__init__()
+         self.listener = listener
+         self._running = False
+         self._thread = None
+         self._model = None  # Cached whisper model
+
+     def start(self):
+         """Start the listener thread"""
+         import threading
+
+         self._running = True
+         self._thread = threading.Thread(target=self._run, daemon=True)
+         self._thread.start()
+
+     def stop(self):
+         """Stop the listener thread"""
+         self._running = False
+
+     def _run(self):
+         """Main VAD listening loop"""
+         try:
+             import sounddevice as sd
+             import numpy as np
+             import tempfile
+             import wave
+             import os
+             import time
+
+             # Sample rate and chunk settings
+             sample_rate = 16000
+             chunk_samples = int(0.1 * sample_rate)  # 100ms chunks for VAD
+
+             # Get settings from listener
+             speech_threshold = self.listener.speech_threshold
+             silence_duration = self.listener.silence_duration
+             min_speech_duration = self.listener.min_speech_duration
+             max_speech_duration = self.listener.max_speech_duration
+
+             # Check if using API or local model
+             if self.listener.use_api and self.listener.api_key:
+                 self.status_update.emit("🎤 Using OpenAI Whisper API (fast & accurate)")
+                 self._model = None  # No local model needed
+             else:
+                 # Load local Whisper model once
+                 self.status_update.emit("🎤 Loading local speech model...")
+                 self.vad_status.emit("loading")
+                 try:
+                     import whisper
+                 except ImportError:
+                     self.error_occurred.emit(
+                         "Local Whisper is not installed.\n\n"
+                         "Option A (recommended): Choose 'OpenAI Whisper API' in Settings → Supervoice (requires OpenAI API key).\n"
+                         "Option B: Install Local Whisper:\n"
+                         "  pip install supervertaler[local-whisper]"
+                     )
+                     self._running = False
+                     return
+                 self._model = whisper.load_model(self.listener.model_name)
+
+             self.status_update.emit("🎤 Always-on listening active (waiting for speech...)")
+             self.vad_status.emit("waiting")
+
+             # Audio buffer for recording
+             audio_buffer = []
+             is_recording = False
+             silence_start = None
+             speech_start = None
+
+             def audio_callback(indata, frames, time_info, status):
+                 """Callback for audio stream - processes each chunk"""
+                 nonlocal audio_buffer, is_recording, silence_start, speech_start
+
+                 if not self._running:
+                     return
+
+                 # Calculate RMS amplitude
+                 rms = np.sqrt(np.mean(indata**2))
+                 is_speech = rms > speech_threshold
+
+                 if is_speech:
+                     if not is_recording:
+                         # Speech started
+                         is_recording = True
+                         speech_start = time.time()
+                         audio_buffer = []
+                         self.vad_status.emit("recording")
+                         self.status_update.emit("🔴 Recording...")
+
+                     # Reset silence counter
+                     silence_start = None
+
+                     # Add to buffer
+                     audio_buffer.append(indata.copy())
+
+                     # Check max duration
+                     if time.time() - speech_start > max_speech_duration:
+                         # Force stop recording
+                         is_recording = False
+                         self._process_audio(audio_buffer, sample_rate)
+                         audio_buffer = []
+                         self.vad_status.emit("waiting")
+
+                 else:  # Silence
+                     if is_recording:
+                         # Still recording, add silence chunk
+                         audio_buffer.append(indata.copy())
+
+                         # Start or continue silence timer
+                         if silence_start is None:
+                             silence_start = time.time()
+
+                         # Check if silence duration exceeded
+                         if time.time() - silence_start > silence_duration:
+                             # Speech ended - process if long enough
+                             speech_duration = time.time() - speech_start
+                             is_recording = False
+
+                             if speech_duration >= min_speech_duration:
+                                 self._process_audio(audio_buffer, sample_rate)
+                             else:
+                                 self.status_update.emit("🎤 (too short, ignored)")
+
+                             audio_buffer = []
+                             silence_start = None
+                             self.vad_status.emit("waiting")
+                             self.status_update.emit("🎤 Listening...")
+
+             # Start audio stream
+             with sd.InputStream(
+                 samplerate=sample_rate,
+                 channels=1,
+                 dtype='float32',
+                 blocksize=chunk_samples,
+                 callback=audio_callback
+             ):
+                 while self._running:
+                     time.sleep(0.1)
+
+         except Exception as e:
+             import traceback
+             self.error_occurred.emit(f"Listener error: {e}\n{traceback.format_exc()}")
+         finally:
+             self.vad_status.emit("stopped")
+             self.status_update.emit("🔇 Stopped listening")
+
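+     # The gate above is a plain RMS check over 100 ms float32 chunks:
+     #
+     #     rms = sqrt(mean(indata ** 2))   # samples in [-1.0, 1.0]
+     #     is_speech = rms > 0.02          # "medium" sensitivity default
+     #
+     # Recording stops after 0.8 s below the gate, and clips shorter than
+     # 0.3 s are discarded before transcription.
+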
+     def _process_audio(self, audio_buffer: list, sample_rate: int):
+         """Process recorded audio - save to file and transcribe"""
+         try:
+             import numpy as np
+             import tempfile
+             import wave
+             import os
+
+             self.vad_status.emit("processing")
+             self.status_update.emit("⏳ Transcribing...")
+
+             # Concatenate audio chunks
+             if not audio_buffer:
+                 return
+
+             audio_data = np.concatenate(audio_buffer, axis=0)
+
+             # Convert to int16
+             audio_int16 = np.int16(audio_data * 32767)
+
+             # Save to temp file
+             temp_dir = tempfile.gettempdir()
+             temp_path = os.path.join(temp_dir, f"sv_vad_{os.getpid()}.wav")
+
+             with wave.open(temp_path, 'wb') as wf:
+                 wf.setnchannels(1)
+                 wf.setsampwidth(2)
+                 wf.setframerate(sample_rate)
+                 wf.writeframes(audio_int16.tobytes())
+
+             # Transcribe using API or local model
+             if self.listener.use_api and self.listener.api_key:
+                 text = self._transcribe_with_api(temp_path)
+             else:
+                 text = self._transcribe_with_local(temp_path)
+
+             # Clean up
+             try:
+                 os.unlink(temp_path)
+             except OSError:
+                 pass
+
+             # Emit result
+             if text:
+                 self.transcription_ready.emit(text)
+
+         except Exception as e:
+             import traceback
+             self.error_occurred.emit(f"Processing error: {e}\n{traceback.format_exc()}")
+
+     def _transcribe_with_api(self, audio_path: str) -> str:
+         """Transcribe using OpenAI Whisper API - much more accurate"""
+         try:
+             from openai import OpenAI
+
+             client = OpenAI(api_key=self.listener.api_key)
+
+             with open(audio_path, "rb") as audio_file:
+                 # Use whisper-1 model (OpenAI's hosted Whisper)
+                 kwargs = {"model": "whisper-1", "file": audio_file}
+
+                 # Add language hint if specified
+                 if self.listener.language:
+                     kwargs["language"] = self.listener.language
+
+                 response = client.audio.transcriptions.create(**kwargs)
+
+             return response.text.strip()
+
+         except Exception as e:
+             self.error_occurred.emit(f"OpenAI API error: {e}")
+             return ""
+
+     def _transcribe_with_local(self, audio_path: str) -> str:
+         """Transcribe using local Whisper model"""
+         try:
+             if self.listener.language:
+                 result = self._model.transcribe(audio_path, language=self.listener.language)
+             else:
+                 result = self._model.transcribe(audio_path)
+
+             return result["text"].strip()
+
+         except Exception as e:
+             self.error_occurred.emit(f"Local transcription error: {e}")
+             return ""
+
+
+ # Legacy class for backwards compatibility
+ class _ListenerThread(_VADListenerThread):
+     """Legacy alias for _VADListenerThread"""
+     pass
+