supervertaler-1.9.153-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of supervertaler might be problematic.
- Supervertaler.py +47886 -0
- modules/__init__.py +10 -0
- modules/ai_actions.py +964 -0
- modules/ai_attachment_manager.py +343 -0
- modules/ai_file_viewer_dialog.py +210 -0
- modules/autofingers_engine.py +466 -0
- modules/cafetran_docx_handler.py +379 -0
- modules/config_manager.py +469 -0
- modules/database_manager.py +1878 -0
- modules/database_migrations.py +417 -0
- modules/dejavurtf_handler.py +779 -0
- modules/document_analyzer.py +427 -0
- modules/docx_handler.py +689 -0
- modules/encoding_repair.py +319 -0
- modules/encoding_repair_Qt.py +393 -0
- modules/encoding_repair_ui.py +481 -0
- modules/feature_manager.py +350 -0
- modules/figure_context_manager.py +340 -0
- modules/file_dialog_helper.py +148 -0
- modules/find_replace.py +164 -0
- modules/find_replace_qt.py +457 -0
- modules/glossary_manager.py +433 -0
- modules/image_extractor.py +188 -0
- modules/keyboard_shortcuts_widget.py +571 -0
- modules/llm_clients.py +1211 -0
- modules/llm_leaderboard.py +737 -0
- modules/llm_superbench_ui.py +1401 -0
- modules/local_llm_setup.py +1104 -0
- modules/model_update_dialog.py +381 -0
- modules/model_version_checker.py +373 -0
- modules/mqxliff_handler.py +638 -0
- modules/non_translatables_manager.py +743 -0
- modules/pdf_rescue_Qt.py +1822 -0
- modules/pdf_rescue_tkinter.py +909 -0
- modules/phrase_docx_handler.py +516 -0
- modules/project_home_panel.py +209 -0
- modules/prompt_assistant.py +357 -0
- modules/prompt_library.py +689 -0
- modules/prompt_library_migration.py +447 -0
- modules/quick_access_sidebar.py +282 -0
- modules/ribbon_widget.py +597 -0
- modules/sdlppx_handler.py +874 -0
- modules/setup_wizard.py +353 -0
- modules/shortcut_manager.py +932 -0
- modules/simple_segmenter.py +128 -0
- modules/spellcheck_manager.py +727 -0
- modules/statuses.py +207 -0
- modules/style_guide_manager.py +315 -0
- modules/superbench_ui.py +1319 -0
- modules/superbrowser.py +329 -0
- modules/supercleaner.py +600 -0
- modules/supercleaner_ui.py +444 -0
- modules/superdocs.py +19 -0
- modules/superdocs_viewer_qt.py +382 -0
- modules/superlookup.py +252 -0
- modules/tag_cleaner.py +260 -0
- modules/tag_manager.py +333 -0
- modules/term_extractor.py +270 -0
- modules/termbase_entry_editor.py +842 -0
- modules/termbase_import_export.py +488 -0
- modules/termbase_manager.py +1060 -0
- modules/termview_widget.py +1172 -0
- modules/theme_manager.py +499 -0
- modules/tm_editor_dialog.py +99 -0
- modules/tm_manager_qt.py +1280 -0
- modules/tm_metadata_manager.py +545 -0
- modules/tmx_editor.py +1461 -0
- modules/tmx_editor_qt.py +2784 -0
- modules/tmx_generator.py +284 -0
- modules/tracked_changes.py +900 -0
- modules/trados_docx_handler.py +430 -0
- modules/translation_memory.py +715 -0
- modules/translation_results_panel.py +2134 -0
- modules/translation_services.py +282 -0
- modules/unified_prompt_library.py +659 -0
- modules/unified_prompt_manager_qt.py +3951 -0
- modules/voice_commands.py +920 -0
- modules/voice_dictation.py +477 -0
- modules/voice_dictation_lite.py +249 -0
- supervertaler-1.9.153.dist-info/METADATA +896 -0
- supervertaler-1.9.153.dist-info/RECORD +85 -0
- supervertaler-1.9.153.dist-info/WHEEL +5 -0
- supervertaler-1.9.153.dist-info/entry_points.txt +2 -0
- supervertaler-1.9.153.dist-info/licenses/LICENSE +21 -0
- supervertaler-1.9.153.dist-info/top_level.txt +2 -0
@@ -0,0 +1,920 @@
"""
Voice Commands Module for Supervertaler
Talon-style voice command system with 3 tiers:
- Tier 1: In-app commands (Python/PyQt6)
- Tier 2: System commands (AutoHotkey scripts)
- Tier 3: Dictation fallback (insert as text)
"""

import json
import os
import re
import subprocess
from pathlib import Path
from typing import Optional, Dict, List, Callable, Tuple
from dataclasses import dataclass, field
from difflib import SequenceMatcher
from PyQt6.QtCore import QObject, pyqtSignal


@dataclass
class VoiceCommand:
    """Represents a single voice command"""
    phrase: str  # The spoken phrase (e.g., "confirm segment")
    aliases: List[str] = field(default_factory=list)  # Alternative phrases
    action_type: str = "internal"  # "internal", "keystroke", "ahk_script", "ahk_inline"
    action: str = ""  # Action to execute
    description: str = ""  # Human-readable description
    category: str = "general"  # Category for organization
    enabled: bool = True

    def matches(self, spoken_text: str, threshold: float = 0.85) -> Tuple[bool, float]:
        """
        Check if spoken text matches this command.
        Returns (is_match, confidence_score)
        """
        spoken_lower = spoken_text.lower().strip()

        # Check exact matches first
        all_phrases = [self.phrase.lower()] + [a.lower() for a in self.aliases]
        for phrase in all_phrases:
            if spoken_lower == phrase:
                return (True, 1.0)

        # Check fuzzy matches
        best_score = 0.0
        for phrase in all_phrases:
            # Use SequenceMatcher for fuzzy matching
            score = SequenceMatcher(None, spoken_lower, phrase).ratio()
            best_score = max(best_score, score)

            # Also check if spoken text contains the phrase
            if phrase in spoken_lower or spoken_lower in phrase:
                # Boost score for partial matches
                length_ratio = min(len(phrase), len(spoken_lower)) / max(len(phrase), len(spoken_lower))
                best_score = max(best_score, 0.9 * length_ratio)

        return (best_score >= threshold, best_score)


class VoiceCommandManager(QObject):
    """
    Manages voice commands - matching spoken text to actions and executing them.
    """

    # Signals
    command_executed = pyqtSignal(str, str)  # (command_phrase, result_message)
    command_not_found = pyqtSignal(str)  # spoken_text that didn't match
    error_occurred = pyqtSignal(str)  # error message

    # Default commands
    DEFAULT_COMMANDS = [
        # Navigation
        VoiceCommand("next segment", ["next", "down"], "internal", "navigate_next",
                     "Move to next segment", "navigation"),
        VoiceCommand("previous segment", ["previous", "back", "up"], "internal", "navigate_previous",
                     "Move to previous segment", "navigation"),
        VoiceCommand("first segment", ["go to start", "beginning"], "internal", "navigate_first",
                     "Jump to first segment", "navigation"),
        VoiceCommand("last segment", ["go to end", "end"], "internal", "navigate_last",
                     "Jump to last segment", "navigation"),

        # Segment actions
        VoiceCommand("confirm", ["confirm segment", "done", "okay"], "internal", "confirm_segment",
                     "Confirm current segment", "editing"),
        VoiceCommand("copy source", ["copy from source", "source to target"], "internal", "copy_source_to_target",
                     "Copy source text to target", "editing"),
        VoiceCommand("clear target", ["clear", "delete target"], "internal", "clear_target",
                     "Clear target text", "editing"),
        VoiceCommand("undo", [], "keystroke", "ctrl+z",
                     "Undo last action", "editing"),
        VoiceCommand("redo", [], "keystroke", "ctrl+y",
                     "Redo last action", "editing"),

        # Translation
        VoiceCommand("translate", ["translate segment", "translate this"], "internal", "translate_segment",
                     "AI translate current segment", "translation"),
        VoiceCommand("translate all", ["batch translate"], "internal", "batch_translate",
                     "Translate all segments", "translation"),

        # Lookup & Search
        VoiceCommand("lookup", ["super lookup", "search"], "internal", "open_superlookup",
                     "Open Superlookup (Ctrl+K)", "lookup"),
        VoiceCommand("concordance", ["search memory", "search TM"], "internal", "concordance_search",
                     "Open concordance search", "lookup"),

        # File operations
        VoiceCommand("save project", ["save"], "keystroke", "ctrl+s",
                     "Save current project", "file"),
        VoiceCommand("open project", ["open"], "keystroke", "ctrl+o",
                     "Open project", "file"),

        # View
        VoiceCommand("show log", ["open log", "log tab"], "internal", "show_log",
                     "Show log panel", "view"),
        VoiceCommand("show editor", ["editor tab", "go to editor"], "internal", "show_editor",
                     "Show editor panel", "view"),

        # Dictation control
        VoiceCommand("start dictation", ["dictate", "voice input"], "internal", "start_dictation",
                     "Start voice dictation mode", "dictation"),
        VoiceCommand("stop listening", ["stop", "pause"], "internal", "stop_listening",
                     "Stop voice recognition", "dictation"),

        # memoQ-specific (AHK)
        VoiceCommand("glossary", ["add term", "add to glossary"], "ahk_inline",
                     "Send, !{Down}",  # Alt+Down
                     "Add term pair to memoQ termbase", "memoq"),
        VoiceCommand("tag next", ["next tag", "insert tag"], "ahk_inline",
                     "Send, ^{PgDn}\nSleep, 100\nSend, {F9}\nSleep, 100\nSend, ^{Enter}",
                     "Go to end, insert next tag, confirm", "memoq"),
        VoiceCommand("confirm memoQ", ["confirm memo"], "ahk_inline",
                     "Send, ^{Enter}",
                     "Confirm segment in memoQ", "memoq"),

        # Trados-specific (AHK)
        VoiceCommand("confirm trados", ["confirm studio"], "ahk_inline",
                     "Send, ^{Enter}",
                     "Confirm segment in Trados Studio", "trados"),
    ]

    def __init__(self, user_data_path: Path, main_window=None):
        super().__init__()
        self.user_data_path = user_data_path
        self.main_window = main_window
        self.commands: List[VoiceCommand] = []
        self.commands_file = user_data_path / "voice_commands.json"
        self.ahk_script_dir = user_data_path / "voice_scripts"
        self.match_threshold = 0.85  # Minimum similarity for fuzzy matching

        # Internal action handlers (mapped to main_window methods)
        self.internal_handlers: Dict[str, Callable] = {}

        # Ensure directories exist
        self.ahk_script_dir.mkdir(parents=True, exist_ok=True)

        # Load commands
        self.load_commands()

        # Register internal handlers if main_window provided
        if main_window:
            self.register_main_window_handlers(main_window)

    def register_main_window_handlers(self, main_window):
        """Register handlers that call main window methods"""
        self.main_window = main_window

        self.internal_handlers = {
            # Navigation - using correct method names from Supervertaler.py
            "navigate_next": lambda: main_window.go_to_next_segment() if hasattr(main_window, 'go_to_next_segment') else self._log_missing('go_to_next_segment'),
            "navigate_previous": lambda: main_window.go_to_previous_segment() if hasattr(main_window, 'go_to_previous_segment') else self._log_missing('go_to_previous_segment'),
            "navigate_first": lambda: main_window.go_to_first_segment() if hasattr(main_window, 'go_to_first_segment') else self._log_missing('go_to_first_segment'),
            "navigate_last": lambda: main_window.go_to_last_segment() if hasattr(main_window, 'go_to_last_segment') else self._log_missing('go_to_last_segment'),

            # Editing - confirm_and_next_unconfirmed is the Enter key behavior
            "confirm_segment": lambda: main_window.confirm_and_next_unconfirmed() if hasattr(main_window, 'confirm_and_next_unconfirmed') else self._log_missing('confirm_and_next_unconfirmed'),
            "copy_source_to_target": lambda: main_window.copy_source_to_grid_target() if hasattr(main_window, 'copy_source_to_grid_target') else self._log_missing('copy_source_to_grid_target'),
            "clear_target": lambda: main_window.clear_grid_target() if hasattr(main_window, 'clear_grid_target') else self._log_missing('clear_grid_target'),

            # Translation
            "translate_segment": lambda: main_window.translate_current_segment() if hasattr(main_window, 'translate_current_segment') else self._log_missing('translate_current_segment'),
            "batch_translate": lambda: main_window.translate_batch() if hasattr(main_window, 'translate_batch') else self._log_missing('translate_batch'),

            # Lookup
            "open_superlookup": lambda: main_window._go_to_superlookup() if hasattr(main_window, '_go_to_superlookup') else self._log_missing('_go_to_superlookup'),
            "concordance_search": lambda: main_window.show_concordance_search() if hasattr(main_window, 'show_concordance_search') else self._log_missing('show_concordance_search'),

            # View
            "show_log": lambda: self._show_tab(main_window, "Log"),
            "show_editor": lambda: self._show_tab(main_window, "Editor"),

            # Dictation
            "start_dictation": lambda: main_window.start_voice_dictation() if hasattr(main_window, 'start_voice_dictation') else self._log_missing('start_voice_dictation'),
            "stop_listening": lambda: self._stop_voice_recognition(),
        }

    def _log_missing(self, method_name: str):
        """Log when a method is missing from main_window"""
        print(f"⚠️ Voice command: Method '{method_name}' not found on main window")
        if self.main_window and hasattr(self.main_window, 'log'):
            self.main_window.log(f"⚠️ Voice command: Method '{method_name}' not found")

    def _show_tab(self, main_window, tab_name: str):
        """Helper to switch to a specific tab"""
        if hasattr(main_window, 'main_tabs'):
            for i in range(main_window.main_tabs.count()):
                if tab_name.lower() in main_window.main_tabs.tabText(i).lower():
                    main_window.main_tabs.setCurrentIndex(i)
                    return

    def _stop_voice_recognition(self):
        """Stop the voice recognition system"""
        if self.main_window and hasattr(self.main_window, 'voice_command_listener'):
            listener = self.main_window.voice_command_listener
            if listener and hasattr(listener, 'stop'):
                listener.stop()

    def load_commands(self):
        """Load commands from JSON file, or create defaults"""
        if self.commands_file.exists():
            try:
                with open(self.commands_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)

                self.commands = []
                self.match_threshold = data.get('match_threshold', 0.85)

                for cmd_data in data.get('commands', []):
                    self.commands.append(VoiceCommand(
                        phrase=cmd_data['phrase'],
                        aliases=cmd_data.get('aliases', []),
                        action_type=cmd_data.get('action_type', 'internal'),
                        action=cmd_data.get('action', ''),
                        description=cmd_data.get('description', ''),
                        category=cmd_data.get('category', 'general'),
                        enabled=cmd_data.get('enabled', True)
                    ))

                return
            except Exception as e:
                print(f"Error loading voice commands: {e}")

        # Use defaults
        self.commands = self.DEFAULT_COMMANDS.copy()
        self.save_commands()

    def save_commands(self):
        """Save commands to JSON file"""
        data = {
            'version': '1.0',
            'match_threshold': self.match_threshold,
            'commands': [
                {
                    'phrase': cmd.phrase,
                    'aliases': cmd.aliases,
                    'action_type': cmd.action_type,
                    'action': cmd.action,
                    'description': cmd.description,
                    'category': cmd.category,
                    'enabled': cmd.enabled
                }
                for cmd in self.commands
            ]
        }

        try:
            with open(self.commands_file, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
        except Exception as e:
            self.error_occurred.emit(f"Failed to save voice commands: {e}")

    def find_matching_command(self, spoken_text: str) -> Optional[Tuple[VoiceCommand, float]]:
        """
        Find the best matching command for spoken text.
        Returns (command, confidence) or None if no match.
        """
        spoken_text = spoken_text.strip()
        if not spoken_text:
            return None

        best_match = None
        best_score = 0.0

        for cmd in self.commands:
            if not cmd.enabled:
                continue

            is_match, score = cmd.matches(spoken_text, self.match_threshold)
            if is_match and score > best_score:
                best_match = cmd
                best_score = score

        if best_match:
            return (best_match, best_score)
        return None

    def execute_command(self, command: VoiceCommand) -> bool:
        """Execute a voice command. Returns True on success."""
        try:
            if command.action_type == "internal":
                return self._execute_internal(command)
            elif command.action_type == "keystroke":
                return self._execute_keystroke(command)
            elif command.action_type == "ahk_script":
                return self._execute_ahk_script(command)
            elif command.action_type == "ahk_inline":
                return self._execute_ahk_inline(command)
            else:
                self.error_occurred.emit(f"Unknown action type: {command.action_type}")
                return False
        except Exception as e:
            import traceback
            self.error_occurred.emit(f"Error executing '{command.phrase}': {e}\n{traceback.format_exc()}")
            return False

    def _execute_internal(self, command: VoiceCommand) -> bool:
        """Execute an internal Python action"""
        handler = self.internal_handlers.get(command.action)
        if handler:
            try:
                result = handler()
                # Log success to main window if available
                if self.main_window and hasattr(self.main_window, 'log'):
                    self.main_window.log(f"✓ Voice command executed: {command.phrase} → {command.action}")
                self.command_executed.emit(command.phrase, f"✓ {command.description}")
                return True
            except Exception as e:
                import traceback
                error_msg = f"Error in handler for '{command.phrase}': {e}"
                if self.main_window and hasattr(self.main_window, 'log'):
                    self.main_window.log(f"❌ {error_msg}")
                    self.main_window.log(traceback.format_exc())
                self.error_occurred.emit(error_msg)
                return False
        else:
            error_msg = f"No handler for internal action: {command.action}"
            if self.main_window and hasattr(self.main_window, 'log'):
                self.main_window.log(f"❌ {error_msg}")
                self.main_window.log(f" Available handlers: {list(self.internal_handlers.keys())}")
            self.error_occurred.emit(error_msg)
            return False

    def _execute_keystroke(self, command: VoiceCommand) -> bool:
        """Execute a keystroke via AutoHotkey"""
        # Convert keystroke format (e.g., "ctrl+s") to AHK format
        ahk_keys = self._convert_to_ahk_keys(command.action)
        ahk_code = f"Send, {ahk_keys}"
        return self._run_ahk_code(ahk_code, command)

    def _execute_ahk_script(self, command: VoiceCommand) -> bool:
        """Execute a saved AHK script file"""
        script_path = self.ahk_script_dir / f"{command.action}.ahk"
        if not script_path.exists():
            self.error_occurred.emit(f"AHK script not found: {script_path}")
            return False

        try:
            # Find AutoHotkey executable
            ahk_exe = self._find_ahk_executable()
            if not ahk_exe:
                self.error_occurred.emit("AutoHotkey not found. Please install AutoHotkey v2.")
                return False

            subprocess.Popen([ahk_exe, str(script_path)],
                             creationflags=subprocess.CREATE_NO_WINDOW)
            self.command_executed.emit(command.phrase, f"✓ {command.description}")
            return True
        except Exception as e:
            self.error_occurred.emit(f"Failed to run AHK script: {e}")
            return False

    def _execute_ahk_inline(self, command: VoiceCommand) -> bool:
        """Execute inline AHK code"""
        return self._run_ahk_code(command.action, command)

    def _run_ahk_code(self, ahk_code: str, command: VoiceCommand) -> bool:
        """Run arbitrary AHK code"""
        try:
            ahk_exe = self._find_ahk_executable()
            if not ahk_exe:
                self.error_occurred.emit("AutoHotkey not found. Please install AutoHotkey v2.")
                return False

            # Create temporary script
            temp_script = self.ahk_script_dir / "_temp_voice_cmd.ahk"

            # Wrap code in AHK v2 format
            full_script = f"""#Requires AutoHotkey v2.0
#SingleInstance Force
{ahk_code}
ExitApp
"""

            with open(temp_script, 'w', encoding='utf-8') as f:
                f.write(full_script)

            # Run script
            subprocess.Popen([ahk_exe, str(temp_script)],
                             creationflags=subprocess.CREATE_NO_WINDOW)

            self.command_executed.emit(command.phrase, f"✓ {command.description}")
            return True

        except Exception as e:
            self.error_occurred.emit(f"Failed to run AHK code: {e}")
            return False

    def _convert_to_ahk_keys(self, keystroke: str) -> str:
        """Convert keystroke string to AHK Send format"""
        # Map modifier names to AHK symbols
        modifiers = {
            'ctrl': '^',
            'control': '^',
            'alt': '!',
            'shift': '+',
            'win': '#',
            'windows': '#'
        }

        # Special key names
        special_keys = {
            'enter': '{Enter}',
            'return': '{Enter}',
            'tab': '{Tab}',
            'escape': '{Esc}',
            'esc': '{Esc}',
            'space': '{Space}',
            'backspace': '{Backspace}',
            'delete': '{Delete}',
            'del': '{Delete}',
            'insert': '{Insert}',
            'ins': '{Insert}',
            'home': '{Home}',
            'end': '{End}',
            'pageup': '{PgUp}',
            'pgup': '{PgUp}',
            'pagedown': '{PgDn}',
            'pgdn': '{PgDn}',
            'up': '{Up}',
            'down': '{Down}',
            'left': '{Left}',
            'right': '{Right}',
            'f1': '{F1}', 'f2': '{F2}', 'f3': '{F3}', 'f4': '{F4}',
            'f5': '{F5}', 'f6': '{F6}', 'f7': '{F7}', 'f8': '{F8}',
            'f9': '{F9}', 'f10': '{F10}', 'f11': '{F11}', 'f12': '{F12}',
        }

        parts = keystroke.lower().replace(' ', '').split('+')
        result = ''

        for part in parts:
            if part in modifiers:
                result += modifiers[part]
            elif part in special_keys:
                result += special_keys[part]
            else:
                # Regular key
                result += part

        return result

    def _find_ahk_executable(self) -> Optional[str]:
        """Find AutoHotkey v2 executable"""
        # Common installation paths
        possible_paths = [
            r"C:\Program Files\AutoHotkey\v2\AutoHotkey64.exe",
            r"C:\Program Files\AutoHotkey\v2\AutoHotkey32.exe",
            r"C:\Program Files\AutoHotkey\AutoHotkey.exe",
            r"C:\Program Files (x86)\AutoHotkey\AutoHotkey.exe",
        ]

        # Check PATH first
        import shutil
        ahk_in_path = shutil.which("AutoHotkey64") or shutil.which("AutoHotkey")
        if ahk_in_path:
            return ahk_in_path

        # Check common locations
        for path in possible_paths:
            if os.path.exists(path):
                return path

        return None

    def process_spoken_text(self, spoken_text: str) -> Tuple[bool, str]:
        """
        Process spoken text - try to match command, return success status and message.
        Returns (was_command, message_or_text)
        - If command matched: (True, "Command executed: ...")
        - If no match: (False, original_spoken_text) for dictation fallback
        """
        match_result = self.find_matching_command(spoken_text)

        if match_result:
            command, confidence = match_result
            success = self.execute_command(command)
            if success:
                return (True, f"✓ {command.phrase} ({confidence:.0%})")
            else:
                return (True, f"✗ Failed: {command.phrase}")

        # No command matched - return text for dictation
        self.command_not_found.emit(spoken_text)
        return (False, spoken_text)

    def add_command(self, command: VoiceCommand):
        """Add a new command"""
        self.commands.append(command)
        self.save_commands()

    def remove_command(self, phrase: str):
        """Remove a command by phrase"""
        self.commands = [c for c in self.commands if c.phrase != phrase]
        self.save_commands()

    def get_commands_by_category(self) -> Dict[str, List[VoiceCommand]]:
        """Get commands organized by category"""
        categories: Dict[str, List[VoiceCommand]] = {}
        for cmd in self.commands:
            if cmd.category not in categories:
                categories[cmd.category] = []
            categories[cmd.category].append(cmd)
        return categories

    def export_commands(self, filepath: Path):
        """Export commands to a file"""
        data = {
            'version': '1.0',
            'match_threshold': self.match_threshold,
            'commands': [
                {
                    'phrase': cmd.phrase,
                    'aliases': cmd.aliases,
                    'action_type': cmd.action_type,
                    'action': cmd.action,
                    'description': cmd.description,
                    'category': cmd.category,
                    'enabled': cmd.enabled
                }
                for cmd in self.commands
            ]
        }
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

    def import_commands(self, filepath: Path, merge: bool = True):
        """Import commands from a file"""
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        imported_commands = []
        for cmd_data in data.get('commands', []):
            imported_commands.append(VoiceCommand(
                phrase=cmd_data['phrase'],
                aliases=cmd_data.get('aliases', []),
                action_type=cmd_data.get('action_type', 'internal'),
                action=cmd_data.get('action', ''),
                description=cmd_data.get('description', ''),
                category=cmd_data.get('category', 'general'),
                enabled=cmd_data.get('enabled', True)
            ))

        if merge:
            # Add imported commands, skip duplicates
            existing_phrases = {c.phrase for c in self.commands}
            for cmd in imported_commands:
                if cmd.phrase not in existing_phrases:
                    self.commands.append(cmd)
        else:
            # Replace all commands
            self.commands = imported_commands

        self.save_commands()


class ContinuousVoiceListener(QObject):
    """
    Continuous voice listening with Voice Activity Detection (VAD).

    How it works:
    1. Continuously monitors microphone audio levels
    2. When speech is detected (audio above threshold), starts recording
    3. When silence is detected (audio below threshold for X ms), stops recording
    4. Sends recording to Whisper for transcription
    5. Processes result (command or dictation)
    6. Repeats

    This eliminates the need to press F9 twice - just speak and it listens.
    """

    # Signals
    listening_started = pyqtSignal()
    listening_stopped = pyqtSignal()
    speech_detected = pyqtSignal(str)  # Raw transcribed text
    command_detected = pyqtSignal(str, str)  # (phrase, result)
    text_for_dictation = pyqtSignal(str)  # Text that didn't match any command
    status_update = pyqtSignal(str)
    error_occurred = pyqtSignal(str)
    vad_status_changed = pyqtSignal(str)  # "listening", "recording", "processing"

    def __init__(self, command_manager: VoiceCommandManager,
                 model_name: str = "base",
                 language: str = "auto",
                 use_api: bool = False,
                 api_key: str = None):
        super().__init__()
        self.command_manager = command_manager
        self.model_name = model_name
        self.language = None if language == "auto" else language
        self.use_api = use_api
        self.api_key = api_key

        # VAD settings
        self.speech_threshold = 0.02  # RMS threshold to detect speech (adjustable)
        self.silence_duration = 0.8  # Seconds of silence before stopping recording
        self.min_speech_duration = 0.3  # Minimum speech duration to process
        self.max_speech_duration = 15.0  # Maximum recording duration
        self.is_listening = False
        self._thread = None
        self._whisper_model = None  # Cached Whisper model

    def start(self):
        """Start continuous listening"""
        if self.is_listening:
            return

        self.is_listening = True
        self._thread = _VADListenerThread(self)
        self._thread.transcription_ready.connect(self._on_transcription)
        self._thread.status_update.connect(self.status_update.emit)
        self._thread.error_occurred.connect(self.error_occurred.emit)
        self._thread.vad_status.connect(self.vad_status_changed.emit)
        self._thread.start()
        self.listening_started.emit()

    def stop(self):
        """Stop continuous listening"""
        self.is_listening = False
        if self._thread:
            self._thread.stop()
            self._thread = None
        self.listening_stopped.emit()

    def set_sensitivity(self, level: str):
        """
        Set microphone sensitivity level.
        - "low": Requires loud speech (noisy environment)
        - "medium": Normal sensitivity
        - "high": Picks up quiet speech (quiet environment)
        """
        thresholds = {
            "low": 0.04,
            "medium": 0.02,
            "high": 0.01
        }
        self.speech_threshold = thresholds.get(level, 0.02)

    def _on_transcription(self, text: str):
        """Handle transcribed speech"""
        self.speech_detected.emit(text)

        # Try to match as command
        was_command, result = self.command_manager.process_spoken_text(text)

        if was_command:
            self.command_detected.emit(text, result)
        else:
            # Pass to dictation
            self.text_for_dictation.emit(text)


class _VADListenerThread(QObject):
    """
    Voice Activity Detection listener thread.
    Uses amplitude-based VAD to detect speech start/end.
    """

    transcription_ready = pyqtSignal(str)
    status_update = pyqtSignal(str)
    error_occurred = pyqtSignal(str)
    vad_status = pyqtSignal(str)  # "waiting", "recording", "processing"

    def __init__(self, listener: ContinuousVoiceListener):
        super().__init__()
        self.listener = listener
        self._running = False
        self._thread = None
        self._model = None  # Cached whisper model

    def start(self):
        """Start the listener thread"""
        import threading

        self._running = True
        self._thread = threading.Thread(target=self._run, daemon=True)
        self._thread.start()

    def stop(self):
        """Stop the listener thread"""
        self._running = False

    def _run(self):
        """Main VAD listening loop"""
        try:
            import sounddevice as sd
            import numpy as np
            import tempfile
            import wave
            import os
            import time

            # Sample rate and chunk settings
            sample_rate = 16000
            chunk_samples = int(0.1 * sample_rate)  # 100ms chunks for VAD

            # Get settings from listener
            speech_threshold = self.listener.speech_threshold
            silence_duration = self.listener.silence_duration
            min_speech_duration = self.listener.min_speech_duration
            max_speech_duration = self.listener.max_speech_duration

            # Check if using API or local model
            if self.listener.use_api and self.listener.api_key:
                self.status_update.emit("🎤 Using OpenAI Whisper API (fast & accurate)")
                self._model = None  # No local model needed
            else:
                # Load local Whisper model once
                self.status_update.emit("🎤 Loading local speech model...")
                self.vad_status.emit("loading")
                try:
                    import whisper
                except ImportError:
                    self.error_occurred.emit(
                        "Local Whisper is not installed.\n\n"
                        "Option A (recommended): Choose 'OpenAI Whisper API' in Settings → Supervoice (requires OpenAI API key).\n"
                        "Option B: Install Local Whisper:\n"
                        "  pip install supervertaler[local-whisper]"
                    )
                    self._running = False
                    return
                self._model = whisper.load_model(self.listener.model_name)

            self.status_update.emit("🎤 Always-on listening active (waiting for speech...)")
            self.vad_status.emit("waiting")

            # Audio buffer for recording
            audio_buffer = []
            is_recording = False
            silence_start = None
            speech_start = None

            def audio_callback(indata, frames, time_info, status):
                """Callback for audio stream - processes each chunk"""
                nonlocal audio_buffer, is_recording, silence_start, speech_start

                if not self._running:
                    return

                # Calculate RMS amplitude
                rms = np.sqrt(np.mean(indata**2))
                is_speech = rms > speech_threshold

                if is_speech:
                    if not is_recording:
                        # Speech started
                        is_recording = True
                        speech_start = time.time()
                        audio_buffer = []
                        self.vad_status.emit("recording")
                        self.status_update.emit("🔴 Recording...")

                    # Reset silence counter
                    silence_start = None

                    # Add to buffer
                    audio_buffer.append(indata.copy())

                    # Check max duration
                    if time.time() - speech_start > max_speech_duration:
                        # Force stop recording
                        is_recording = False
                        self._process_audio(audio_buffer, sample_rate)
                        audio_buffer = []
                        self.vad_status.emit("waiting")

                else:  # Silence
                    if is_recording:
                        # Still recording, add silence chunk
                        audio_buffer.append(indata.copy())

                        # Start or continue silence timer
                        if silence_start is None:
                            silence_start = time.time()

                        # Check if silence duration exceeded
                        if time.time() - silence_start > silence_duration:
                            # Speech ended - process if long enough
                            speech_duration = time.time() - speech_start
                            is_recording = False

                            if speech_duration >= min_speech_duration:
                                self._process_audio(audio_buffer, sample_rate)
                            else:
                                self.status_update.emit("🎤 (too short, ignored)")

                            audio_buffer = []
                            silence_start = None
                            self.vad_status.emit("waiting")
                            self.status_update.emit("🎤 Listening...")

            # Start audio stream
            with sd.InputStream(
                samplerate=sample_rate,
                channels=1,
                dtype='float32',
                blocksize=chunk_samples,
                callback=audio_callback
            ):
                while self._running:
                    time.sleep(0.1)

        except Exception as e:
            import traceback
            self.error_occurred.emit(f"Listener error: {e}\n{traceback.format_exc()}")
        finally:
            self.vad_status.emit("stopped")
            self.status_update.emit("🔇 Stopped listening")

    def _process_audio(self, audio_buffer: list, sample_rate: int):
        """Process recorded audio - save to file and transcribe"""
        try:
            import numpy as np
            import tempfile
            import wave
            import os

            self.vad_status.emit("processing")
            self.status_update.emit("⏳ Transcribing...")

            # Concatenate audio chunks
            if not audio_buffer:
                return

            audio_data = np.concatenate(audio_buffer, axis=0)

            # Convert to int16
            audio_int16 = np.int16(audio_data * 32767)

            # Save to temp file
            temp_dir = tempfile.gettempdir()
            temp_path = os.path.join(temp_dir, f"sv_vad_{os.getpid()}.wav")

            with wave.open(temp_path, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)
                wf.setframerate(sample_rate)
                wf.writeframes(audio_int16.tobytes())

            # Transcribe using API or local model
            if self.listener.use_api and self.listener.api_key:
                text = self._transcribe_with_api(temp_path)
            else:
                text = self._transcribe_with_local(temp_path)

            # Clean up
            try:
                os.unlink(temp_path)
            except:
                pass

            # Emit result
            if text:
                self.transcription_ready.emit(text)

        except Exception as e:
            import traceback
            self.error_occurred.emit(f"Processing error: {e}\n{traceback.format_exc()}")

    def _transcribe_with_api(self, audio_path: str) -> str:
        """Transcribe using OpenAI Whisper API - much more accurate"""
        try:
            from openai import OpenAI

            client = OpenAI(api_key=self.listener.api_key)

            with open(audio_path, "rb") as audio_file:
                # Use whisper-1 model (OpenAI's hosted Whisper)
                kwargs = {"model": "whisper-1", "file": audio_file}

                # Add language hint if specified
                if self.listener.language:
                    kwargs["language"] = self.listener.language

                response = client.audio.transcriptions.create(**kwargs)

            return response.text.strip()

        except Exception as e:
            self.error_occurred.emit(f"OpenAI API error: {e}")
            return ""

    def _transcribe_with_local(self, audio_path: str) -> str:
        """Transcribe using local Whisper model"""
        try:
            if self.listener.language:
                result = self._model.transcribe(audio_path, language=self.listener.language)
            else:
                result = self._model.transcribe(audio_path)

            return result["text"].strip()

        except Exception as e:
            self.error_occurred.emit(f"Local transcription error: {e}")
            return ""


# Legacy class for backwards compatibility
class _ListenerThread(_VADListenerThread):
    """Legacy alias for _VADListenerThread"""
    pass
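For orientation, here is a minimal usage sketch (not part of the package) of how the two main classes above fit together: a VoiceCommandManager is built over a user-data folder, and a ContinuousVoiceListener feeds it transcriptions, with unmatched speech falling through to dictation. The user_data path and the connected print slots are illustrative assumptions; inside Supervertaler the manager is constructed with the main window, which supplies the internal action handlers.

from pathlib import Path
from modules.voice_commands import VoiceCommandManager, ContinuousVoiceListener

# Hypothetical user-data folder; Supervertaler passes its own config directory.
user_data = Path.home() / ".supervertaler"
user_data.mkdir(exist_ok=True)

manager = VoiceCommandManager(user_data)  # loads voice_commands.json, or writes the defaults
listener = ContinuousVoiceListener(manager, model_name="base", language="en")

# Matched phrases are executed; anything else is emitted for dictation.
listener.command_detected.connect(lambda phrase, result: print(f"command: {result}"))
listener.text_for_dictation.connect(lambda text: print(f"dictation: {text}"))

listener.set_sensitivity("medium")  # RMS threshold 0.02
listener.start()  # spawns the daemon VAD thread; call listener.stop() to end

Matching is exact first, then fuzzy: "confirm segment" scores 1.0 as an alias of the "confirm" command, and near-misses such as "confirm segments" still clear the 0.85 match_threshold via SequenceMatcher; anything below the threshold is returned unchanged as dictation text.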