supervertaler 1.9.163__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. Supervertaler.py +48473 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1911 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +351 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1176 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.163.dist-info/METADATA +906 -0
  81. supervertaler-1.9.163.dist-info/RECORD +85 -0
  82. supervertaler-1.9.163.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.163.dist-info/top_level.txt +2 -0
@@ -0,0 +1,466 @@
1
+ """
2
+ AutoFingers Translation Automation Engine
3
+ Replicates AutoHotkey AutoFingers functionality in Python
4
+ Automates translation pasting in memoQ from TMX translation memory
5
+ """
6
+
7
+ import time
8
+ import xml.etree.ElementTree as ET
9
+ from typing import Dict, Optional, Tuple, NamedTuple
10
+ from difflib import SequenceMatcher
11
+ import pyperclip
12
+ import re
13
+
14
+ try:
15
+ import pyautogui
16
+ HAS_PYAUTOGUI = True
17
+ except ImportError:
18
+ HAS_PYAUTOGUI = False
19
+ print("[WARN] pyautogui not available - AutoFingers will have limited functionality on this platform")
20
+
21
+ from modules.tag_cleaner import TagCleaner
22
+
23
+ # Try to import AHK for better keyboard control
24
+ try:
25
+ from ahk import AHK
26
+ HAS_AHK = True
27
+ _ahk_instance = None # Lazy initialization
28
+ print("[OK] AHK library imported successfully")
29
+ except ImportError as e:
30
+ HAS_AHK = False
31
+ _ahk_instance = None
32
+ print(f"[WARN] AHK library not available: {e}")
33
+
34
+
35
def get_ahk():
    """
    Return the shared AHK instance, creating it on first use.

    The instance is cached in the module-level ``_ahk_instance``. When the
    ``ahk`` library could not be imported (``HAS_AHK`` is False) the cached
    value is returned unchanged. If constructing ``AHK()`` fails, the error
    is printed and ``None`` is returned, so a later call will try again.
    """
    global _ahk_instance

    # Nothing to do if an instance already exists or the library is missing.
    if _ahk_instance is not None or not HAS_AHK:
        return _ahk_instance

    try:
        _ahk_instance = AHK()
        print(f"✓ AHK instance created: {_ahk_instance}")
    except Exception as e:
        print(f"✗ AHK instance creation failed: {e}")
        return None

    return _ahk_instance
46
+
47
+
48
class TranslationMatch(NamedTuple):
    """
    Outcome of a successful translation-memory lookup.

    Fields, in positional order:
        translation: target-language text stored for the matched source
        match_type: kind of match, "exact" or "fuzzy"
        match_percent: 100 for an exact match, 0-99 for a fuzzy match
    """
    translation: str
    match_type: str
    match_percent: int
53
+
54
+
55
class AutoFingersEngine:
    """
    Translation automation engine for CAT tools like memoQ.

    Loads source/target pairs from a TMX file into an in-memory dictionary
    and then drives the CAT tool with keyboard shortcuts (via ``pyautogui``)
    and the clipboard (via ``pyperclip``): copy the source segment, look up
    its translation, paste it, and confirm or move to the next segment.
    """

    # Fully qualified name of the ``xml:lang`` attribute as ElementTree reports it.
    _XML_LANG = '{http://www.w3.org/XML/1998/namespace}lang'

    # En-dash, em-dash and minus sign all map to a plain hyphen.
    _DASH_TABLE = str.maketrans({'\u2013': '-', '\u2014': '-', '\u2212': '-'})

    def __init__(self, tmx_file: str, source_lang: str = "en", target_lang: str = "nl"):
        """
        Initialize the AutoFingers engine.

        Args:
            tmx_file: Path to TMX translation memory file
            source_lang: Source language code (e.g., 'en')
            target_lang: Target language code (e.g., 'nl')
        """
        self.tmx_file = tmx_file
        self.source_lang = source_lang
        self.target_lang = target_lang
        # Maps dash-normalized source text to target text.
        self.tm_database: Dict[str, str] = {}
        self.tm_count = 0

        # Timing settings (milliseconds)
        self.loop_delay = 4000
        self.confirm_delay = 900
        self.copy_source_delay = 600
        self.select_all_delay = 400
        self.copy_delay = 800
        self.paste_delay = 800
        self.confirm_enter_delay = 1200

        # Behavior settings
        # auto_confirm: True -> Ctrl+Enter after an exact match;
        # False -> Alt+N (move on without confirming).
        self.auto_confirm = True
        self.skip_no_match = False

        # Fuzzy matching settings
        self.enable_fuzzy_matching = True
        self.fuzzy_threshold = 0.80  # 80% similarity threshold
        self.auto_confirm_fuzzy = False  # Leave fuzzy matches for the translator to review

        # Tag cleaning - using standalone TagCleaner module
        self.tag_cleaner = TagCleaner()

        # State tracking
        self.is_running = False
        self.segments_processed = 0
        self.last_match_type = None  # "exact", "fuzzy", or None
        self.last_source = None  # Last source text, kept for UI display
        self.last_match = None  # Last lookup result, kept for UI display

    def load_tmx(self) -> Tuple[bool, str]:
        """
        Load and parse the TMX translation memory file.

        The database is cleared before parsing, so after a failed load it is
        empty. A translation unit is stored when it has both a source-language
        and a target-language ``<seg>`` with text; language codes are compared
        case-insensitively by prefix (``en`` matches ``en-US``).

        Returns:
            Tuple of (success: bool, message: str)
        """
        try:
            self.tm_database.clear()
            self.tm_count = 0

            root = ET.parse(self.tmx_file).getroot()
            source_prefix = self.source_lang.lower()
            target_prefix = self.target_lang.lower()

            for tu in root.findall('.//tu'):
                source_text = None
                target_text = None

                for tuv in tu.findall('tuv'):
                    # Prefer xml:lang, fall back to a plain "lang" attribute.
                    lang = tuv.get(self._XML_LANG, tuv.get('lang', '')).lower()

                    seg = tuv.find('seg')
                    # NOTE(review): only seg.text is read, so text that follows an
                    # inline child element (e.g. <bpt>, <ph>) is not included -
                    # confirm whether the TMX files in use contain inline elements.
                    if seg is None or not seg.text:
                        continue

                    if lang.startswith(source_prefix):
                        source_text = seg.text.strip()
                    elif lang.startswith(target_prefix):
                        target_text = seg.text.strip()

                if source_text and target_text:
                    # Keys are dash-normalized so lookups can be normalized the same way.
                    self.tm_database[self._normalize_dashes(source_text)] = target_text
                    # Counts units read; duplicates of one source overwrite each other.
                    self.tm_count += 1

            return True, f"Loaded {self.tm_count} translation units"

        except FileNotFoundError:
            return False, f"TMX file not found: {self.tmx_file}"
        except ET.ParseError as e:
            return False, f"XML parse error: {str(e)}"
        except Exception as e:
            return False, f"Error loading TMX: {str(e)}"

    def _normalize_dashes(self, text: str) -> str:
        """
        Normalize different types of dashes to a regular hyphen for matching.

        Args:
            text: Text to normalize

        Returns:
            Text with en-dash, em-dash and minus sign replaced by '-'
        """
        return text.translate(self._DASH_TABLE)

    def lookup_translation(self, source_text: str) -> Optional["TranslationMatch"]:
        """
        Look up the translation for source text in the TM database.

        Tries an exact match on the stripped, dash-normalized text first, then
        a fuzzy match if ``enable_fuzzy_matching`` is set. ``last_match_type``
        is updated to "exact", "fuzzy" or None on every call.

        Args:
            source_text: Source text to translate

        Returns:
            TranslationMatch with translation, match_type, and match_percent,
            or None if no match was found
        """
        if not source_text:
            # Reset so an empty lookup does not leave the previous result behind.
            self.last_match_type = None
            return None

        normalized = self._normalize_dashes(source_text.strip())

        # Exact match (100%)
        if normalized in self.tm_database:
            self.last_match_type = "exact"
            return TranslationMatch(self.tm_database[normalized], "exact", 100)

        # Fuzzy fallback
        if self.enable_fuzzy_matching:
            fuzzy_match = self._find_fuzzy_match(normalized)
            if fuzzy_match:
                translation, similarity = fuzzy_match
                # Fuzzy comparison ignores case, so a case-only difference scores
                # 1.0; cap at 99 so that 100 is reserved for exact matches.
                match_percent = min(99, int(similarity * 100))
                self.last_match_type = "fuzzy"
                return TranslationMatch(translation, "fuzzy", match_percent)

        self.last_match_type = None
        return None

    def _find_fuzzy_match(self, source_text: str) -> Optional[Tuple[str, float]]:
        """
        Find the best fuzzy match in the TM database.

        Comparison is case-insensitive. On equal similarity the entry that
        comes first in the database wins.

        Args:
            source_text: Source text to match (normalized)

        Returns:
            Tuple of (target_translation, similarity_ratio) or None if no
            entry reaches ``fuzzy_threshold``
        """
        threshold = self.fuzzy_threshold
        needle = source_text.lower()
        best_match = None
        best_similarity = 0.0

        for tm_source, tm_target in self.tm_database.items():
            matcher = SequenceMatcher(None, needle, tm_source.lower())

            # Both quick ratios are upper bounds on ratio(), so an entry that
            # fails either one cannot reach the threshold; skipping it avoids
            # the expensive full comparison without changing the result.
            if matcher.real_quick_ratio() < threshold or matcher.quick_ratio() < threshold:
                continue

            similarity = matcher.ratio()
            if similarity >= threshold and similarity > best_similarity:
                best_match = tm_target
                best_similarity = similarity

        if not best_match:
            return None
        return best_match, best_similarity

    def _pause(self, milliseconds: float) -> None:
        """Sleep for a delay given in milliseconds."""
        time.sleep(milliseconds / 1000)

    def _copy_source_segment(self) -> str:
        """Copy the current source segment via the clipboard and return it stripped."""
        # Start from an empty clipboard so stale content is not read back.
        pyperclip.copy('')
        time.sleep(0.2)

        # Copy source to target (Ctrl+Shift+S in memoQ)
        pyautogui.hotkey('ctrl', 'shift', 's')
        self._pause(self.copy_source_delay)

        # Select all (Ctrl+A)
        pyautogui.hotkey('ctrl', 'a')
        self._pause(self.select_all_delay)

        # Copy to clipboard (Ctrl+C)
        pyautogui.hotkey('ctrl', 'c')
        self._pause(self.copy_delay)

        return pyperclip.paste().strip()

    def _handle_no_match(self, source_text: str) -> Tuple[bool, str]:
        """Skip the segment (if ``skip_no_match``) or pause the run for manual handling."""
        if not self.skip_no_match:
            self.is_running = False
            return False, f"No translation found. Paused at: {source_text[:50]}..."

        # Clear the target box
        pyautogui.hotkey('ctrl', 'a')  # Select all
        time.sleep(0.1)
        pyautogui.press('backspace')  # Delete
        time.sleep(0.3)

        # Alt+N (Go to Next) in memoQ
        pyautogui.hotkey('alt', 'n')
        self._pause(self.confirm_delay)

        return True, f"No translation - skipped: {source_text[:50]}..."

    def _paste_translation(self, translation: str) -> None:
        """Run the translation through the tag cleaner and paste it with Ctrl+V."""
        pyperclip.copy(self.tag_cleaner.clean(translation))
        time.sleep(0.4)

        pyautogui.hotkey('ctrl', 'v')
        self._pause(self.paste_delay)

    def _leave_segment(self, confirm: bool) -> None:
        """Confirm the segment with Ctrl+Enter, or move on unconfirmed with Alt+N."""
        self._pause(self.confirm_delay)
        if confirm:
            pyautogui.hotkey('ctrl', 'enter')
            self._pause(self.confirm_enter_delay)
        else:
            # Alt+N is memoQ's native "Go to Next"; it is used instead of the
            # Down arrow because pyautogui has issues with arrow keys in memoQ.
            pyautogui.hotkey('alt', 'n')
            self._pause(self.confirm_delay)

    def process_single_segment(self) -> Tuple[bool, str]:
        """
        Process a single translation segment in memoQ.

        Automates: copy source to target, look up translation, paste, then
        confirm or move on.

        - Exact match: confirmed with Ctrl+Enter when ``auto_confirm`` is set.
        - Fuzzy match: confirmed only when ``auto_confirm_fuzzy`` is set;
          otherwise pasted and left unconfirmed for the translator to review.
        - No match: the segment is cleared and skipped when ``skip_no_match``
          is set (reported as success, not counted in ``segments_processed``);
          otherwise ``is_running`` is cleared and a failure is returned.

        Any exception clears ``is_running`` and is reported as a failure.

        Returns:
            Tuple of (success: bool, message: str)
        """
        if not HAS_PYAUTOGUI:
            return False, "AutoFingers requires pyautogui (Windows/AutoHotkey feature not available on this platform)"

        try:
            source_text = self._copy_source_segment()
            if not source_text:
                return False, "Empty source text"

            # Tries exact first, then fuzzy
            match_result = self.lookup_translation(source_text)

            # Track for UI display
            self.last_source = source_text
            self.last_match = match_result

            if not match_result:
                return self._handle_no_match(source_text)

            translation = match_result.translation
            self._paste_translation(translation)

            is_exact = match_result.match_type == "exact"
            is_fuzzy = match_result.match_type == "fuzzy"
            should_auto_confirm = (is_exact and self.auto_confirm) or (is_fuzzy and self.auto_confirm_fuzzy)
            self._leave_segment(should_auto_confirm)

            self.segments_processed += 1

            # Format match info for logging; only label a fuzzy match as
            # unconfirmed when it really was left unconfirmed.
            if is_exact:
                match_info = "100% exact"
            elif should_auto_confirm:
                match_info = f"{match_result.match_percent}% fuzzy"
            else:
                match_info = f"{match_result.match_percent}% fuzzy (unconfirmed)"

            return True, f"[{match_info}] {source_text[:30]}... → {translation[:30]}..."

        except Exception as e:
            self.is_running = False
            return False, f"Error processing segment: {str(e)}"

    def process_multiple_segments(self, max_segments: int = 0,
                                  callback=None) -> Tuple[int, str]:
        """
        Process multiple segments in loop mode.

        The loop ends when ``max_segments`` translated segments have been
        processed, when ``stop()`` is called, or as soon as a segment fails.
        Segments skipped because of ``skip_no_match`` are reported as success
        and therefore do not end the loop, and they do not count towards
        ``max_segments``. ``is_running`` is always False when this returns.

        Args:
            max_segments: Maximum segments to process (0 = infinite)
            callback: Optional callback function(success, message) called after each segment

        Returns:
            Tuple of (segments_processed: int, final_message: str)
        """
        self.is_running = True
        self.segments_processed = 0

        def limit_reached() -> bool:
            return max_segments > 0 and self.segments_processed >= max_segments

        try:
            while self.is_running:
                if limit_reached():
                    msg = f"Completed {self.segments_processed} segments"
                    return self.segments_processed, msg

                success, message = self.process_single_segment()

                if callback:
                    callback(success, message)

                # Skipped segments come back as success, so any failure here is
                # a real one (empty source, missing pyautogui, pause, error).
                # Retrying would repeat the same failure indefinitely.
                if not success:
                    break

                # No point waiting when the next iteration will just return.
                if not limit_reached():
                    time.sleep(self.loop_delay / 1000)

            msg = f"Processed {self.segments_processed} segment(s)"
            return self.segments_processed, msg

        except KeyboardInterrupt:
            return self.segments_processed, "Stopped by user"
        except Exception as e:
            return self.segments_processed, f"Error: {str(e)}"
        finally:
            # Never report "running" once the loop has exited, whatever the reason.
            self.is_running = False

    def stop(self):
        """Stop the automation loop."""
        self.is_running = False

    def create_empty_tmx(self) -> bool:
        """
        Create an empty TMX file with proper structure at ``tmx_file``.

        The file contains a header naming the source language and an empty
        body with a commented-out example translation unit. Language codes
        are escaped so that unusual values cannot break the XML.

        Returns:
            True if successful, False otherwise
        """
        # Local import: only this method needs it.
        from xml.sax.saxutils import quoteattr

        try:
            # quoteattr returns the value already wrapped in quotes.
            src = quoteattr(str(self.source_lang))
            tgt = quoteattr(str(self.target_lang))
            lines = [
                '<?xml version="1.0" encoding="UTF-8"?>',
                '<!DOCTYPE tmx SYSTEM "tmx14.dtd">',
                '<tmx version="1.4">',
                '  <header',
                '    creationtool="AutoFingers"',
                '    creationtoolversion="1.0"',
                '    datatype="PlainText"',
                '    segtype="sentence"',
                '    adminlang="en-US"',
                f'    srclang={src}',
                '    o-tmf="AutoFingers"',
                '  />',
                '  <body>',
                '    <!-- Add translation units here -->',
                '    <!-- Example:',
                '    <tu>',
                f'      <tuv xml:lang={src}>',
                '        <seg>Source text here</seg>',
                '      </tuv>',
                f'      <tuv xml:lang={tgt}>',
                '        <seg>Target translation here</seg>',
                '      </tuv>',
                '    </tu>',
                '    -->',
                '  </body>',
                '</tmx>',
            ]
            with open(self.tmx_file, 'w', encoding='utf-8') as f:
                f.write("\n".join(lines) + "\n")
            return True
        except Exception as e:
            # Best-effort by contract: report the problem and signal failure.
            print(f"Error creating TMX: {e}")
            return False
429
+
430
+
431
+ # Example usage
432
if __name__ == "__main__":
    # Demo: load a TMX and process the segment currently focused in memoQ.
    engine = AutoFingersEngine("autofingers_tm.tmx", source_lang="en", target_lang="nl")

    # Optional tuning of the matching behaviour
    engine.enable_fuzzy_matching = True   # Fall back to fuzzy matching
    engine.fuzzy_threshold = 0.80         # 80% similarity threshold
    engine.auto_confirm_fuzzy = False     # Leave fuzzy matches for the translator to review
    engine.skip_no_match = True           # Skip segments with no match instead of pausing

    loaded, load_message = engine.load_tmx()
    print(load_message)

    if loaded:
        print("\nProcessing single segment...")
        print("Switch to memoQ window in 3 seconds...")
        time.sleep(3)

        ok, outcome = engine.process_single_segment()
        marker = "✓" if ok else "✗"
        print(f"{marker} {outcome}")

        # Example: process multiple segments
        # print("\nProcessing multiple segments...")
        # engine.segments_processed = 0
        # count, msg = engine.process_multiple_segments(max_segments=10)
        # print(f"Processed {count} segments: {msg}")