supervertaler 1.9.163__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. Supervertaler.py +48473 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1911 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +351 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1176 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.163.dist-info/METADATA +906 -0
  81. supervertaler-1.9.163.dist-info/RECORD +85 -0
  82. supervertaler-1.9.163.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.163.dist-info/top_level.txt +2 -0
@@ -0,0 +1,284 @@
1
+ """
2
+ TMX Generator Module
3
+
4
+ Helper class for generating TMX (Translation Memory eXchange) files.
5
+ Supports TMX 1.4 format with proper XML structure.
6
+
7
+ Extracted from main Supervertaler file for better modularity.
8
+ """
9
+
10
+ import xml.etree.ElementTree as ET
11
+ from datetime import datetime
12
+
13
+
14
# Lower-cased language name -> ISO 639-1 code (optionally with a region).
# Built once at import time instead of on every call.
_LANG_NAME_TO_ISO = {
    # Major languages
    "english": "en",
    "dutch": "nl",
    "german": "de",
    "french": "fr",
    "spanish": "es",
    "italian": "it",
    "portuguese": "pt",
    "russian": "ru",
    "chinese": "zh",
    "japanese": "ja",
    "korean": "ko",
    "arabic": "ar",

    # Other languages
    "afrikaans": "af",
    "albanian": "sq",
    "armenian": "hy",
    "basque": "eu",
    "bengali": "bn",
    "bulgarian": "bg",
    "catalan": "ca",
    "croatian": "hr",
    "czech": "cs",
    "danish": "da",
    "estonian": "et",
    "finnish": "fi",
    "galician": "gl",
    "georgian": "ka",
    "greek": "el",
    "hebrew": "he",
    "hindi": "hi",
    "hungarian": "hu",
    "icelandic": "is",
    "indonesian": "id",
    "irish": "ga",
    "latvian": "lv",
    "lithuanian": "lt",
    "macedonian": "mk",
    "malay": "ms",
    "norwegian": "no",
    "persian": "fa",
    "polish": "pl",
    "romanian": "ro",
    "serbian": "sr",
    "slovak": "sk",
    "slovenian": "sl",
    "swahili": "sw",
    "swedish": "sv",
    "thai": "th",
    "turkish": "tr",
    "ukrainian": "uk",
    "urdu": "ur",
    "vietnamese": "vi",
    "welsh": "cy",

    # Chinese variants
    "chinese (simplified)": "zh-CN",
    "chinese (traditional)": "zh-TW",
}


def get_simple_lang_code(lang_name_or_code_input):
    """
    Convert a language name or code to a 2-letter code, optionally with a region.

    Supports:
    - Language names (case-insensitive): "English" -> "en", "Dutch" -> "nl"
    - Two-letter codes: "en" -> "en"
    - Codes with a region, '-' or '_' separated: "nl-NL" -> "nl-NL",
      "EN_us" -> "en-US"

    Args:
        lang_name_or_code_input: Language name or code. Falsy values
            (None, "") yield the default "en".

    Returns:
        "xx" or "xx-YY". A code whose region part is empty (e.g. "en-") is
        returned as the bare base code. For unrecognised input the first two
        characters are returned lower-cased when they are both alphabetic;
        otherwise "en" is returned.
    """
    if not lang_name_or_code_input:
        return "en"  # Default to English

    lang_input = lang_name_or_code_input.strip()
    lang_lower = lang_input.lower()

    # Full language name?
    mapped = _LANG_NAME_TO_ISO.get(lang_lower)
    if mapped is not None:
        return mapped

    # ISO-style code, possibly with a region ("en", "en-US", "nl_BE").
    parts = [part.strip() for part in lang_lower.replace('_', '-').split('-')]
    base = parts[0]
    if len(base) == 2 and base.isalpha():
        region = parts[1] if len(parts) > 1 else ""
        # Only append the region when there is one, so "en-" never leaks out.
        return f"{base}-{region.upper()}" if region else base

    # Fallback: first two characters, but only if they look like a code.
    prefix = lang_lower[:2]
    if len(prefix) == 2 and prefix.isalpha():
        return prefix

    return "en"  # Ultimate fallback
117
+
118
+
119
def get_base_lang_code(lang_code: str) -> str:
    """Return the base language code for a name or code.

    Examples: 'en-US' -> 'en', 'nl-BE' -> 'nl', 'Dutch' -> 'nl'.
    Falsy input (None, '') yields 'en'.
    """
    if not lang_code:
        return "en"

    # Names and mixed-format codes are resolved by get_simple_lang_code first.
    resolved = get_simple_lang_code(lang_code)

    # Drop any region suffix, whichever separator it uses.
    primary = resolved.partition('-')[0]
    primary = primary.partition('_')[0]
    return primary.lower()
129
+
130
+
131
def get_lang_match_variants(lang_code: str) -> list:
    """
    List the strings that may identify a language in stored data.

    The result always starts with the base code (e.g. 'nl', 'en'). When the
    base code is one of the languages in the table below, its English name
    (e.g. 'Dutch', 'English') follows as a second item. Regional forms such
    as 'nl-NL' are not added to the list.

    Falsy input (None, '') yields ['en', 'English'].
    """
    if not lang_code:
        return ['en', 'English']

    # Base code -> English language name
    names_by_code = {
        "en": "English",
        "nl": "Dutch",
        "de": "German",
        "fr": "French",
        "es": "Spanish",
        "it": "Italian",
        "pt": "Portuguese",
        "ru": "Russian",
        "zh": "Chinese",
        "ja": "Japanese",
        "ko": "Korean",
        "ar": "Arabic",
        "pl": "Polish",
        "sv": "Swedish",
        "da": "Danish",
        "no": "Norwegian",
        "fi": "Finnish",
    }

    base = get_base_lang_code(lang_code)
    full_name = names_by_code.get(base)

    if full_name is None:
        return [base]
    return [base, full_name]
176
+
177
def normalize_lang_variant(lang_code: str) -> str:
    """Normalize a language code to 'xx' or 'xx-YY' casing.

    Handles various input formats:
    - nl-nl -> nl-NL
    - nl-NL -> nl-NL
    - NL-NL -> nl-NL
    - nl_BE -> nl-BE
    - nl    -> nl (base code, lower-cased)

    Surrounding whitespace is ignored, and a code with an empty region
    (e.g. 'en-') is reduced to its base code instead of keeping the dangling
    separator. Codes with more than two parts are reduced to the lower-cased
    first part.

    Args:
        lang_code: Language code using '-' or '_' as separator.

    Returns:
        The normalized code. Falsy input (None, '') is returned unchanged.
    """
    if not lang_code:
        return lang_code

    # Treat '_' and '-' alike and ignore stray whitespace around each part.
    parts = [part.strip() for part in lang_code.strip().replace('_', '-').split('-')]
    base = parts[0].lower()

    # Exactly one non-empty region part: language variant such as 'en-US'.
    if len(parts) == 2 and parts[1]:
        return f"{base}-{parts[1].upper()}"

    # Bare base code, empty region, or unexpected format: base code only.
    return base
203
+
204
+
205
def languages_are_compatible(lang1: str, lang2: str) -> bool:
    """Return True when both inputs resolve to the same base language code."""
    first_base = get_base_lang_code(lang1)
    second_base = get_base_lang_code(lang2)
    return first_base == second_base
208
+
209
+
210
class TMXGenerator:
    """Helper class for generating TMX (Translation Memory eXchange) files"""

    def __init__(self, log_callback=None, creation_tool_version='3.6.0-beta'):
        """
        Args:
            log_callback: Callable taking one message string. When omitted,
                log messages are discarded.
            creation_tool_version: Value written to the header's
                'creationtoolversion' attribute.
        """
        self.log = log_callback if log_callback else lambda msg: None
        self.creation_tool_version = creation_tool_version

    def generate_tmx(self, source_segments, target_segments, source_lang, target_lang):
        """Generate TMX content from parallel segments.

        Segments are paired positionally with zip(), so surplus items in the
        longer sequence are ignored. A pair is skipped when the source is
        None or blank, when the target is falsy or blank, or when the target
        contains the markers '[ERR' or '[Missing'.

        Args:
            source_segments: Iterable of source texts.
            target_segments: Iterable of target texts.
            source_lang: Source language name or code.
            target_lang: Target language name or code.

        Returns:
            xml.etree.ElementTree.ElementTree rooted at the <tmx> element.
        """
        # Local import: the module itself only imports `datetime`.
        from datetime import timezone

        # Resolve the language codes once instead of once per translation unit.
        src_code = get_simple_lang_code(source_lang)
        tgt_code = get_simple_lang_code(target_lang)

        # Basic TMX structure
        tmx = ET.Element('tmx')
        tmx.set('version', '1.4')

        header = ET.SubElement(tmx, 'header')
        # The trailing 'Z' denotes UTC, so the timestamp must be taken in UTC.
        header.set('creationdate', datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ'))
        header.set('srclang', src_code)
        header.set('adminlang', 'en')
        header.set('segtype', 'sentence')
        header.set('creationtool', 'Supervertaler')
        header.set('creationtoolversion', self.creation_tool_version)
        header.set('datatype', 'plaintext')
        # 'o-tmf' (original translation memory format) is a required header
        # attribute in TMX 1.4.
        header.set('o-tmf', 'Supervertaler')

        body = ET.SubElement(tmx, 'body')

        # Add translation units
        added_count = 0
        for src, tgt in zip(source_segments, target_segments):
            if src is None or not tgt:
                continue

            src_text = str(src).strip()
            tgt_text = str(tgt).strip()
            if not src_text or not tgt_text:
                continue
            # Placeholder output from a failed or missing translation.
            if '[ERR' in tgt_text or '[Missing' in tgt_text:
                continue

            tu = ET.SubElement(body, 'tu')

            # Source segment
            tuv_src = ET.SubElement(tu, 'tuv')
            tuv_src.set('xml:lang', src_code)
            seg_src = ET.SubElement(tuv_src, 'seg')
            seg_src.text = src_text

            # Target segment
            tuv_tgt = ET.SubElement(tu, 'tuv')
            tuv_tgt.set('xml:lang', tgt_code)
            seg_tgt = ET.SubElement(tuv_tgt, 'seg')
            seg_tgt.text = tgt_text

            added_count += 1

        self.log(f"[TMX Generator] Created TMX with {added_count} translation units")
        return ET.ElementTree(tmx)

    def save_tmx(self, tmx_tree, output_path):
        """Save TMX tree to file with proper XML formatting.

        The tree is indented in place before it is written as UTF-8 with an
        XML declaration.

        Returns:
            True on success. On any exception the error is logged and False
            is returned instead of raising.
        """
        try:
            # Pretty print with indentation
            self._indent(tmx_tree.getroot())
            tmx_tree.write(output_path, encoding='utf-8', xml_declaration=True)
            self.log(f"[TMX Generator] Saved TMX file: {output_path}")
            return True
        except Exception as e:
            # Best-effort by design: callers receive a bool, not an exception.
            self.log(f"[TMX Generator] Error saving TMX: {e}")
            return False

    def _indent(self, elem, level=0):
        """Add indentation to XML for pretty printing.

        Recursively rewrites blank `text`/`tail` whitespace in place; text
        that contains non-whitespace characters is left untouched.
        """
        i = "\n" + level * " "
        if len(elem):
            if not elem.text or not elem.text.strip():
                elem.text = i + " "
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
            for child in elem:
                self._indent(child, level + 1)
            # After the loop `child` is the last child: pull its tail back to
            # the parent's level so the closing tag lines up with the opening.
            if not child.tail or not child.tail.strip():
                child.tail = i
        else:
            if level and (not elem.tail or not elem.tail.strip()):
                elem.tail = i