supervertaler 1.9.163__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. Supervertaler.py +48473 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1911 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +351 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1176 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.163.dist-info/METADATA +906 -0
  81. supervertaler-1.9.163.dist-info/RECORD +85 -0
  82. supervertaler-1.9.163.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.163.dist-info/top_level.txt +2 -0
@@ -0,0 +1,379 @@
1
+ """
2
+ CafeTran Bilingual DOCX Handler
3
+
4
+ This module handles the import and export of CafeTran bilingual DOCX files.
5
+ CafeTran uses a simple table-based format with pipe symbols (|) to mark formatted text.
6
+
7
+ Format Structure:
8
+ - Table with columns: ID | Source | Target | Notes | *
9
+ - Pipe symbols (|) surround formatted text in the source column
10
+ - Examples:
11
+ - |Atalanta| = underlined text
12
+ - Biagio Pagano| = bold text (pipe at end)
13
+ - |text| = formatted text (underlined)
14
+
15
+ The pipe symbols are preserved during translation and applied to the target text.
16
+ """
17
+
18
+ import os
19
+ from docx import Document
20
+ from docx.shared import RGBColor, Pt
21
+ from docx.enum.text import WD_UNDERLINE
22
+ import re
23
+
24
+
25
class FormattedSegment:
    """
    Represents a text segment with formatting information using pipe symbols.

    Attributes:
        segment_id: Identifier taken from the ID column of the bilingual table.
        source_with_pipes: Source text, including any | markers.
        target_with_pipes: Target text, including any | markers (may be empty).
        notes: Free-form notes attached to the segment.
    """

    # Maximum number of characters of source/target text shown by __repr__.
    _PREVIEW_LEN = 50

    def __init__(self, segment_id, source_with_pipes, target_with_pipes="", notes=""):
        self.segment_id = segment_id
        self.source_with_pipes = source_with_pipes  # Source text with | markers
        self.target_with_pipes = target_with_pipes  # Target text with | markers
        self.notes = notes

    @property
    def plain_text(self):
        """Get source text with pipe symbols removed for translation."""
        return self.source_with_pipes.replace('|', '')

    @classmethod
    def _preview(cls, text):
        """Return *text* shortened for display; '...' is added only when cut."""
        text = "" if text is None else str(text)
        if len(text) <= cls._PREVIEW_LEN:
            return text
        return text[:cls._PREVIEW_LEN] + "..."

    def __repr__(self):
        # !r quotes and escapes the previews, so embedded quotes or newlines
        # cannot make the representation ambiguous.
        return (
            f"FormattedSegment(id={self.segment_id}, "
            f"source={self._preview(self.source_with_pipes)!r}, "
            f"target={self._preview(self.target_with_pipes)!r})"
        )
42
+
43
+
44
class CafeTranDOCXHandler:
    """
    Handler for CafeTran bilingual DOCX files.

    This class provides methods to:
    - Load and parse CafeTran bilingual DOCX files
    - Extract source segments with formatting markers (pipe symbols)
    - Update target segments with translations
    - Save modified files while preserving formatting

    Typical call order: load() -> extract_source_segments() ->
    update_target_segments() -> save().
    """

    def __init__(self):
        self.doc = None          # Document object of the loaded file
        self.table = None        # First table of the document (the bilingual table)
        self.segments = []       # Segments produced by extract_source_segments()
        self.file_path = None    # Path passed to the most recent load()
        self.header_row = None   # Stripped header cell texts of the table

    def load(self, file_path):
        """
        Load a CafeTran bilingual DOCX file.

        Any state from a previous load is discarded first, so a failed or
        repeated load can never leave segments or a table of one file paired
        with the document of another.

        Args:
            file_path: Path to the CafeTran bilingual DOCX file

        Returns:
            bool: True if loaded successfully, False otherwise
        """
        self.doc = None
        self.table = None
        self.segments = []
        self.header_row = None
        self.file_path = file_path

        try:
            doc = Document(file_path)

            # CafeTran bilingual files should have exactly one table
            if len(doc.tables) == 0:
                print(f"ERROR: No table found in {file_path}")
                return False

            table = doc.tables[0]

            # Need the header row plus at least one data row
            if len(table.rows) < 2:
                print("ERROR: Table has insufficient rows")
                return False

            header_row = [cell.text.strip() for cell in table.rows[0].cells]

            # Check if this looks like a CafeTran bilingual DOCX
            # (first row should be: ID, filename, filename, Notes, *)
            first_header = header_row[0] if header_row else ''
            if first_header != 'ID':
                print(f"ERROR: First column header should be 'ID', got '{first_header}'")
                return False

            # Commit state only once the file has passed every check.
            self.doc = doc
            self.table = table
            self.header_row = header_row

            print(f"Successfully loaded CafeTran bilingual DOCX: {file_path}")
            print(f"Header: {self.header_row}")
            print(f"Total rows (including header): {len(self.table.rows)}")

            return True

        except Exception as e:
            print(f"ERROR loading CafeTran DOCX: {e}")
            import traceback
            traceback.print_exc()
            return False

    def extract_source_segments(self):
        """
        Extract all source segments from the CafeTran bilingual DOCX.

        Rows that cannot be read are skipped with a warning. Each extracted
        segment gets a ``row_index`` attribute recording the table row it came
        from, so that save() writes it back to that same row even when earlier
        rows were skipped.

        Returns:
            list: List of FormattedSegment objects with pipe symbols preserved
        """
        self.segments = []

        if self.table is None:
            print("ERROR: No table loaded")
            return []

        # Skip header row (index 0), process data rows
        for i, row in enumerate(self.table.rows[1:], start=1):
            try:
                cells = row.cells

                # Columns: ID | Source | Target | Notes | *
                segment = FormattedSegment(
                    segment_id=cells[0].text.strip(),
                    source_with_pipes=cells[1].text.strip(),
                    target_with_pipes=cells[2].text.strip() if len(cells) > 2 else "",
                    notes=cells[3].text.strip() if len(cells) > 3 else ""
                )
                # Remember the originating row; list position alone is not
                # reliable once a row has been skipped.
                segment.row_index = i

                self.segments.append(segment)

            except Exception as e:
                # Deliberate best effort: one unreadable row must not abort
                # the whole extraction.
                print(f"WARNING: Error processing row {i}: {e}")
                continue

        print(f"Extracted {len(self.segments)} segments from CafeTran DOCX")
        return self.segments

    def update_target_segments(self, translations):
        """
        Update target segments with translations.

        This method takes plain translations and applies the pipe symbol formatting
        from the source segments to create properly formatted target segments.

        Args:
            translations: Iterable of translated strings (without pipe symbols),
                one per extracted segment, in segment order

        Returns:
            bool: True if update successful, False otherwise
        """
        if not self.segments:
            print("ERROR: No segments loaded. Call extract_source_segments() first.")
            return False

        # Accept any iterable (e.g. a generator), not only sized sequences.
        translations = list(translations)

        if len(translations) != len(self.segments):
            print(f"ERROR: Translation count ({len(translations)}) doesn't match segment count ({len(self.segments)})")
            return False

        print(f"Updating {len(translations)} target segments with pipe formatting...")

        for i, (segment, translation) in enumerate(zip(self.segments, translations)):
            try:
                # Apply pipe symbol formatting from source to translation
                segment.target_with_pipes = self._apply_pipe_formatting(
                    source_with_pipes=segment.source_with_pipes,
                    translation_plain=translation
                )

            except Exception as e:
                print(f"WARNING: Error updating segment {i} (ID: {segment.segment_id}): {e}")
                # Fallback: use plain translation without formatting
                segment.target_with_pipes = translation

        print("Target segments updated successfully")
        return True

    def _apply_pipe_formatting(self, source_with_pipes, translation_plain):
        """
        Apply pipe symbol formatting from source to translation.

        This method is a placeholder that will be called by the main application.
        The actual formatting will be done by asking the AI to intelligently place
        pipe symbols in the translation based on their positions in the source.

        For now, this just returns the plain translation. The AI-based formatting
        will be handled in the main application during the translation process.

        Args:
            source_with_pipes: Source text with pipe symbols
            translation_plain: Translated text without pipe symbols

        Returns:
            str: Translation (will have pipes added by AI in the main app)
        """
        # Return plain translation for now
        # The main app will handle AI-based pipe placement during translation
        return translation_plain

    def save(self, output_path=None):
        """
        Save the CafeTran bilingual DOCX with updated target segments.

        Each segment is written to the table row recorded in its ``row_index``
        attribute; segments without one fall back to their list position + 1.
        Segments whose row is missing or has no target column are skipped with
        a warning instead of aborting the save.

        Args:
            output_path: Optional path for output file. If None, overwrites original.

        Returns:
            bool: True if saved successfully, False otherwise
        """
        if not self.doc or not self.table:
            print("ERROR: No document loaded")
            return False

        if not self.segments:
            print("ERROR: No segments to save")
            return False

        try:
            # Materialise the rows once instead of re-querying the table for
            # every segment.
            rows = list(self.table.rows)

            # Update the table cells with translated content
            for i, segment in enumerate(self.segments):
                row_idx = getattr(segment, 'row_index', None)
                if row_idx is None:
                    row_idx = i + 1  # +1 because row 0 is header

                if row_idx >= len(rows):
                    print(f"WARNING: Row index {row_idx} out of range, skipping segment {segment.segment_id}")
                    continue

                cells = rows[row_idx].cells
                if len(cells) < 3:
                    print(f"WARNING: Row {row_idx} has no target column, skipping segment {segment.segment_id}")
                    continue

                # Update target cell (column 2) with formatted pipe symbols
                target_cell = cells[2]

                # Clear existing content
                target_cell.text = ''

                # Add content with formatted pipe symbols (bold + red)
                self._add_text_with_formatted_pipes(target_cell, segment.target_with_pipes)

            # Save the document
            save_path = output_path or self.file_path
            self.doc.save(save_path)

            print(f"Successfully saved CafeTran bilingual DOCX to: {save_path}")
            return True

        except Exception as e:
            print(f"ERROR saving CafeTran DOCX: {e}")
            import traceback
            traceback.print_exc()
            return False

    def _add_text_with_formatted_pipes(self, cell, text_with_pipes):
        """
        Add text to a cell with pipe symbols formatted as bold and red.

        The first paragraph of the cell is cleared and refilled; a paragraph
        is created when the cell has none.

        Args:
            cell: The table cell to add text to
            text_with_pipes: Text containing pipe symbols (None is treated as empty)
        """
        # Split text by pipe symbols; every boundary between two parts is a pipe
        parts = (text_with_pipes or '').split('|')

        paragraphs = cell.paragraphs
        if len(paragraphs) == 0:
            paragraph = cell.add_paragraph()
        else:
            paragraph = paragraphs[0]
            paragraph.clear()

        # Add text parts with pipes formatted
        for i, part in enumerate(parts):
            if i > 0:
                # Add the pipe symbol with bold + red formatting
                pipe_run = paragraph.add_run('|')
                pipe_run.bold = True
                pipe_run.font.color.rgb = RGBColor(255, 0, 0)  # Red color

            if part:  # Only add non-empty parts
                paragraph.add_run(part)

    @staticmethod
    def is_cafetran_bilingual_docx(file_path):
        """
        Check if a DOCX file is a CafeTran bilingual DOCX.

        Args:
            file_path: Path to the DOCX file

        Returns:
            bool: True if file appears to be CafeTran bilingual DOCX, False otherwise
        """
        try:
            doc = Document(file_path)

            if len(doc.tables) == 0:
                return False

            table = doc.tables[0]

            if len(table.rows) < 2:
                return False

            # Check header row
            header = [cell.text.strip() for cell in table.rows[0].cells]

            # CafeTran bilingual DOCX should have:
            # - First column: "ID"
            # - At least 4-5 columns
            return len(header) >= 4 and header[0] == 'ID'

        except Exception as e:
            print(f"Error checking if file is CafeTran bilingual DOCX: {e}")
            return False
326
+
327
+
328
+ # Test function for standalone execution
329
def test_handler():
    """Manually exercise the CafeTran DOCX handler on the file named in argv[1].

    Prints the format check result, the load result, and the first five
    extracted segments. Returns early (after printing a message) when no
    file is given, the file is not recognised, or loading fails.
    """
    import sys

    argv = sys.argv
    if len(argv) < 2:
        print("Usage: python cafetran_docx_handler.py <cafetran_bilingual.docx>")
        return

    file_path = argv[1]
    rule = '=' * 60

    print(f"\n{rule}")
    print("Testing CafeTran DOCX Handler")
    print(f"{rule}\n")

    # Step 1: format detection
    print("Test 1: Checking file format...")
    is_cafetran = CafeTranDOCXHandler.is_cafetran_bilingual_docx(file_path)
    print(f"Is CafeTran bilingual DOCX: {is_cafetran}\n")
    if not is_cafetran:
        print("File does not appear to be a CafeTran bilingual DOCX")
        return

    # Step 2: loading
    print("Test 2: Loading file...")
    handler = CafeTranDOCXHandler()
    loaded = handler.load(file_path)
    if not loaded:
        print("Failed to load file")
        return
    print("File loaded successfully\n")

    # Step 3: segment extraction
    print("Test 3: Extracting segments...")
    segments = handler.extract_source_segments()
    print(f"Extracted {len(segments)} segments\n")

    # Show a short sample of what was extracted
    print("First 5 segments:")
    for number, seg in enumerate(segments[:5], start=1):
        shown_target = seg.target_with_pipes or '(empty)'
        print(f"\n Segment {number} (ID: {seg.segment_id}):")
        print(f" Source with pipes: {seg.source_with_pipes}")
        print(f" Plain text: {seg.plain_text}")
        print(f" Target: {shown_target}")

    print(f"\n{rule}")
    print("Testing complete!")
    print(f"{rule}\n")


# Run the manual smoke test only when executed as a script, not on import.
if __name__ == "__main__":
    test_handler()