supervertaler 1.9.163__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Supervertaler.py +48473 -0
- modules/__init__.py +10 -0
- modules/ai_actions.py +964 -0
- modules/ai_attachment_manager.py +343 -0
- modules/ai_file_viewer_dialog.py +210 -0
- modules/autofingers_engine.py +466 -0
- modules/cafetran_docx_handler.py +379 -0
- modules/config_manager.py +469 -0
- modules/database_manager.py +1911 -0
- modules/database_migrations.py +417 -0
- modules/dejavurtf_handler.py +779 -0
- modules/document_analyzer.py +427 -0
- modules/docx_handler.py +689 -0
- modules/encoding_repair.py +319 -0
- modules/encoding_repair_Qt.py +393 -0
- modules/encoding_repair_ui.py +481 -0
- modules/feature_manager.py +350 -0
- modules/figure_context_manager.py +340 -0
- modules/file_dialog_helper.py +148 -0
- modules/find_replace.py +164 -0
- modules/find_replace_qt.py +457 -0
- modules/glossary_manager.py +433 -0
- modules/image_extractor.py +188 -0
- modules/keyboard_shortcuts_widget.py +571 -0
- modules/llm_clients.py +1211 -0
- modules/llm_leaderboard.py +737 -0
- modules/llm_superbench_ui.py +1401 -0
- modules/local_llm_setup.py +1104 -0
- modules/model_update_dialog.py +381 -0
- modules/model_version_checker.py +373 -0
- modules/mqxliff_handler.py +638 -0
- modules/non_translatables_manager.py +743 -0
- modules/pdf_rescue_Qt.py +1822 -0
- modules/pdf_rescue_tkinter.py +909 -0
- modules/phrase_docx_handler.py +516 -0
- modules/project_home_panel.py +209 -0
- modules/prompt_assistant.py +357 -0
- modules/prompt_library.py +689 -0
- modules/prompt_library_migration.py +447 -0
- modules/quick_access_sidebar.py +282 -0
- modules/ribbon_widget.py +597 -0
- modules/sdlppx_handler.py +874 -0
- modules/setup_wizard.py +353 -0
- modules/shortcut_manager.py +932 -0
- modules/simple_segmenter.py +128 -0
- modules/spellcheck_manager.py +727 -0
- modules/statuses.py +207 -0
- modules/style_guide_manager.py +315 -0
- modules/superbench_ui.py +1319 -0
- modules/superbrowser.py +329 -0
- modules/supercleaner.py +600 -0
- modules/supercleaner_ui.py +444 -0
- modules/superdocs.py +19 -0
- modules/superdocs_viewer_qt.py +382 -0
- modules/superlookup.py +252 -0
- modules/tag_cleaner.py +260 -0
- modules/tag_manager.py +351 -0
- modules/term_extractor.py +270 -0
- modules/termbase_entry_editor.py +842 -0
- modules/termbase_import_export.py +488 -0
- modules/termbase_manager.py +1060 -0
- modules/termview_widget.py +1176 -0
- modules/theme_manager.py +499 -0
- modules/tm_editor_dialog.py +99 -0
- modules/tm_manager_qt.py +1280 -0
- modules/tm_metadata_manager.py +545 -0
- modules/tmx_editor.py +1461 -0
- modules/tmx_editor_qt.py +2784 -0
- modules/tmx_generator.py +284 -0
- modules/tracked_changes.py +900 -0
- modules/trados_docx_handler.py +430 -0
- modules/translation_memory.py +715 -0
- modules/translation_results_panel.py +2134 -0
- modules/translation_services.py +282 -0
- modules/unified_prompt_library.py +659 -0
- modules/unified_prompt_manager_qt.py +3951 -0
- modules/voice_commands.py +920 -0
- modules/voice_dictation.py +477 -0
- modules/voice_dictation_lite.py +249 -0
- supervertaler-1.9.163.dist-info/METADATA +906 -0
- supervertaler-1.9.163.dist-info/RECORD +85 -0
- supervertaler-1.9.163.dist-info/WHEEL +5 -0
- supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
- supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
- supervertaler-1.9.163.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CafeTran Bilingual DOCX Handler
|
|
3
|
+
|
|
4
|
+
This module handles the import and export of CafeTran bilingual DOCX files.
|
|
5
|
+
CafeTran uses a simple table-based format with pipe symbols (|) to mark formatted text.
|
|
6
|
+
|
|
7
|
+
Format Structure:
|
|
8
|
+
- Table with columns: ID | Source | Target | Notes | *
|
|
9
|
+
- Pipe symbols (|) surround formatted text in the source column
|
|
10
|
+
- Examples:
|
|
11
|
+
- |Atalanta| = underlined text
|
|
12
|
+
- Biagio Pagano| = bold text (pipe at end)
|
|
13
|
+
- |text| = formatted text (underlined)
|
|
14
|
+
|
|
15
|
+
The pipe symbols are preserved during translation and applied to the target text.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
from docx import Document
|
|
20
|
+
from docx.shared import RGBColor, Pt
|
|
21
|
+
from docx.enum.text import WD_UNDERLINE
|
|
22
|
+
import re
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FormattedSegment:
    """
    Represents a text segment with formatting information using pipe symbols.

    Attributes:
        segment_id: Identifier of the segment as given by the caller.
        source_with_pipes: Source text, including any ``|`` markers.
        target_with_pipes: Target text, including any ``|`` markers.
        notes: Free-form notes associated with the segment.
    """

    # Maximum number of characters of source/target text shown by __repr__.
    _PREVIEW_LENGTH = 50

    def __init__(self, segment_id, source_with_pipes, target_with_pipes="", notes=""):
        self.segment_id = segment_id
        self.source_with_pipes = source_with_pipes  # Source text with | markers
        self.target_with_pipes = target_with_pipes  # Target text with | markers
        self.notes = notes

    @property
    def plain_text(self):
        """Get source text with pipe symbols removed for translation."""
        return self.source_with_pipes.replace('|', '')

    @classmethod
    def _preview(cls, text):
        """Return *text* shortened for display, marking truncation with '...'.

        The ellipsis is only appended when characters were actually dropped,
        so a short or empty value is shown exactly as it is. ``None`` is
        rendered as an empty string so that ``repr()`` never raises.
        """
        text = '' if text is None else str(text)
        if len(text) <= cls._PREVIEW_LENGTH:
            return text
        return text[:cls._PREVIEW_LENGTH] + '...'

    def __repr__(self):
        return (
            f"FormattedSegment(id={self.segment_id}, "
            f"source='{self._preview(self.source_with_pipes)}', "
            f"target='{self._preview(self.target_with_pipes)}')"
        )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class CafeTranDOCXHandler:
    """
    Handler for CafeTran bilingual DOCX files.

    This class provides methods to:
    - Load and parse CafeTran bilingual DOCX files
    - Extract source segments with formatting markers (pipe symbols)
    - Update target segments with translations
    - Save modified files while preserving formatting

    Typical call order: ``load()`` -> ``extract_source_segments()`` ->
    ``update_target_segments()`` -> ``save()``.
    """

    def __init__(self):
        self.doc = None         # Document object of the loaded file
        self.table = None       # First table of the document (the bilingual table)
        self.segments = []      # Segments produced by extract_source_segments()
        self.file_path = None   # Path the current document was loaded from
        self.header_row = None  # Stripped texts of the table's first row

    def load(self, file_path):
        """
        Load a CafeTran bilingual DOCX file.

        The handler state (``doc``, ``table``, ``header_row``, ``file_path``)
        is only replaced once the file has passed every check, so a failed
        load never leaves the handler pointing at a half-validated document.
        A successful load also discards segments extracted from a previously
        loaded file, so they cannot be written into the new one by accident.

        Args:
            file_path: Path to the CafeTran bilingual DOCX file

        Returns:
            bool: True if loaded successfully, False otherwise
        """
        try:
            doc = Document(file_path)

            # CafeTran bilingual files should have exactly one table
            if len(doc.tables) == 0:
                print(f"ERROR: No table found in {file_path}")
                return False

            table = doc.tables[0]
            rows = table.rows
            row_count = len(rows)

            # A usable file needs the header row plus at least one data row
            if row_count < 2:
                print("ERROR: Table has insufficient rows")
                return False

            # Header row should be: ID, filename, filename, Notes, *
            header_row = [cell.text.strip() for cell in rows[0].cells]

            # Check if this looks like a CafeTran bilingual DOCX
            first_header = header_row[0] if header_row else ''
            if first_header != 'ID':
                print(f"ERROR: First column header should be 'ID', got '{first_header}'")
                return False

            # Everything validated: commit the new state in one go
            self.file_path = file_path
            self.doc = doc
            self.table = table
            self.header_row = header_row
            self.segments = []

            print(f"Successfully loaded CafeTran bilingual DOCX: {file_path}")
            print(f"Header: {self.header_row}")
            print(f"Total rows (including header): {row_count}")

            return True

        except Exception as e:
            print(f"ERROR loading CafeTran DOCX: {e}")
            import traceback
            traceback.print_exc()
            return False

    def extract_source_segments(self):
        """
        Extract all source segments from the CafeTran bilingual DOCX.

        Rows that cannot be processed are reported and skipped. Every
        extracted segment is tagged with a ``row_index`` attribute (the index
        of its table row), which ``save()`` uses to write the target back to
        the row the segment came from even when earlier rows were skipped.

        Returns:
            list: List of FormattedSegment objects with pipe symbols preserved
        """
        self.segments = []

        if self.table is None:
            print("ERROR: No table loaded")
            return []

        # Skip header row (index 0), process data rows
        for i, row in enumerate(self.table.rows[1:], start=1):
            try:
                cells = row.cells

                # Extract data from columns; target and notes are optional
                segment_id = cells[0].text.strip()
                source = cells[1].text.strip()
                target = cells[2].text.strip() if len(cells) > 2 else ""
                notes = cells[3].text.strip() if len(cells) > 3 else ""

                # Create FormattedSegment with pipe symbols preserved
                segment = FormattedSegment(
                    segment_id=segment_id,
                    source_with_pipes=source,
                    target_with_pipes=target,
                    notes=notes
                )
                # Remember the originating row so save() stays aligned
                segment.row_index = i

                self.segments.append(segment)

            except Exception as e:
                print(f"WARNING: Error processing row {i}: {e}")
                continue

        print(f"Extracted {len(self.segments)} segments from CafeTran DOCX")
        return self.segments

    def update_target_segments(self, translations):
        """
        Update target segments with translations.

        This method takes plain translations and applies the pipe symbol formatting
        from the source segments to create properly formatted target segments.

        Args:
            translations: List of translated strings (without pipe symbols);
                must contain exactly one entry per extracted segment

        Returns:
            bool: True if update successful, False otherwise
        """
        if not self.segments:
            print("ERROR: No segments loaded. Call extract_source_segments() first.")
            return False

        if len(translations) != len(self.segments):
            print(f"ERROR: Translation count ({len(translations)}) doesn't match segment count ({len(self.segments)})")
            return False

        print(f"Updating {len(translations)} target segments with pipe formatting...")

        for i, (segment, translation) in enumerate(zip(self.segments, translations)):
            try:
                # Apply pipe symbol formatting from source to translation
                formatted_translation = self._apply_pipe_formatting(
                    source_with_pipes=segment.source_with_pipes,
                    translation_plain=translation
                )

                segment.target_with_pipes = formatted_translation

            except Exception as e:
                # Deliberate best effort: one bad segment must not block the rest
                print(f"WARNING: Error updating segment {i} (ID: {segment.segment_id}): {e}")
                # Fallback: use plain translation without formatting
                segment.target_with_pipes = translation

        print("Target segments updated successfully")
        return True

    def _apply_pipe_formatting(self, source_with_pipes, translation_plain):
        """
        Apply pipe symbol formatting from source to translation.

        This method is a placeholder that will be called by the main application.
        The actual formatting will be done by asking the AI to intelligently place
        pipe symbols in the translation based on their positions in the source.

        For now, this just returns the plain translation. The AI-based formatting
        will be handled in the main application during the translation process.

        Args:
            source_with_pipes: Source text with pipe symbols
            translation_plain: Translated text without pipe symbols

        Returns:
            str: Translation (will have pipes added by AI in the main app)
        """
        # Return plain translation for now
        # The main app will handle AI-based pipe placement during translation
        return translation_plain

    def save(self, output_path=None):
        """
        Save the CafeTran bilingual DOCX with updated target segments.

        Each segment is written to the row recorded in its ``row_index``
        attribute; segments without one fall back to their position in
        ``self.segments`` (first segment -> first data row). Segments whose
        row does not exist or has no target column are reported and skipped
        instead of aborting the whole save.

        Args:
            output_path: Optional path for output file. If None, overwrites original.

        Returns:
            bool: True if saved successfully, False otherwise
        """
        if self.doc is None or self.table is None:
            print("ERROR: No document loaded")
            return False

        if not self.segments:
            print("ERROR: No segments to save")
            return False

        try:
            # Materialize the rows once instead of re-reading them per segment
            rows = list(self.table.rows)
            row_count = len(rows)

            # Update the table cells with translated content
            for i, segment in enumerate(self.segments):
                row_idx = getattr(segment, 'row_index', None)
                if row_idx is None:
                    row_idx = i + 1  # +1 because row 0 is header

                if row_idx >= row_count:
                    print(f"WARNING: Row index {row_idx} out of range, skipping segment {segment.segment_id}")
                    continue

                cells = rows[row_idx].cells
                if len(cells) < 3:
                    print(f"WARNING: Row {row_idx} has no target column, skipping segment {segment.segment_id}")
                    continue

                # Update target cell (column 2) with formatted pipe symbols
                target_cell = cells[2]

                # Clear existing content
                target_cell.text = ''

                # Add content with formatted pipe symbols (bold + red)
                self._add_text_with_formatted_pipes(target_cell, segment.target_with_pipes)

            # Save the document
            save_path = output_path if output_path else self.file_path
            self.doc.save(save_path)

            print(f"Successfully saved CafeTran bilingual DOCX to: {save_path}")
            return True

        except Exception as e:
            print(f"ERROR saving CafeTran DOCX: {e}")
            import traceback
            traceback.print_exc()
            return False

    def _add_text_with_formatted_pipes(self, cell, text_with_pipes):
        """
        Add text to a cell with pipe symbols formatted as bold and red.

        The cell's first paragraph is emptied and refilled (a paragraph is
        added when the cell has none). Every ``|`` becomes its own bold, red
        run; the text between pipes is added as plain runs.

        Args:
            cell: The table cell to add text to
            text_with_pipes: Text containing pipe symbols; ``None`` is
                treated as empty text
        """
        # Split text by pipe symbols
        parts = (text_with_pipes or '').split('|')

        # Reuse the first paragraph of the cell, or create one
        paragraphs = cell.paragraphs
        if len(paragraphs) == 0:
            paragraph = cell.add_paragraph()
        else:
            paragraph = paragraphs[0]
            paragraph.clear()

        # Add text parts with pipes formatted
        for i, part in enumerate(parts):
            if i > 0:
                # A pipe stood between the previous part and this one
                pipe_run = paragraph.add_run('|')
                pipe_run.bold = True
                pipe_run.font.color.rgb = RGBColor(255, 0, 0)  # Red color

            if part:  # Only add non-empty parts
                paragraph.add_run(part)

    @staticmethod
    def is_cafetran_bilingual_docx(file_path):
        """
        Check if a DOCX file is a CafeTran bilingual DOCX.

        The file qualifies when its first table has at least two rows and a
        header row of four or more columns whose first cell is ``ID``.

        Args:
            file_path: Path to the DOCX file

        Returns:
            bool: True if file appears to be CafeTran bilingual DOCX, False otherwise
        """
        try:
            doc = Document(file_path)

            if len(doc.tables) == 0:
                return False

            table = doc.tables[0]
            rows = table.rows

            if len(rows) < 2:
                return False

            # Check header row
            header = [cell.text.strip() for cell in rows[0].cells]

            # CafeTran bilingual DOCX should have:
            # - First column: "ID"
            # - At least 4-5 columns
            return len(header) >= 4 and header[0] == 'ID'

        except Exception as e:
            print(f"Error checking if file is CafeTran bilingual DOCX: {e}")
            return False
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
# Test function for standalone execution
|
|
329
|
+
def test_handler():
    """Run a manual smoke test of the handler on the file named in sys.argv[1].

    Prints a usage line and returns when no file argument is given. Otherwise
    it checks the file format, loads the file, extracts its segments and
    prints the first five of them, stopping early at the first failing step.
    """
    import sys

    arguments = sys.argv
    if len(arguments) < 2:
        print("Usage: python cafetran_docx_handler.py <cafetran_bilingual.docx>")
        return

    docx_path = arguments[1]
    rule = '=' * 60

    print(f"\n{rule}")
    print("Testing CafeTran DOCX Handler")
    print(f"{rule}\n")

    # Step 1: format detection
    print("Test 1: Checking file format...")
    looks_like_cafetran = CafeTranDOCXHandler.is_cafetran_bilingual_docx(docx_path)
    print(f"Is CafeTran bilingual DOCX: {looks_like_cafetran}\n")
    if not looks_like_cafetran:
        print("File does not appear to be a CafeTran bilingual DOCX")
        return

    # Step 2: loading
    print("Test 2: Loading file...")
    handler = CafeTranDOCXHandler()
    loaded = handler.load(docx_path)
    if not loaded:
        print("Failed to load file")
        return
    print("File loaded successfully\n")

    # Step 3: segment extraction
    print("Test 3: Extracting segments...")
    segments = handler.extract_source_segments()
    print(f"Extracted {len(segments)} segments\n")

    # Show a short sample of what was extracted
    print("First 5 segments:")
    for number, seg in enumerate(segments[:5], start=1):
        target_display = seg.target_with_pipes or '(empty)'
        print(f"\n Segment {number} (ID: {seg.segment_id}):")
        print(f" Source with pipes: {seg.source_with_pipes}")
        print(f" Plain text: {seg.plain_text}")
        print(f" Target: {target_display}")

    print(f"\n{rule}")
    print("Testing complete!")
    print(f"{rule}\n")
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
# Run the manual smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_handler()
|