supervertaler 1.9.153__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of supervertaler might be problematic. Click here for more details.

Files changed (85)
  1. Supervertaler.py +47886 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1878 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +333 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1172 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.153.dist-info/METADATA +896 -0
  81. supervertaler-1.9.153.dist-info/RECORD +85 -0
  82. supervertaler-1.9.153.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.153.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.153.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.153.dist-info/top_level.txt +2 -0
@@ -0,0 +1,874 @@
1
+ """
2
+ Trados Studio Package Handler (SDLPPX/SDLRPX)
3
+
4
+ This module handles the import and export of Trados Studio project packages.
5
+ SDLPPX = Project Package (sent to translator)
6
+ SDLRPX = Return Package (sent back to PM)
7
+
8
+ Package Structure:
9
+ - .sdlppx/.sdlrpx = ZIP archive containing:
10
+ - *.sdlproj = XML project file with settings
11
+ - {source-lang}/*.sdlxliff = Bilingual XLIFF files
12
+ - {target-lang}/*.sdlxliff = Target language files (may be copies)
13
+ - Reports/ = Analysis reports (optional)
14
+
15
+ SDLXLIFF Format:
16
+ - XLIFF 1.2 with SDL namespace extensions
17
+ - <g> tags for inline formatting
18
+ - <x> tags for standalone elements
19
+ - <mrk mtype="seg"> for segment boundaries
20
+ - sdl:conf attribute for confirmation status
21
+
22
+ Author: Supervertaler
23
+ """
24
+
25
+ import os
26
+ import re
27
+ import zipfile
28
+ import shutil
29
+ import tempfile
30
+ import traceback
31
+ from pathlib import Path
32
+ from typing import List, Dict, Tuple, Optional, Any
33
+ from dataclasses import dataclass, field
34
+ from datetime import datetime
35
+ from xml.etree import ElementTree as ET
36
+ from copy import deepcopy
37
+
38
# XML namespaces that occur in SDLXLIFF documents, keyed by the prefix used
# in ElementTree path expressions throughout this module.
NAMESPACES = dict(
    xliff='urn:oasis:names:tc:xliff:document:1.2',
    sdl='http://sdl.com/FileTypes/SdlXliff/1.0',
    xsi='http://www.w3.org/2001/XMLSchema-instance',
)

# Tell ElementTree which prefixes to emit when serialising. The XLIFF
# namespace is registered with an empty prefix so it is written as the
# default namespace rather than as "xliff:".
for prefix, uri in NAMESPACES.items():
    if prefix == 'xliff':
        ET.register_namespace('', uri)
    else:
        ET.register_namespace(prefix, uri)
48
+
49
+
50
@dataclass
class SDLSegment:
    """A single segment read from an SDLXLIFF file.

    Attributes:
        segment_id: Unique ID within the file.
        trans_unit_id: ID of the parent trans-unit.
        source_text: Plain source text (inline tags converted to markers).
        target_text: Plain text translation.
        source_xml: Original source XML, tags included.
        target_xml: Target XML, tags included.
        status: not_translated, draft, translated, etc.
        match_percent: TM match percentage.
        origin: mt, tm, document-match, etc.
        text_match: "SourceAndTarget" = CM, "Source" = 100%.
        locked: Whether the segment is locked.
        file_path: The SDLXLIFF file the segment came from.
    """

    segment_id: str
    trans_unit_id: str
    source_text: str
    target_text: str
    source_xml: str
    target_xml: str
    status: str
    match_percent: int = 0
    origin: str = ""
    text_match: str = ""
    locked: bool = False
    file_path: str = ""
65
+
66
+
67
@dataclass
class SDLXLIFFFile:
    """One SDLXLIFF file belonging to a package.

    Attributes:
        file_path: Path of the file within the package.
        original_name: Name of the original document.
        source_lang: Source language code.
        target_lang: Target language code.
        segments: Segments parsed from the file.
        tree: Parsed XML tree, retained so the file can be modified.
        root: Root element of ``tree``.
    """

    file_path: str
    original_name: str
    source_lang: str
    target_lang: str
    segments: List[SDLSegment] = field(default_factory=list)

    # Parsed XML kept around for later modification
    tree: Any = None
    root: Any = None
79
+
80
+
81
@dataclass
class TradosPackage:
    """A Trados Studio project package.

    Attributes:
        package_path: Location of the package file.
        package_type: 'sdlppx' or 'sdlrpx'.
        project_name: Name of the project.
        source_lang: Source language code.
        target_lang: Target language code.
        created_at: Package creation timestamp as found in the project file.
        created_by: Package creator as found in the project file.
        xliff_files: SDLXLIFF files contained in the package.
        extract_dir: Directory the package was extracted to.
    """

    package_path: str
    package_type: str
    project_name: str
    source_lang: str
    target_lang: str
    created_at: str
    created_by: str

    # Files contained in the package
    xliff_files: List[SDLXLIFFFile] = field(default_factory=list)

    # Where the package was unpacked
    extract_dir: str = ""
97
+
98
+
99
class SDLXLIFFParser:
    """
    Parser for SDLXLIFF files (Trados bilingual XLIFF format).

    Reads an SDLXLIFF document with ElementTree and converts every
    trans-unit into one or more SDLSegment objects. XLIFF elements are looked
    up with the XLIFF 1.2 namespace first and without any namespace as a
    fallback; segment metadata comes from the SDL ``seg-defs`` extension.
    """

    # Tag pattern for SDL inline tags
    TAG_PATTERN = re.compile(r'<(g|x|bx|ex|ph|it|mrk)\s[^>]*>|</(g|x|bx|ex|ph|it|mrk)>')

    # Inline elements that become one self-closing marker ("<id/>") in the
    # plain text. "it" is included so that the native code an isolated tag
    # carries as content does not end up in the translatable text.
    _STANDALONE_TAGS = frozenset({'x', 'ph', 'bx', 'ex', 'it'})

    # SDL "conf" attribute value -> internal status name
    _CONF_TO_STATUS = {
        'Draft': 'draft',
        'Translated': 'translated',
        'ApprovedTranslation': 'approved',
        'ApprovedSignOff': 'approved',
        'RejectedTranslation': 'rejected',
        'RejectedSignOff': 'rejected'
    }

    def __init__(self, log_callback=None):
        """
        Args:
            log_callback: Callable receiving log messages; ``print`` is used
                when none is given.
        """
        self.log = log_callback or print

    @staticmethod
    def _find_xliff(parent: ET.Element, tag: str) -> Optional[ET.Element]:
        """Return the first direct child *tag*, namespaced or (failing that) bare."""
        found = parent.find(f'xliff:{tag}', NAMESPACES)
        if found is None:
            found = parent.find(tag)
        return found

    @staticmethod
    def _findall_xliff(parent: ET.Element, tag: str, predicate: str = '') -> List[ET.Element]:
        """Return all descendants *tag* (optionally filtered by an XPath
        predicate), namespaced or - when that yields nothing - bare."""
        found = parent.findall(f'.//xliff:{tag}{predicate}', NAMESPACES)
        if not found:
            found = parent.findall(f'.//{tag}{predicate}')
        return found

    def parse_file(self, file_path: str) -> Optional["SDLXLIFFFile"]:
        """
        Parse an SDLXLIFF file and extract segments.

        Only the first <file> element of the document is read.

        Args:
            file_path: Path to the SDLXLIFF file

        Returns:
            SDLXLIFFFile object with parsed segments (without segments when
            the file has no <body>), or None when the document has no <file>
            element or any error occurs while parsing. Errors are reported
            through the log callback and never raised.
        """
        try:
            tree = ET.parse(file_path)
            root = tree.getroot()

            file_elem = self._find_xliff(root, 'file')
            if file_elem is None:
                self.log(f"ERROR: No <file> element found in {file_path}")
                return None

            xliff_file = SDLXLIFFFile(
                file_path=file_path,
                original_name=file_elem.get('original', Path(file_path).stem),
                source_lang=file_elem.get('source-language', 'en'),
                target_lang=file_elem.get('target-language', ''),
                tree=tree,
                root=root
            )

            body = self._find_xliff(file_elem, 'body')
            if body is None:
                self.log(f"ERROR: No <body> element found in {file_path}")
                return xliff_file

            # Trans-units may be nested inside groups, hence the deep search
            for tu in self._findall_xliff(body, 'trans-unit'):
                xliff_file.segments.extend(self._parse_trans_unit(tu, file_path))

            self.log(f"Parsed {len(xliff_file.segments)} segments from {Path(file_path).name}")
            return xliff_file

        except Exception as e:
            # Top-level boundary: report and signal failure with None
            self.log(f"ERROR parsing SDLXLIFF: {e}")
            traceback.print_exc()
            return None

    def _parse_trans_unit(self, tu: ET.Element, file_path: str) -> List["SDLSegment"]:
        """Parse a trans-unit element into segments.

        A trans-unit with a <seg-source> child yields one segment per
        ``mrk mtype="seg"`` marker; any other trans-unit yields a single
        segment whose ID is the trans-unit ID. A trans-unit without <source>
        yields nothing.
        """
        source_elem = self._find_xliff(tu, 'source')
        if source_elem is None:
            return []

        tu_id = tu.get('id', '')
        target_elem = self._find_xliff(tu, 'target')
        seg_source = self._find_xliff(tu, 'seg-source')

        if seg_source is not None:
            return self._parse_segmented_unit(tu, tu_id, seg_source, target_elem, file_path)

        # Single segment
        target_xml = ""
        target_text = ""
        if target_elem is not None:
            target_xml = self._element_to_string(target_elem)
            target_text = self._extract_text(target_elem)

        # SDL-specific metadata: first sdl:seg found anywhere in the unit
        sdl_seg = tu.find('.//sdl:seg', {'sdl': NAMESPACES['sdl']})

        return [SDLSegment(
            segment_id=tu_id,
            trans_unit_id=tu_id,
            source_text=self._extract_text(source_elem),
            target_text=target_text,
            source_xml=self._element_to_string(source_elem),
            target_xml=target_xml,
            status=self._get_segment_status(tu, sdl_seg),
            match_percent=self._get_match_percent(sdl_seg),
            origin=self._get_origin(sdl_seg),
            text_match=self._get_text_match(sdl_seg),
            locked=self._is_locked(tu, sdl_seg),
            file_path=file_path
        )]

    def _parse_segmented_unit(self, tu: ET.Element, tu_id: str,
                              seg_source: ET.Element, target_elem: Optional[ET.Element],
                              file_path: str) -> List["SDLSegment"]:
        """Parse a trans-unit with segmented (mrk) content.

        Source markers are paired with target markers and with sdl:seg
        definitions through their ``mid`` / ``id`` value. Segment IDs have
        the form ``"{tu_id}_{mid}"``. Source markers without ``mid`` are
        skipped.
        """
        seg_filter = '[@mtype="seg"]'
        source_mrks = self._findall_xliff(seg_source, 'mrk', seg_filter)

        target_mrks = []
        if target_elem is not None:
            target_mrks = self._findall_xliff(target_elem, 'mrk', seg_filter)
        target_mrk_map = {mrk.get('mid'): mrk for mrk in target_mrks}

        # Per-segment metadata lives in sdl:seg-defs, keyed by the seg "id"
        sdl_ns = {'sdl': NAMESPACES['sdl']}
        seg_def_map = {}
        seg_defs = tu.find('sdl:seg-defs', sdl_ns)
        if seg_defs is not None:
            for seg in seg_defs.findall('sdl:seg', sdl_ns):
                seg_key = seg.get('id')
                if seg_key:
                    seg_def_map[seg_key] = seg

        segments = []
        for source_mrk in source_mrks:
            mid = source_mrk.get('mid')
            if not mid:
                continue

            target_mrk = target_mrk_map.get(mid)
            target_xml = ""
            target_text = ""
            if target_mrk is not None:
                target_xml = self._element_inner_xml(target_mrk)
                target_text = self._extract_text(target_mrk)

            seg_def = seg_def_map.get(mid)
            segments.append(SDLSegment(
                segment_id=f"{tu_id}_{mid}",
                trans_unit_id=tu_id,
                source_text=self._extract_text(source_mrk),
                target_text=target_text,
                source_xml=self._element_inner_xml(source_mrk),
                target_xml=target_xml,
                status=self._get_segment_status(tu, seg_def),
                match_percent=self._get_match_percent(seg_def),
                origin=self._get_origin(seg_def),
                text_match=self._get_text_match(seg_def),
                locked=self._is_locked(tu, seg_def),
                file_path=file_path
            ))

        return segments

    def _element_to_string(self, elem: ET.Element) -> str:
        """Convert element to string including tags."""
        return ET.tostring(elem, encoding='unicode')

    def _element_inner_xml(self, elem: ET.Element) -> str:
        """Get inner XML of an element (content without the element itself)."""
        parts = [elem.text or ""]
        parts.extend(ET.tostring(child, encoding='unicode') for child in elem)
        return ''.join(parts)

    def _extract_text(self, elem: ET.Element) -> str:
        """Extract plain text from element, converting tags to markers.

        - ``<g id="N">...</g>`` becomes ``<N>...</N>``
        - ``<x>``, ``<ph>``, ``<bx>``, ``<ex>`` and ``<it>`` become ``<N/>``;
          their own content is not copied
        - ``<mrk>`` and unknown elements contribute only their content
        """
        text_parts = []

        def walk(el):
            if el.text:
                text_parts.append(el.text)

            for child in el:
                tag_name = child.tag.split('}')[-1]
                tag_id = child.get('id', '')

                if tag_name == 'g':
                    # Paired tag - convert to Supervertaler format
                    text_parts.append(f'<{tag_id}>')
                    walk(child)
                    text_parts.append(f'</{tag_id}>')
                elif tag_name in self._STANDALONE_TAGS:
                    text_parts.append(f'<{tag_id}/>')
                else:
                    # mrk or unknown element: keep the content only
                    walk(child)

                if child.tail:
                    text_parts.append(child.tail)

        walk(elem)
        return ''.join(text_parts)

    def _get_segment_status(self, tu: ET.Element, seg_def: Optional[ET.Element]) -> str:
        """Get segment status from SDL attributes.

        Returns 'not_translated' when there is no segment definition, no
        ``conf`` attribute, or an unrecognised ``conf`` value.
        """
        if seg_def is not None:
            conf = seg_def.get('conf')
            if conf:
                return self._CONF_TO_STATUS.get(conf, 'not_translated')
        return 'not_translated'

    def _get_match_percent(self, seg_def: Optional[ET.Element]) -> int:
        """Get TM match percentage (0 when absent or not an integer)."""
        if seg_def is not None:
            percent = seg_def.get('percent')
            if percent:
                try:
                    return int(percent)
                except ValueError:
                    pass
        return 0

    def _get_origin(self, seg_def: Optional[ET.Element]) -> str:
        """Get segment origin (tm, mt, document-match, etc.), lower-cased."""
        if seg_def is not None:
            origin = seg_def.get('origin')
            if origin:
                return origin.lower()
        return ""

    def _get_text_match(self, seg_def: Optional[ET.Element]) -> str:
        """Get text-match attribute (SourceAndTarget = CM, Source = 100%)."""
        if seg_def is not None:
            text_match = seg_def.get('text-match')
            if text_match:
                return text_match
        return ""

    def _is_locked(self, tu: ET.Element, seg_def: Optional[ET.Element]) -> bool:
        """Check if segment is locked.

        A segment counts as locked when its definition has ``locked="true"``
        or its trans-unit has ``translate="no"`` (both case-insensitive).
        """
        if seg_def is not None:
            locked = seg_def.get('locked')
            if locked and locked.lower() == 'true':
                return True

        translate = tu.get('translate')
        return bool(translate and translate.lower() == 'no')
413
+
414
+
415
class TradosPackageHandler:
    """
    Handler for Trados Studio project packages (SDLPPX/SDLRPX).

    This class provides methods to:
    - Extract and parse SDLPPX packages
    - Import segments into Supervertaler projects
    - Update translations in SDLXLIFF files
    - Create return packages (SDLRPX)
    """

    # Internal status -> SDL "conf" attribute value
    _STATUS_TO_CONF = {
        'translated': 'Translated',
        'approved': 'ApprovedTranslation',
        'confirmed': 'ApprovedTranslation',
        'draft': 'Draft',
        'not_translated': 'Draft',
    }

    def __init__(self, log_callback=None):
        """
        Args:
            log_callback: Callable receiving log messages; ``print`` is used
                when none is given.
        """
        self.log = log_callback or print
        self.parser = SDLXLIFFParser(log_callback)
        self.package: Optional["TradosPackage"] = None
        self.extract_dir: Optional[Path] = None

    def load_package(self, package_path: str, extract_dir: Optional[str] = None) -> Optional["TradosPackage"]:
        """
        Load and extract a Trados package.

        Args:
            package_path: Path to .sdlppx or .sdlrpx file
            extract_dir: Directory to extract to (temp if not specified)

        Returns:
            TradosPackage object with parsed content, or None when the file
            is missing, has another extension, contains no .sdlproj file at
            its top level, or any error occurs. Errors are logged, not raised.
        """
        try:
            package_path = Path(package_path)

            if not package_path.exists():
                self.log(f"ERROR: Package not found: {package_path}")
                return None

            ext = package_path.suffix.lower()
            if ext not in ('.sdlppx', '.sdlrpx'):
                self.log(f"ERROR: Not a Trados package: {ext}")
                return None

            # The package type is the extension without its dot
            package_type = ext[1:]

            if extract_dir:
                self.extract_dir = Path(extract_dir)
            else:
                self.extract_dir = Path(tempfile.mkdtemp(prefix='sdlppx_'))
            self.extract_dir.mkdir(parents=True, exist_ok=True)

            self.log(f"Extracting {package_path.name}...")
            with zipfile.ZipFile(package_path, 'r') as zf:
                zf.extractall(self.extract_dir)

            # The project file sits at the top level of the archive
            project_file = next(self.extract_dir.glob('*.sdlproj'), None)
            if project_file is None:
                self.log("ERROR: No .sdlproj file found in package")
                return None

            project_info = self._parse_project_file(project_file)

            self.package = TradosPackage(
                package_path=str(package_path),
                package_type=package_type,
                project_name=project_info.get('name', package_path.stem),
                source_lang=project_info.get('source_lang', 'en'),
                target_lang=project_info.get('target_lang', ''),
                created_at=project_info.get('created_at', ''),
                created_by=project_info.get('created_by', ''),
                extract_dir=str(self.extract_dir)
            )

            self._load_xliff_files()

            total_segments = sum(len(f.segments) for f in self.package.xliff_files)
            self.log(f"Loaded package: {self.package.project_name}")
            self.log(f" Languages: {self.package.source_lang} → {self.package.target_lang}")
            self.log(f" Files: {len(self.package.xliff_files)}")
            self.log(f" Segments: {total_segments}")

            return self.package

        except Exception as e:
            # Top-level boundary: report and signal failure with None
            self.log(f"ERROR loading package: {e}")
            traceback.print_exc()
            return None

    def _parse_project_file(self, project_file: Path) -> Dict:
        """Parse the .sdlproj XML file for project metadata.

        Returns a dict that may contain 'name', 'created_at', 'created_by',
        'source_lang' and 'target_lang'. The dict is empty when the file
        cannot be parsed; the language keys are missing when the file has no
        LanguageDirection element.
        """
        info = {}

        try:
            root = ET.parse(project_file).getroot()

            # Project name: the file name up to its first hyphen
            info['name'] = project_file.stem.split('-')[0]

            info['created_at'] = root.get('PackageCreatedAt', '')
            info['created_by'] = root.get('PackageCreatedBy', '')

            # Only the first language direction is used
            lang_dir = root.find('.//LanguageDirection')
            if lang_dir is not None:
                info['source_lang'] = lang_dir.get('SourceLanguageCode', 'en')
                info['target_lang'] = lang_dir.get('TargetLanguageCode', '')

        except Exception as e:
            self.log(f"Warning: Could not parse project file: {e}")

        return info

    def _load_folder(self, folder: Path) -> None:
        """Parse every *.sdlxliff directly inside *folder* into the package."""
        for xliff_path in folder.glob('*.sdlxliff'):
            xliff_file = self.parser.parse_file(str(xliff_path))
            if xliff_file:
                self.package.xliff_files.append(xliff_file)

    @staticmethod
    def _find_target_folder(extract_path: Path, target_lang: str, source_lang: str) -> Optional[Path]:
        """Locate the target-language folder when the exact path is missing.

        Folder names are compared lower-cased with '_' treated as '-'. A
        folder whose name equals *target_lang* always wins, even when source
        and target share a base language (e.g. en-US -> en-GB). Otherwise the
        first folder that starts with the target's base language and does not
        start with the source's base language is used.
        """
        target_base = target_lang.split('-')[0]
        source_base = source_lang.split('-')[0]

        prefix_match = None
        for folder in extract_path.iterdir():
            if not folder.is_dir():
                continue
            name = folder.name.lower().replace('_', '-')
            if name == target_lang:
                return folder
            if (prefix_match is None
                    and name.startswith(target_base)
                    and not name.startswith(source_base)):
                prefix_match = folder
        return prefix_match

    def _load_xliff_files(self):
        """Find and load SDLXLIFF files from the TARGET language folder only.

        Trados packages contain SDLXLIFF files in both source and target language
        folders. We only want to load from the target folder (e.g., nl-nl/) since
        that's where the translator works.
        """
        if not self.package or not self.extract_dir:
            return

        extract_path = Path(self.extract_dir)
        target_lang = self.package.target_lang.lower()

        target_folder = extract_path / target_lang
        if target_folder.exists():
            self.log(f"Loading SDLXLIFF files from target folder: {target_lang}/")
            self._load_folder(target_folder)
            return

        # Fallback: match by language code pattern (nl-NL, nl-nl, nl_NL, ...)
        self.log(f"Target folder '{target_lang}' not found, searching alternatives...")
        folder = self._find_target_folder(
            extract_path, target_lang, self.package.source_lang.lower()
        )
        if folder is None:
            self.log(f"Warning: Could not find target language folder for {target_lang}")
            return

        self.log(f"Loading SDLXLIFF files from folder: {folder.name}/")
        self._load_folder(folder)

    def get_all_segments(self) -> List["SDLSegment"]:
        """Get all segments from all files in the package."""
        if not self.package:
            return []
        return [
            segment
            for xliff_file in self.package.xliff_files
            for segment in xliff_file.segments
        ]

    def update_segment(self, segment_id: str, target_text: str, status: str = 'translated') -> bool:
        """
        Update a segment's translation.

        Only the in-memory segment is changed; call save_xliff_files() to
        write the change to disk. The first segment with a matching ID wins.

        Args:
            segment_id: The segment ID to update
            target_text: New target text
            status: New status (translated, approved, etc.)

        Returns:
            True if updated successfully
        """
        if not self.package:
            return False

        for xliff_file in self.package.xliff_files:
            for segment in xliff_file.segments:
                if segment.segment_id == segment_id:
                    segment.target_text = target_text
                    segment.status = status
                    return True

        return False

    def update_translations(self, translations: Dict[str, str]) -> int:
        """
        Batch update translations.

        Every updated segment gets the status 'translated'.

        Args:
            translations: Dict mapping segment_id to target_text

        Returns:
            Number of segments updated
        """
        if not self.package:
            return 0

        # Index once instead of rescanning every file for every translation.
        # setdefault keeps the first occurrence of an ID, like update_segment.
        index = {}
        for xliff_file in self.package.xliff_files:
            for segment in xliff_file.segments:
                index.setdefault(segment.segment_id, segment)

        count = 0
        for segment_id, target_text in translations.items():
            segment = index.get(segment_id)
            if segment is not None:
                segment.target_text = target_text
                segment.status = 'translated'
                count += 1
        return count

    def save_xliff_files(self) -> bool:
        """
        Save all modified SDLXLIFF files.

        Each file that still has its parsed tree is updated from its segments
        and written back to its original path as UTF-8 with an XML
        declaration. Write errors propagate to the caller.

        Returns:
            True if a package is loaded, False otherwise
        """
        if not self.package:
            return False

        self.log("Saving SDLXLIFF files...")

        for xliff_file in self.package.xliff_files:
            # Compare with None: the truth value of an Element depends on its
            # number of children and testing it is deprecated.
            if xliff_file.tree is None or xliff_file.root is None:
                continue

            self._update_xliff_tree(xliff_file)
            xliff_file.tree.write(
                xliff_file.file_path,
                encoding='utf-8',
                xml_declaration=True
            )
            self.log(f" Saved: {Path(xliff_file.file_path).name}")

        return True

    def _update_xliff_tree(self, xliff_file: "SDLXLIFFFile"):
        """Update the XML tree with segment translations.

        Trans-units without a <target> element are left without one; only
        existing targets are updated.
        """
        segment_map = {s.segment_id: s for s in xliff_file.segments}
        root = xliff_file.root

        # Same namespaced-then-bare lookup the parser uses when reading
        trans_units = root.findall('.//xliff:trans-unit', NAMESPACES)
        if not trans_units:
            trans_units = root.findall('.//trans-unit')

        for tu in trans_units:
            tu_id = tu.get('id', '')

            target_elem = tu.find('xliff:target', NAMESPACES)
            if target_elem is None:
                target_elem = tu.find('target')

            seg_source = tu.find('xliff:seg-source', NAMESPACES)
            if seg_source is None:
                seg_source = tu.find('seg-source')

            if seg_source is not None:
                self._update_segmented_target(tu, target_elem, segment_map)
            else:
                # Single segment: its ID is the trans-unit ID
                segment = segment_map.get(tu_id)
                if segment is not None and target_elem is not None:
                    self._set_element_text(target_elem, segment.target_text)

            self._update_segment_status(tu, segment_map, tu_id)

    def _update_segmented_target(self, tu: ET.Element, target_elem: Optional[ET.Element],
                                 segment_map: Dict[str, "SDLSegment"]):
        """Update segmented target content with translations.

        Every ``mrk mtype="seg"`` in the target whose ``"{tu_id}_{mid}"`` is
        in *segment_map* receives that segment's target text. Nothing happens
        when the trans-unit has no target element.
        """
        if target_elem is None:
            return

        tu_id = tu.get('id', '')

        target_mrks = target_elem.findall('.//xliff:mrk[@mtype="seg"]', NAMESPACES)
        if not target_mrks:
            target_mrks = target_elem.findall('.//mrk[@mtype="seg"]')

        for mrk in target_mrks:
            mid = mrk.get('mid')
            if not mid:
                continue
            segment = segment_map.get(f"{tu_id}_{mid}")
            if segment is not None:
                self._set_element_text(mrk, segment.target_text)

    def _update_segment_status(self, tu: ET.Element, segment_map: Dict[str, "SDLSegment"], tu_id: str):
        """
        Update segment confirmation status in sdl:seg-defs.

        Changes the conf attribute from 'Draft' to 'Translated' for segments
        that have been translated in Supervertaler.
        """
        # Namespaced lookup first, then any direct child named *seg-defs
        seg_defs = tu.find('.//sdl:seg-defs', {'sdl': NAMESPACES['sdl']})
        if seg_defs is None:
            for child in tu:
                if child.tag.endswith('seg-defs'):
                    seg_defs = child
                    break

        if seg_defs is None:
            return

        for seg_elem in seg_defs:
            if not seg_elem.tag.endswith('seg'):
                continue

            seg_id = seg_elem.get('id', '')

            # Segmented content is keyed "tu_id_seg_id", a single segment "tu_id"
            segment = segment_map.get(f"{tu_id}_{seg_id}")
            if segment is None:
                segment = segment_map.get(tu_id)
            if segment is None:
                continue

            # NOTE(review): statuses missing from the table (e.g. 'rejected',
            # which the parser produces) fall back to 'Translated', so an
            # untouched rejected segment is saved as Translated - confirm
            # whether that is intended.
            new_conf = self._STATUS_TO_CONF.get(segment.status, 'Translated')

            # NOTE(review): with target text present, approved/confirmed are
            # written as 'Translated' too, which makes their table entries
            # effective only for empty targets - confirm intent.
            if segment.target_text and segment.status in ('translated', 'approved', 'confirmed'):
                new_conf = 'Translated'

            if seg_elem.get('conf', '') != new_conf:
                seg_elem.set('conf', new_conf)

    def _set_element_text(self, elem: ET.Element, text: str):
        """Set element text, handling tags appropriately.

        When *text* equals what the element already yields as plain text the
        element is left untouched, so segments that were never edited keep
        their inline XLIFF tags. Otherwise the text is replaced and child
        elements are removed, except non-segment ``mrk`` markers.
        """
        if self.parser._extract_text(elem) == text:
            return

        # TODO: Convert Supervertaler tags back to XLIFF format
        elem.text = text
        for child in list(elem):
            keep = child.tag.endswith('mrk') and child.get('mtype') != 'seg'
            if not keep:
                elem.remove(child)

    def create_return_package(self, output_path: str = None) -> Optional[str]:
        """
        Create a return package (SDLRPX) with translations.

        Saves the SDLXLIFF files, then zips the whole extraction directory.

        Args:
            output_path: Path for the return package (auto-generated if not
                specified: "<package stem>_translated.sdlrpx" next to the
                original package)

        Returns:
            Path to the created package, or None when no package is loaded or
            an error occurs (errors are logged, not raised)
        """
        if not self.package or not self.extract_dir:
            self.log("ERROR: No package loaded")
            return None

        try:
            self.save_xliff_files()

            if not output_path:
                original = Path(self.package.package_path)
                output_path = original.parent / f"{original.stem}_translated.sdlrpx"

            output_path = Path(output_path)

            self.log(f"Creating return package: {output_path.name}")

            # Collect the members before the archive exists and leave the
            # archive itself out, in case it is written inside extract_dir.
            extract_path = Path(self.extract_dir)
            archive_location = output_path.resolve()
            members = [
                file_path
                for file_path in extract_path.rglob('*')
                if file_path.is_file() and file_path.resolve() != archive_location
            ]

            with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
                for file_path in members:
                    zf.write(file_path, file_path.relative_to(extract_path))

            self.log(f"Created return package: {output_path}")
            return str(output_path)

        except Exception as e:
            # Top-level boundary: report and signal failure with None
            self.log(f"ERROR creating return package: {e}")
            traceback.print_exc()
            return None

    def cleanup(self):
        """Clean up extracted files."""
        if not self.extract_dir or not Path(self.extract_dir).exists():
            return
        try:
            shutil.rmtree(self.extract_dir)
            self.log("Cleaned up extracted files")
        except Exception as e:
            # Best effort: a failed cleanup must not break the caller
            self.log(f"Warning: Could not clean up: {e}")
846
+
847
+
848
def detect_trados_package_type(file_path: str) -> Optional[str]:
    """
    Detect if a file is a Trados package and return its type.

    The decision is based on the file extension (case-insensitive). A plain
    .zip archive is additionally opened and treated as a project package when
    it contains both an .sdlxliff and an .sdlproj member.

    Args:
        file_path: Path of the file to inspect

    Returns:
        'sdlppx', 'sdlrpx', or None if not a Trados package (including a
        .zip that is missing, unreadable or not a valid archive)
    """
    ext = Path(file_path).suffix.lower()

    if ext in ('.sdlppx', '.sdlrpx'):
        return ext[1:]

    if ext != '.zip':
        return None

    try:
        with zipfile.ZipFile(file_path, 'r') as zf:
            # Lower-case so member extensions are matched like the file's own
            names = [name.lower() for name in zf.namelist()]
    except (OSError, zipfile.BadZipFile):
        # Missing, unreadable or corrupt archive: simply not a package
        return None

    has_xliff = any(name.endswith('.sdlxliff') for name in names)
    has_project = any(name.endswith('.sdlproj') for name in names)
    if has_xliff and has_project:
        return 'sdlppx'  # Assume project package

    return None