supervertaler 1.9.163__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Supervertaler.py +48473 -0
- modules/__init__.py +10 -0
- modules/ai_actions.py +964 -0
- modules/ai_attachment_manager.py +343 -0
- modules/ai_file_viewer_dialog.py +210 -0
- modules/autofingers_engine.py +466 -0
- modules/cafetran_docx_handler.py +379 -0
- modules/config_manager.py +469 -0
- modules/database_manager.py +1911 -0
- modules/database_migrations.py +417 -0
- modules/dejavurtf_handler.py +779 -0
- modules/document_analyzer.py +427 -0
- modules/docx_handler.py +689 -0
- modules/encoding_repair.py +319 -0
- modules/encoding_repair_Qt.py +393 -0
- modules/encoding_repair_ui.py +481 -0
- modules/feature_manager.py +350 -0
- modules/figure_context_manager.py +340 -0
- modules/file_dialog_helper.py +148 -0
- modules/find_replace.py +164 -0
- modules/find_replace_qt.py +457 -0
- modules/glossary_manager.py +433 -0
- modules/image_extractor.py +188 -0
- modules/keyboard_shortcuts_widget.py +571 -0
- modules/llm_clients.py +1211 -0
- modules/llm_leaderboard.py +737 -0
- modules/llm_superbench_ui.py +1401 -0
- modules/local_llm_setup.py +1104 -0
- modules/model_update_dialog.py +381 -0
- modules/model_version_checker.py +373 -0
- modules/mqxliff_handler.py +638 -0
- modules/non_translatables_manager.py +743 -0
- modules/pdf_rescue_Qt.py +1822 -0
- modules/pdf_rescue_tkinter.py +909 -0
- modules/phrase_docx_handler.py +516 -0
- modules/project_home_panel.py +209 -0
- modules/prompt_assistant.py +357 -0
- modules/prompt_library.py +689 -0
- modules/prompt_library_migration.py +447 -0
- modules/quick_access_sidebar.py +282 -0
- modules/ribbon_widget.py +597 -0
- modules/sdlppx_handler.py +874 -0
- modules/setup_wizard.py +353 -0
- modules/shortcut_manager.py +932 -0
- modules/simple_segmenter.py +128 -0
- modules/spellcheck_manager.py +727 -0
- modules/statuses.py +207 -0
- modules/style_guide_manager.py +315 -0
- modules/superbench_ui.py +1319 -0
- modules/superbrowser.py +329 -0
- modules/supercleaner.py +600 -0
- modules/supercleaner_ui.py +444 -0
- modules/superdocs.py +19 -0
- modules/superdocs_viewer_qt.py +382 -0
- modules/superlookup.py +252 -0
- modules/tag_cleaner.py +260 -0
- modules/tag_manager.py +351 -0
- modules/term_extractor.py +270 -0
- modules/termbase_entry_editor.py +842 -0
- modules/termbase_import_export.py +488 -0
- modules/termbase_manager.py +1060 -0
- modules/termview_widget.py +1176 -0
- modules/theme_manager.py +499 -0
- modules/tm_editor_dialog.py +99 -0
- modules/tm_manager_qt.py +1280 -0
- modules/tm_metadata_manager.py +545 -0
- modules/tmx_editor.py +1461 -0
- modules/tmx_editor_qt.py +2784 -0
- modules/tmx_generator.py +284 -0
- modules/tracked_changes.py +900 -0
- modules/trados_docx_handler.py +430 -0
- modules/translation_memory.py +715 -0
- modules/translation_results_panel.py +2134 -0
- modules/translation_services.py +282 -0
- modules/unified_prompt_library.py +659 -0
- modules/unified_prompt_manager_qt.py +3951 -0
- modules/voice_commands.py +920 -0
- modules/voice_dictation.py +477 -0
- modules/voice_dictation_lite.py +249 -0
- supervertaler-1.9.163.dist-info/METADATA +906 -0
- supervertaler-1.9.163.dist-info/RECORD +85 -0
- supervertaler-1.9.163.dist-info/WHEEL +5 -0
- supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
- supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
- supervertaler-1.9.163.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,743 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Non-Translatables Manager Module
|
|
3
|
+
|
|
4
|
+
Manages non-translatable (NT) content - terms, phrases, and patterns that should
|
|
5
|
+
not be translated. These include brand names, product names, technical identifiers,
|
|
6
|
+
codes, abbreviations, and other content that must remain in the original language.
|
|
7
|
+
|
|
8
|
+
File Format: .svntl (Non-Translatable List; legacy .ntl files are still loaded)
|
|
9
|
+
- YAML frontmatter with metadata
|
|
10
|
+
- Simple line-by-line entries (one NT per line)
|
|
11
|
+
- Comments start with #
|
|
12
|
+
- Blank lines are ignored
|
|
13
|
+
|
|
14
|
+
Import Support:
|
|
15
|
+
- Native .svntl format
|
|
16
|
+
- memoQ .mqres non-translatable lists (XML format)
|
|
17
|
+
|
|
18
|
+
Features:
|
|
19
|
+
- Multiple NT lists per project
|
|
20
|
+
- Case-sensitive/insensitive matching options
|
|
21
|
+
- Merge import with duplicate detection
|
|
22
|
+
- Export to native format
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import os
|
|
26
|
+
import re
|
|
27
|
+
import yaml
|
|
28
|
+
import xml.etree.ElementTree as ET
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from dataclasses import dataclass, field
|
|
31
|
+
from typing import List, Dict, Optional, Set, Tuple
|
|
32
|
+
from datetime import datetime
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class NonTranslatable:
    """Single non-translatable (NT) entry."""

    # Literal text that must be left untranslated.
    text: str
    # True (default): only identically-cased occurrences match.
    case_sensitive: bool = True
    # Free-form label; not consulted by the matching logic below.
    category: str = ""
    # Free-form remark; not consulted by the matching logic below.
    notes: str = ""

    def matches(self, source_text: str) -> List[Tuple[int, int]]:
        """
        Find all occurrences of this NT in source text.

        Matching is:
        - Literal: the entry text is regex-escaped before searching
        - Case-sensitive unless ``case_sensitive`` is False
        - Never started directly after a word character (letter, digit or
          underscore), so an entry is not found in the middle of a word
        - Closed by a word boundary only when the entry ends with a letter
          or digit; entries ending in a symbol (®, ™, +, _ ...) may be
          followed by anything

        An entry with empty text never matches.

        Args:
            source_text: Text to search in

        Returns:
            List of (start_pos, end_pos) tuples for each match, in order of
            appearance
        """
        # An empty entry would reduce the pattern to a bare lookbehind and
        # report a zero-width "match" at every word start.
        if not self.text:
            return []

        flags = 0 if self.case_sensitive else re.IGNORECASE

        # "(?<!\w)" means "start of text or preceded by a non-word character".
        # In front of a letter/digit this is exactly what \b tests, and it
        # keeps working when the entry starts with a symbol, where \b would
        # not express the intended boundary.
        boundary_pattern = r'(?<!\w)' + re.escape(self.text)

        # Only a trailing letter/digit needs guarding against running on
        # into a longer word.
        if self.text[-1].isalnum():
            boundary_pattern += r'(?!\w)'

        # The entry text is escaped, so the pattern is always valid and no
        # re.error fallback is required.
        return [
            (found.start(), found.end())
            for found in re.finditer(boundary_pattern, source_text, flags)
        ]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@dataclass
class NonTranslatableList:
    """Named collection of non-translatable entries plus its metadata."""

    name: str
    entries: List[NonTranslatable] = field(default_factory=list)
    description: str = ""
    created_date: str = ""
    modified_date: str = ""
    source_language: str = ""
    target_language: str = ""
    is_active: bool = True
    filepath: Optional[str] = None

    def __post_init__(self):
        # Any date left empty by the caller is stamped with the current
        # local time in ISO-8601 form.
        for date_attr in ("created_date", "modified_date"):
            if not getattr(self, date_attr):
                setattr(self, date_attr, datetime.now().isoformat())

    @property
    def entry_count(self) -> int:
        """Number of entries currently held by the list."""
        return len(self.entries)

    def get_unique_texts(self) -> Set[str]:
        """Return the lower-cased text of every entry as a set."""
        lowered = set()
        for nt in self.entries:
            lowered.add(nt.text.lower())
        return lowered

    def find_matches(self, source_text: str) -> List[Dict]:
        """
        Collect the matches of every entry of this list in source_text.

        Args:
            source_text: Text to search in

        Returns:
            List of dicts with 'text', 'start', 'end', 'entry' and
            'list_name' keys, ordered by start position; for equal start
            positions the longer match comes first
        """
        hits = [
            {
                # Slice the source so the reported text keeps its original case
                'text': source_text[begin:finish],
                'start': begin,
                'end': finish,
                'entry': nt,
                'list_name': self.name,
            }
            for nt in self.entries
            for begin, finish in nt.matches(source_text)
        ]

        # start - end is the negated length, so longer matches sort first
        hits.sort(key=lambda hit: (hit['start'], hit['start'] - hit['end']))
        return hits
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class NonTranslatablesManager:
    """Manages non-translatable lists: loading, saving, searching, import/export"""

    # Extension used when the manager picks a file name itself
    FILE_EXTENSION = ".svntl"
    # Older extension, still picked up when scanning the base directory
    LEGACY_EXTENSION = ".ntl"

    def __init__(self, base_path: str, log_callback=None):
        """
        Set up an empty manager rooted at base_path.

        The directory (including missing parents) is created if needed.

        Args:
            base_path: Directory holding the NT files
                (typically user_data/resources/non_translatables)
            log_callback: Callable taking one message string; print is used
                when none (or a falsy value) is given
        """
        self.base_path = Path(base_path)
        self.log = log_callback or print

        # Loaded lists keyed by list name
        self.lists: Dict[str, NonTranslatableList] = {}
        # Names of the lists that take part in matching
        self.active_lists: List[str] = []

        self.base_path.mkdir(parents=True, exist_ok=True)
|
|
185
|
+
|
|
186
|
+
# ========================================================================
|
|
187
|
+
# FILE FORMAT: .svntl / legacy .ntl (YAML frontmatter + line entries)
|
|
188
|
+
# ========================================================================
|
|
189
|
+
|
|
190
|
+
def save_list(self, nt_list: NonTranslatableList, filepath: Optional[str] = None) -> bool:
    """
    Save a non-translatable list in the native text format.

    Format:
        ---
        name: List Name
        description: Optional description
        created_date: ISO date
        modified_date: ISO date
        source_language: en
        target_language: nl
        is_active: true
        ---
        # Comments start with #
        Brand Name
        Product™
        Technical Term

    Before writing, nt_list.modified_date is set to the current time and
    nt_list.filepath to the destination path.

    Args:
        nt_list: The list to save
        filepath: Optional specific path (defaults to
            base_path/<sanitized name> + FILE_EXTENSION)

    Returns:
        True if successful, False if any exception occurred (it is logged)
    """
    try:
        if filepath is None:
            # Replace characters that are not allowed in file names
            safe_name = re.sub(r'[<>:"/\\|?*]', '_', nt_list.name)
            filepath = self.base_path / f"{safe_name}{self.FILE_EXTENSION}"

        filepath = Path(filepath)

        # Update modified date and remember where the list lives
        nt_list.modified_date = datetime.now().isoformat()
        nt_list.filepath = str(filepath)

        # Build YAML frontmatter
        metadata = {
            'name': nt_list.name,
            'description': nt_list.description,
            'created_date': nt_list.created_date,
            'modified_date': nt_list.modified_date,
            'source_language': nt_list.source_language,
            'target_language': nt_list.target_language,
            'is_active': nt_list.is_active,
        }

        with open(filepath, 'w', encoding='utf-8') as f:
            # Write YAML frontmatter
            f.write("---\n")
            yaml.dump(metadata, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
            f.write("---\n\n")

            # Write entries (one per line)
            f.write("# Non-translatable entries (one per line)\n")
            f.write(f"# Total entries: {len(nt_list.entries)}\n\n")

            for entry in nt_list.entries:
                # Notes go in as comments. Every line of a multi-line note
                # gets its own '#', otherwise the continuation lines would
                # be read back as entries.
                for note_line in entry.notes.splitlines():
                    f.write(f"# {note_line}\n")
                # NOTE(review): an entry whose text starts with '#' is read
                # back as a comment by load_list; the format has no escape.
                f.write(f"{entry.text}\n")

        self.log(f"✓ Saved NT list: {nt_list.name} ({len(nt_list.entries)} entries)")
        return True

    except Exception as e:
        self.log(f"✗ Error saving NT list: {e}")
        return False
|
|
260
|
+
|
|
261
|
+
def load_list(self, filepath: str) -> Optional[NonTranslatableList]:
    """
    Load a non-translatable list from the native text format.

    The file may start with a YAML frontmatter block delimited by '---'
    lines. Everything after it (or the whole file when there is no usable
    frontmatter) is read as one entry per line; blank lines and lines
    starting with '#' are skipped.

    Args:
        filepath: Path to the list file

    Returns:
        NonTranslatableList or None if failed
    """
    try:
        filepath = Path(filepath)

        if not filepath.exists():
            self.log(f"✗ File not found: {filepath}")
            return None

        # 'utf-8-sig' reads plain UTF-8 and additionally drops a leading
        # byte-order mark; with plain 'utf-8' a BOM would hide the opening
        # '---' and turn the frontmatter lines into entries.
        with open(filepath, 'r', encoding='utf-8-sig') as f:
            content = f.read()

        # Without usable frontmatter the whole file is the entry body
        metadata = {}
        body = content

        if content.startswith('---'):
            # parts: [text before first '---', frontmatter, remainder]
            parts = content.split('---', 2)
            if len(parts) >= 3:
                try:
                    parsed = yaml.safe_load(parts[1].strip())
                except yaml.YAMLError as e:
                    self.log(f"⚠️ YAML parse error, treating as plain text: {e}")
                else:
                    if parsed is None or isinstance(parsed, dict):
                        metadata = parsed or {}
                        body = parts[2].strip()
                    else:
                        # e.g. a decorative '--- title ---' first line:
                        # valid YAML, but not a mapping we can read from
                        self.log("⚠️ Frontmatter is not a key/value mapping, treating as plain text")

        # Parse entries (one per line, skip comments and empty lines)
        stripped_lines = (raw.strip() for raw in body.splitlines())
        entries = [
            NonTranslatable(text=line)
            for line in stripped_lines
            if line and not line.startswith('#')
        ]

        # Create list object; missing metadata falls back to defaults
        nt_list = NonTranslatableList(
            name=metadata.get('name', filepath.stem),
            entries=entries,
            description=metadata.get('description', ''),
            created_date=metadata.get('created_date', ''),
            modified_date=metadata.get('modified_date', ''),
            source_language=metadata.get('source_language', ''),
            target_language=metadata.get('target_language', ''),
            is_active=metadata.get('is_active', True),
            filepath=str(filepath)
        )

        self.log(f"✓ Loaded NT list: {nt_list.name} ({len(entries)} entries)")
        return nt_list

    except Exception as e:
        self.log(f"✗ Error loading NT list: {e}")
        return None
|
|
327
|
+
|
|
328
|
+
def load_from_plain_text(self, filepath: str, name: Optional[str] = None) -> Optional[NonTranslatableList]:
    """
    Load entries from a plain text file (one entry per line).

    Blank lines and lines starting with '#' are skipped; surrounding
    whitespace is stripped from every entry.

    Args:
        filepath: Path to text file
        name: Optional name for the list (defaults to the file name
            without its extension)

    Returns:
        NonTranslatableList or None if failed
    """
    try:
        filepath = Path(filepath)

        if not filepath.exists():
            self.log(f"✗ File not found: {filepath}")
            return None

        # 'utf-8-sig' reads plain UTF-8 and additionally drops a leading
        # byte-order mark; with plain 'utf-8' the BOM would be glued to the
        # first entry, which would then never match any source text.
        with open(filepath, 'r', encoding='utf-8-sig') as f:
            stripped_lines = (raw.strip() for raw in f)
            entries = [
                NonTranslatable(text=line)
                for line in stripped_lines
                if line and not line.startswith('#')
            ]

        list_name = name or filepath.stem

        nt_list = NonTranslatableList(
            name=list_name,
            entries=entries,
            description=f"Imported from {filepath.name}",
            filepath=str(filepath)
        )

        self.log(f"✓ Loaded {len(entries)} entries from plain text: {filepath.name}")
        return nt_list

    except Exception as e:
        self.log(f"✗ Error loading plain text file: {e}")
        return None
|
|
370
|
+
|
|
371
|
+
# ========================================================================
|
|
372
|
+
# MEMOQ IMPORT (.mqres XML format)
|
|
373
|
+
# ========================================================================
|
|
374
|
+
|
|
375
|
+
def import_memoq_mqres(self, filepath: str, name: Optional[str] = None) -> Optional[NonTranslatableList]:
    """
    Build a non-translatable list from a memoQ .mqres file.

    The file is searched for two XML fragments:
    - a MemoQResource element, whose Resource/Name and Resource/Description
      children supply the list name and description
    - a nonTrans element, whose nonTransRule children each supply one entry

    Either fragment may be absent; a fragment that fails to parse is
    skipped (the nonTrans case is logged as a warning).

    Args:
        filepath: Path to .mqres file
        name: Optional name override; takes precedence over the name found
            in the file, which in turn takes precedence over the file stem

    Returns:
        NonTranslatableList or None if failed
    """
    try:
        filepath = Path(filepath)

        if not filepath.exists():
            self.log(f"✗ File not found: {filepath}")
            return None

        with open(filepath, 'r', encoding='utf-8') as f:
            raw_xml = f.read()

        list_name = name
        description = ""
        entries = []

        # The file may hold two XML documents back to back, so each part is
        # cut out with a regex and parsed on its own.
        header_hit = re.search(r'<MemoQResource.*?</MemoQResource>', raw_xml, re.DOTALL)
        if header_hit:
            try:
                resource = ET.fromstring(header_hit.group()).find('.//Resource')
                if resource is not None:
                    name_node = resource.find('Name')
                    # The caller-supplied name wins over the one in the file
                    if not list_name and name_node is not None and name_node.text:
                        list_name = name_node.text
                    desc_node = resource.find('Description')
                    if desc_node is not None and desc_node.text:
                        description = desc_node.text
            except ET.ParseError:
                # Header metadata is optional; fall back to the defaults
                pass

        rules_hit = re.search(r'<nonTrans.*?</nonTrans>', raw_xml, re.DOTALL)
        if rules_hit:
            # Drop any XML declaration caught inside the fragment
            rules_xml = re.sub(r'<\?xml[^?]*\?>', '', rules_hit.group())

            try:
                rules_root = ET.fromstring(rules_xml)
                entries.extend(
                    NonTranslatable(text=rule.text.strip())
                    for rule in rules_root.findall('.//nonTransRule')
                    if rule.text and rule.text.strip()
                )
            except ET.ParseError as e:
                self.log(f"⚠️ XML parse error in nonTrans section: {e}")

        if not list_name:
            list_name = filepath.stem

        nt_list = NonTranslatableList(
            name=list_name,
            entries=entries,
            description=description or f"Imported from memoQ: {filepath.name}",
            filepath=str(filepath)
        )

        self.log(f"✓ Imported memoQ NT list: {list_name} ({len(entries)} entries)")
        return nt_list

    except Exception as e:
        self.log(f"✗ Error importing memoQ file: {e}")
        import traceback
        self.log(f"Traceback: {traceback.format_exc()}")
        return None
|
|
472
|
+
|
|
473
|
+
# ========================================================================
|
|
474
|
+
# LIST MANAGEMENT
|
|
475
|
+
# ========================================================================
|
|
476
|
+
|
|
477
|
+
def load_all_lists(self) -> int:
    """
    Load all .svntl and legacy .ntl files from the base directory.

    Previously loaded lists and the active-list names are discarded first,
    so the method can be called again to refresh from disk. A legacy file
    whose list name is already taken by a native file is ignored.

    Returns:
        Number of lists loaded
    """
    self.lists.clear()
    # Reset the active names too; otherwise every reload would append them
    # a second time.
    self.active_lists.clear()
    count = 0

    # Load new .svntl files
    for filepath in self.base_path.glob(f"*{self.FILE_EXTENSION}"):
        nt_list = self.load_list(str(filepath))
        if nt_list:
            self.lists[nt_list.name] = nt_list
            count += 1

    # Also load legacy .ntl files (backward compatibility)
    for filepath in self.base_path.glob(f"*{self.LEGACY_EXTENSION}"):
        nt_list = self.load_list(str(filepath))
        if nt_list and nt_list.name not in self.lists:  # Don't overwrite if already loaded
            self.lists[nt_list.name] = nt_list
            count += 1

    # Derive the active names from the lists that were actually kept, so a
    # name appears at most once and reflects the list stored under it.
    # Slice assignment keeps the same list object alive.
    self.active_lists[:] = [
        list_name for list_name, kept in self.lists.items() if kept.is_active
    ]

    self.log(f"Loaded {count} NT lists ({len(self.active_lists)} active)")
    return count
|
|
507
|
+
|
|
508
|
+
def get_all_lists(self) -> List[NonTranslatableList]:
    """Return every loaded list, active or not, as a new list object."""
    return [*self.lists.values()]
|
|
511
|
+
|
|
512
|
+
def get_active_lists(self) -> List[NonTranslatableList]:
    """Return the loaded lists whose names are recorded as active."""
    selected = []
    for list_name in self.active_lists:
        # Names without a loaded list behind them are skipped
        if list_name in self.lists:
            selected.append(self.lists[list_name])
    return selected
|
|
515
|
+
|
|
516
|
+
def set_list_active(self, name: str, active: bool):
    """Switch a loaded list on or off; unknown names are ignored."""
    if name not in self.lists:
        return

    self.lists[name].is_active = active

    # Keep the active-name registry in step with the flag
    already_registered = name in self.active_lists
    if active:
        if not already_registered:
            self.active_lists.append(name)
    elif already_registered:
        self.active_lists.remove(name)
|
|
524
|
+
|
|
525
|
+
def create_list(self, name: str, description: str = "") -> NonTranslatableList:
    """
    Create a new empty NT list, register it and mark it active.

    A list already registered under the same name is replaced.

    Args:
        name: Name of the new list
        description: Optional description

    Returns:
        The newly created list
    """
    nt_list = NonTranslatableList(
        name=name,
        description=description
    )
    self.lists[name] = nt_list
    # Re-creating an existing active list must not register the name twice
    if name not in self.active_lists:
        self.active_lists.append(name)
    return nt_list
|
|
534
|
+
|
|
535
|
+
def delete_list(self, name: str) -> bool:
    """
    Delete a list from memory and remove its file from disk.

    A failure to remove the file is logged and does not stop the list from
    being dropped from memory.

    Returns:
        True if the list was known and has been dropped, False otherwise
    """
    if name not in self.lists:
        return False

    stored_path = self.lists[name].filepath

    # Remove the backing file, if the list has one and it is still there
    if stored_path:
        try:
            on_disk = Path(stored_path)
            if on_disk.exists():
                on_disk.unlink()
        except Exception as e:
            self.log(f"⚠️ Could not delete file: {e}")

    # Forget the list and its active flag
    self.lists.pop(name)
    if name in self.active_lists:
        self.active_lists.remove(name)

    self.log(f"✓ Deleted NT list: {name}")
    return True
|
|
558
|
+
|
|
559
|
+
# ========================================================================
|
|
560
|
+
# MERGE & IMPORT
|
|
561
|
+
# ========================================================================
|
|
562
|
+
|
|
563
|
+
def merge_into_list(self, target_name: str, source_list: NonTranslatableList,
                    ignore_duplicates: bool = True) -> Tuple[int, int]:
    """
    Append the entries of source_list to an already loaded list.

    Two entries count as duplicates when their texts are equal after
    lower-casing. The entry objects themselves are appended, not copies.

    Args:
        target_name: Name of target list (must exist)
        source_list: Source list to merge from
        ignore_duplicates: If True, skip entries that already exist;
            if False, duplicates are appended as well

    Returns:
        Tuple of (added_count, skipped_count); (0, 0) when the target
        list is unknown
    """
    if target_name not in self.lists:
        self.log(f"✗ Target list not found: {target_name}")
        return (0, 0)

    destination = self.lists[target_name]
    seen_texts = destination.get_unique_texts()

    added, skipped = 0, 0
    for candidate in source_list.entries:
        lowered = candidate.text.lower()
        if ignore_duplicates and lowered in seen_texts:
            skipped += 1
            continue

        destination.entries.append(candidate)
        # Remember it so repeats inside source_list are detected as well
        seen_texts.add(lowered)
        added += 1

    destination.modified_date = datetime.now().isoformat()

    self.log(f"✓ Merged into {target_name}: {added} added, {skipped} duplicates skipped")
    return (added, skipped)
|
|
600
|
+
|
|
601
|
+
def add_entry(self, list_name: str, text: str, notes: str = "", category: str = "") -> bool:
    """
    Add a single entry to a list.

    Args:
        list_name: Name of the list to add to
        text: Text of the new entry
        notes: Optional notes for the entry
        category: Optional category for the entry

    Returns:
        True if the entry was added; False if the list is unknown or the
        text is empty or consists only of whitespace
    """
    if list_name not in self.lists:
        return False

    # A blank entry cannot be stored (blank lines are skipped when a list
    # file is read) and has nothing to match, so refuse it up front.
    if not text or not text.strip():
        return False

    entry = NonTranslatable(text=text, notes=notes, category=category)
    self.lists[list_name].entries.append(entry)
    self.lists[list_name].modified_date = datetime.now().isoformat()
    return True
|
|
610
|
+
|
|
611
|
+
def remove_entry(self, list_name: str, text: str) -> bool:
    """
    Remove every entry whose text equals text (exact comparison).

    Returns:
        True if at least one entry was removed, False if nothing matched
        or the list is unknown
    """
    if list_name not in self.lists:
        return False

    holder = self.lists[list_name]
    survivors = [item for item in holder.entries if item.text != text]
    removed_any = len(survivors) < len(holder.entries)
    holder.entries = survivors

    # Only a real removal counts as a modification
    if removed_any:
        holder.modified_date = datetime.now().isoformat()
    return removed_any
|
|
624
|
+
|
|
625
|
+
# ========================================================================
|
|
626
|
+
# SEARCH & MATCHING
|
|
627
|
+
# ========================================================================
|
|
628
|
+
|
|
629
|
+
def find_all_matches(self, source_text: str) -> List[Dict]:
    """
    Find the NT matches of all active lists and drop overlapping ones.

    Matches are visited by start position, longer ones first when they
    start at the same place; a match is kept only if it starts at or after
    the end of the previously kept match.

    Args:
        source_text: Text to search in

    Returns:
        List of non-overlapping match dicts sorted by position
    """
    candidates = []
    for active_list in self.get_active_lists():
        candidates += active_list.find_matches(source_text)

    # start - end is the negated length, so longer matches sort first
    ordered = sorted(candidates, key=lambda hit: (hit['start'], hit['start'] - hit['end']))

    kept = []
    covered_until = -1
    for hit in ordered:
        if hit['start'] < covered_until:
            # Begins inside the previously kept match
            continue
        kept.append(hit)
        covered_until = hit['end']

    return kept
|
|
657
|
+
|
|
658
|
+
def get_unique_entries_from_active(self) -> Set[str]:
    """Return the union of the lower-cased entry texts of all active lists."""
    per_list_texts = (
        active_list.get_unique_texts() for active_list in self.get_active_lists()
    )
    return set().union(*per_list_texts)
|
|
664
|
+
|
|
665
|
+
# ========================================================================
|
|
666
|
+
# EXPORT
|
|
667
|
+
# ========================================================================
|
|
668
|
+
|
|
669
|
+
def export_list(self, name: str, filepath: str) -> bool:
    """
    Export a list to the native text format at the given location.

    The exported file is a copy: afterwards the list still refers to the
    file it was associated with before the export.

    Args:
        name: Name of list to export
        filepath: Destination file path

    Returns:
        True if successful
    """
    if name not in self.lists:
        self.log(f"✗ List not found: {name}")
        return False

    nt_list = self.lists[name]
    # save_list re-points nt_list.filepath at the file it writes. Put the
    # previous value back, otherwise deleting the list later would remove
    # the exported copy instead of the list's own file.
    previous_path = nt_list.filepath
    try:
        return self.save_list(nt_list, filepath)
    finally:
        nt_list.filepath = previous_path
|
|
685
|
+
|
|
686
|
+
def export_to_plain_text(self, name: str, filepath: str) -> bool:
    """
    Write the entry texts of a list to a UTF-8 text file, one per line.

    Only the texts are written; list metadata and entry notes are not.

    Args:
        name: Name of list to export
        filepath: Destination file path

    Returns:
        True if successful, False if the list is unknown or writing failed
        (both cases are logged)
    """
    if name not in self.lists:
        self.log(f"✗ List not found: {name}")
        return False

    try:
        exported = self.lists[name]
        with open(filepath, 'w', encoding='utf-8') as out:
            out.writelines(f"{item.text}\n" for item in exported.entries)
    except Exception as e:
        self.log(f"✗ Error exporting: {e}")
        return False

    self.log(f"✓ Exported to plain text: {filepath}")
    return True
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
# ============================================================================
|
|
716
|
+
# CONVENIENCE FUNCTION FOR CONVERSION
|
|
717
|
+
# ============================================================================
|
|
718
|
+
|
|
719
|
+
def convert_txt_to_ntl(input_path: str, output_path: Optional[str] = None,
                       name: Optional[str] = None) -> bool:
    """
    Convert a plain text NT file (one entry per line) to the list format
    with YAML frontmatter.

    Args:
        input_path: Path to input .txt file
        output_path: Path for the output file (defaults to the input path
            with its extension replaced by .ntl)
        name: Name for the list (defaults to filename)

    Returns:
        True if successful, False if reading or writing failed
    """
    source = Path(input_path)
    destination = source.with_suffix('.ntl') if output_path is None else output_path

    # A throw-away manager rooted next to the input file does the work
    manager = NonTranslatablesManager(str(source.parent))
    loaded = manager.load_from_plain_text(str(source), name)
    if not loaded:
        return False

    return manager.save_list(loaded, str(destination))
|