supervertaler 1.9.163__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Supervertaler.py +48473 -0
- modules/__init__.py +10 -0
- modules/ai_actions.py +964 -0
- modules/ai_attachment_manager.py +343 -0
- modules/ai_file_viewer_dialog.py +210 -0
- modules/autofingers_engine.py +466 -0
- modules/cafetran_docx_handler.py +379 -0
- modules/config_manager.py +469 -0
- modules/database_manager.py +1911 -0
- modules/database_migrations.py +417 -0
- modules/dejavurtf_handler.py +779 -0
- modules/document_analyzer.py +427 -0
- modules/docx_handler.py +689 -0
- modules/encoding_repair.py +319 -0
- modules/encoding_repair_Qt.py +393 -0
- modules/encoding_repair_ui.py +481 -0
- modules/feature_manager.py +350 -0
- modules/figure_context_manager.py +340 -0
- modules/file_dialog_helper.py +148 -0
- modules/find_replace.py +164 -0
- modules/find_replace_qt.py +457 -0
- modules/glossary_manager.py +433 -0
- modules/image_extractor.py +188 -0
- modules/keyboard_shortcuts_widget.py +571 -0
- modules/llm_clients.py +1211 -0
- modules/llm_leaderboard.py +737 -0
- modules/llm_superbench_ui.py +1401 -0
- modules/local_llm_setup.py +1104 -0
- modules/model_update_dialog.py +381 -0
- modules/model_version_checker.py +373 -0
- modules/mqxliff_handler.py +638 -0
- modules/non_translatables_manager.py +743 -0
- modules/pdf_rescue_Qt.py +1822 -0
- modules/pdf_rescue_tkinter.py +909 -0
- modules/phrase_docx_handler.py +516 -0
- modules/project_home_panel.py +209 -0
- modules/prompt_assistant.py +357 -0
- modules/prompt_library.py +689 -0
- modules/prompt_library_migration.py +447 -0
- modules/quick_access_sidebar.py +282 -0
- modules/ribbon_widget.py +597 -0
- modules/sdlppx_handler.py +874 -0
- modules/setup_wizard.py +353 -0
- modules/shortcut_manager.py +932 -0
- modules/simple_segmenter.py +128 -0
- modules/spellcheck_manager.py +727 -0
- modules/statuses.py +207 -0
- modules/style_guide_manager.py +315 -0
- modules/superbench_ui.py +1319 -0
- modules/superbrowser.py +329 -0
- modules/supercleaner.py +600 -0
- modules/supercleaner_ui.py +444 -0
- modules/superdocs.py +19 -0
- modules/superdocs_viewer_qt.py +382 -0
- modules/superlookup.py +252 -0
- modules/tag_cleaner.py +260 -0
- modules/tag_manager.py +351 -0
- modules/term_extractor.py +270 -0
- modules/termbase_entry_editor.py +842 -0
- modules/termbase_import_export.py +488 -0
- modules/termbase_manager.py +1060 -0
- modules/termview_widget.py +1176 -0
- modules/theme_manager.py +499 -0
- modules/tm_editor_dialog.py +99 -0
- modules/tm_manager_qt.py +1280 -0
- modules/tm_metadata_manager.py +545 -0
- modules/tmx_editor.py +1461 -0
- modules/tmx_editor_qt.py +2784 -0
- modules/tmx_generator.py +284 -0
- modules/tracked_changes.py +900 -0
- modules/trados_docx_handler.py +430 -0
- modules/translation_memory.py +715 -0
- modules/translation_results_panel.py +2134 -0
- modules/translation_services.py +282 -0
- modules/unified_prompt_library.py +659 -0
- modules/unified_prompt_manager_qt.py +3951 -0
- modules/voice_commands.py +920 -0
- modules/voice_dictation.py +477 -0
- modules/voice_dictation_lite.py +249 -0
- supervertaler-1.9.163.dist-info/METADATA +906 -0
- supervertaler-1.9.163.dist-info/RECORD +85 -0
- supervertaler-1.9.163.dist-info/WHEEL +5 -0
- supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
- supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
- supervertaler-1.9.163.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Termbase Manager Module
|
|
3
|
+
|
|
4
|
+
Handles glossary/termbase management for Supervertaler:
|
|
5
|
+
- Create/delete glossaries
|
|
6
|
+
- Add/edit/delete terms
|
|
7
|
+
- Activate/deactivate for projects
|
|
8
|
+
- Import/export glossaries
|
|
9
|
+
- Search across termbases
|
|
10
|
+
|
|
11
|
+
Unified management for both global and project-specific termbases.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import List, Dict, Optional, Tuple
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class TermbaseInfo:
    """Information about a glossary/termbase."""
    id: int
    name: str
    description: str
    source_lang: Optional[str]
    target_lang: Optional[str]
    project_id: Optional[int]  # None = global, set = project-specific
    created_date: str
    modified_date: str
    entry_count: int
    is_active_for_project: bool = False


@dataclass
class TermbaseEntry:
    """A single term entry in a termbase"""
    id: int
    termbase_id: int
    source_term: str
    target_term: str
    priority: int  # 1-99, lower = higher priority
    domain: str
    definition: str
    forbidden: bool
    non_translatable: bool
    created_date: str
    modified_date: str


class TermbaseManager:
    """Manages glossaries and termbases"""

    def __init__(self, db_manager, log_callback=None):
        """
        Initialize termbase manager

        Args:
            db_manager: DatabaseManager instance (must expose .cursor and .connection)
            log_callback: Optional logging function (defaults to print)
        """
        self.db = db_manager
        self.log = log_callback if log_callback else print

    @staticmethod
    def _row_to_info(row, is_active: bool = False) -> TermbaseInfo:
        """Build a TermbaseInfo from a row of the standard termbase SELECT.

        Expects columns in the order: id, name, description, source_lang,
        target_lang, project_id, created_date, modified_date, entry_count.
        """
        return TermbaseInfo(
            id=row[0],
            name=row[1],
            description=row[2],
            source_lang=row[3],
            target_lang=row[4],
            project_id=row[5],
            created_date=row[6],
            modified_date=row[7],
            entry_count=row[8] or 0,
            is_active_for_project=is_active
        )

    def create_termbase(
        self,
        name: str,
        description: str = "",
        source_lang: Optional[str] = None,
        target_lang: Optional[str] = None,
        project_id: Optional[int] = None
    ) -> int:
        """
        Create a new termbase

        Args:
            name: termbase name
            description: Optional description
            source_lang: Source language code (e.g., 'NL', 'EN')
            target_lang: Target language code
            project_id: Optional project ID (None = global termbase)

        Returns:
            termbase ID

        Raises:
            Exception: Re-raises any database error after logging it.
        """
        try:
            cursor = self.db.cursor
            now = datetime.now().isoformat()

            cursor.execute("""
                INSERT INTO glossaries (name, description, source_lang, target_lang, project_id, created_date, modified_date)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (name, description, source_lang, target_lang, project_id, now, now))

            self.db.connection.commit()
            termbase_id = cursor.lastrowid
            self.log(f"Created termbase '{name}' (ID: {termbase_id})")
            return termbase_id
        except Exception as e:
            self.log(f"Error creating termbase: {e}")
            raise

    def get_all_termbases(self) -> List[TermbaseInfo]:
        """Get all glossaries (global and project-specific)"""
        try:
            cursor = self.db.cursor
            cursor.execute("""
                SELECT
                    g.id, g.name, g.description, g.source_lang, g.target_lang,
                    g.project_id, g.created_date, g.modified_date,
                    COUNT(gt.id) as entry_count
                FROM glossaries g
                LEFT JOIN termbase_terms gt ON g.id = gt.termbase_id
                GROUP BY g.id
                ORDER BY g.name
            """)
            # BUG FIX: previously constructed undefined 'GlossaryInfo', which
            # raised NameError and made this method always return [].
            return [self._row_to_info(row) for row in cursor.fetchall()]
        except Exception as e:
            self.log(f"Error fetching glossaries: {e}")
            return []

    def get_termbase_terms(self, termbase_id: int) -> List[TermbaseEntry]:
        """Get all terms in a termbase, ordered by priority then source term."""
        try:
            cursor = self.db.cursor
            cursor.execute("""
                SELECT id, termbase_id, source_term, target_term, priority,
                       domain, definition, forbidden, non_translatable, created_date, modified_date
                FROM termbase_terms
                WHERE termbase_id = ?
                ORDER BY priority ASC, source_term ASC
            """, (termbase_id,))

            # BUG FIX: previously constructed undefined 'TermEntry' (NameError),
            # so this method always returned [].
            return [
                TermbaseEntry(
                    id=row[0],
                    termbase_id=row[1],
                    source_term=row[2],
                    target_term=row[3],
                    priority=row[4],
                    domain=row[5],
                    definition=row[6],
                    forbidden=bool(row[7]),
                    non_translatable=bool(row[8]),
                    created_date=row[9],
                    modified_date=row[10]
                )
                for row in cursor.fetchall()
            ]
        except Exception as e:
            self.log(f"Error fetching terms for termbase {termbase_id}: {e}")
            return []

    def add_term(
        self,
        termbase_id: int,
        source_term: str,
        target_term: str,
        priority: int = 50,
        domain: str = "",
        definition: str = "",
        forbidden: bool = False,
        non_translatable: bool = False
    ) -> int:
        """
        Add a term to a termbase

        Args:
            termbase_id: Target termbase ID
            source_term: Source language term
            target_term: Target language term
            priority: Priority ranking (1-99, lower = higher)
            domain: Domain/subject area
            definition: Definition or note
            forbidden: Whether term is forbidden for translation
            non_translatable: Whether term should not be translated

        Returns:
            Term ID

        Raises:
            Exception: Re-raises any database error after logging it.
        """
        try:
            cursor = self.db.cursor
            now = datetime.now().isoformat()

            # INSERT ... SELECT copies source_lang/target_lang from the parent
            # termbase row; note this inserts nothing if termbase_id does not exist.
            cursor.execute("""
                INSERT INTO termbase_terms
                    (termbase_id, source_term, target_term, priority, domain, definition,
                     forbidden, non_translatable, source_lang, target_lang, created_date, modified_date)
                SELECT ?, ?, ?, ?, ?, ?, ?, ?, source_lang, target_lang, ?, ?
                FROM glossaries
                WHERE id = ?
            """, (termbase_id, source_term, target_term, priority, domain, definition,
                  forbidden, non_translatable, now, now, termbase_id))

            self.db.connection.commit()
            term_id = cursor.lastrowid
            self.log(f"Added term '{source_term}' to termbase {termbase_id}")
            return term_id
        except Exception as e:
            self.log(f"Error adding term: {e}")
            raise

    def update_term(
        self,
        term_id: int,
        source_term: str = None,
        target_term: str = None,
        priority: int = None,
        domain: str = None,
        definition: str = None,
        forbidden: bool = None,
        non_translatable: bool = None
    ) -> bool:
        """Update a term; only fields passed as non-None are changed.

        Returns:
            True if a row was updated, False otherwise.
        """
        try:
            cursor = self.db.cursor
            now = datetime.now().isoformat()

            # Build dynamic update query from the provided fields only.
            updates = ["modified_date = ?"]
            params = [now]

            # Column names come from this fixed list (never user input),
            # so interpolating them into the SQL below is safe.
            optional_fields = [
                ("source_term", source_term),
                ("target_term", target_term),
                ("priority", priority),
                ("domain", domain),
                ("definition", definition),
                ("forbidden", forbidden),
                ("non_translatable", non_translatable),
            ]
            for column, value in optional_fields:
                if value is not None:
                    updates.append(f"{column} = ?")
                    params.append(value)

            params.append(term_id)
            query = f"UPDATE termbase_terms SET {', '.join(updates)} WHERE id = ?"

            cursor.execute(query, params)
            self.db.connection.commit()
            return cursor.rowcount > 0
        except Exception as e:
            self.log(f"Error updating term {term_id}: {e}")
            return False

    def delete_term(self, term_id: int) -> bool:
        """Delete a term from a termbase. Returns True if a row was deleted."""
        try:
            cursor = self.db.cursor
            cursor.execute("DELETE FROM termbase_terms WHERE id = ?", (term_id,))
            self.db.connection.commit()
            return cursor.rowcount > 0
        except Exception as e:
            self.log(f"Error deleting term {term_id}: {e}")
            return False

    def delete_termbase(self, termbase_id: int) -> bool:
        """Delete a termbase and all its terms.

        Returns:
            True if the termbase row was deleted (rowcount reflects the
            glossaries DELETE, not the terms DELETE).
        """
        try:
            cursor = self.db.cursor
            # Delete terms first so no orphaned rows remain.
            cursor.execute("DELETE FROM termbase_terms WHERE termbase_id = ?", (termbase_id,))
            # Delete termbase
            cursor.execute("DELETE FROM glossaries WHERE id = ?", (termbase_id,))
            self.db.connection.commit()
            self.log(f"Deleted termbase {termbase_id}")
            return cursor.rowcount > 0
        except Exception as e:
            self.log(f"Error deleting termbase {termbase_id}: {e}")
            return False

    def activate_for_project(self, termbase_id: int, project_id: int) -> bool:
        """Mark a termbase as active for a specific project"""
        try:
            cursor = self.db.cursor
            # INSERT OR REPLACE makes activation idempotent.
            cursor.execute("""
                INSERT OR REPLACE INTO termbase_project_activation (termbase_id, project_id, activated_date)
                VALUES (?, ?, datetime('now'))
            """, (termbase_id, project_id))
            self.db.connection.commit()
            return True
        except Exception as e:
            self.log(f"Error activating termbase: {e}")
            return False

    def deactivate_for_project(self, termbase_id: int, project_id: int) -> bool:
        """Mark a termbase as inactive for a specific project"""
        try:
            cursor = self.db.cursor
            cursor.execute("""
                DELETE FROM termbase_project_activation
                WHERE termbase_id = ? AND project_id = ?
            """, (termbase_id, project_id))
            self.db.connection.commit()
            return True
        except Exception as e:
            self.log(f"Error deactivating termbase: {e}")
            return False

    def is_active_for_project(self, termbase_id: int, project_id: int) -> bool:
        """Check if termbase is active for a project"""
        try:
            cursor = self.db.cursor
            cursor.execute("""
                SELECT 1 FROM termbase_project_activation
                WHERE termbase_id = ? AND project_id = ?
            """, (termbase_id, project_id))
            return cursor.fetchone() is not None
        except Exception as e:
            self.log(f"Error checking activation status: {e}")
            return False

    def get_active_glossaries_for_project(self, project_id: int) -> List[TermbaseInfo]:
        """Get all glossaries active for a specific project (global + project-specific)"""
        try:
            cursor = self.db.cursor
            # Project-specific glossaries (project_id = target project) plus
            # global glossaries (project_id IS NULL) that have been activated.
            cursor.execute("""
                SELECT DISTINCT
                    g.id, g.name, g.description, g.source_lang, g.target_lang,
                    g.project_id, g.created_date, g.modified_date,
                    COUNT(gt.id) as entry_count
                FROM glossaries g
                LEFT JOIN termbase_terms gt ON g.id = gt.termbase_id
                WHERE (g.project_id = ? OR
                       (g.project_id IS NULL AND g.id IN
                        (SELECT termbase_id FROM termbase_project_activation WHERE project_id = ?)))
                GROUP BY g.id
                ORDER BY g.name
            """, (project_id, project_id))

            # BUG FIX: previously constructed undefined 'GlossaryInfo' (NameError).
            return [self._row_to_info(row, is_active=True) for row in cursor.fetchall()]
        except Exception as e:
            self.log(f"Error fetching active glossaries: {e}")
            return []

    def export_glossary_to_csv(self, termbase_id: int, filepath: str) -> bool:
        """Export termbase to CSV format. Returns True on success."""
        try:
            import csv
            terms = self.get_termbase_terms(termbase_id)

            with open(filepath, 'w', newline='', encoding='utf-8') as f:
                writer = csv.writer(f)
                writer.writerow(['Source Term', 'Target Term', 'Domain', 'Definition', 'Priority', 'Forbidden', 'Non-Translatable'])

                for term in terms:
                    writer.writerow([
                        term.source_term,
                        term.target_term,
                        term.domain,
                        term.definition,
                        term.priority,
                        'Yes' if term.forbidden else 'No',
                        'Yes' if term.non_translatable else 'No'
                    ])

            self.log(f"Exported termbase {termbase_id} to {filepath}")
            return True
        except Exception as e:
            self.log(f"Error exporting termbase: {e}")
            return False

    def import_glossary_from_csv(self, termbase_id: int, filepath: str) -> int:
        """Import terms into termbase from CSV file.

        Expects the column layout produced by export_glossary_to_csv.
        Blank or malformed 'Priority' cells fall back to the default (50)
        instead of aborting the whole import.

        Returns:
            Number of terms imported (0 on error).
        """
        try:
            import csv
            count = 0

            with open(filepath, 'r', encoding='utf-8') as f:
                reader = csv.DictReader(f)
                for row in reader:
                    # ROBUSTNESS FIX: int('') previously raised ValueError and
                    # the whole import returned 0; missing/blank cells now
                    # default gracefully.
                    try:
                        priority = int(row.get('Priority') or 50)
                    except (TypeError, ValueError):
                        priority = 50
                    forbidden = (row.get('Forbidden') or 'No').lower() == 'yes'
                    non_translatable = (row.get('Non-Translatable') or 'No').lower() == 'yes'

                    self.add_term(
                        termbase_id,
                        row['Source Term'],
                        row['Target Term'],
                        priority=priority,
                        domain=row.get('Domain') or '',
                        definition=row.get('Definition') or '',
                        forbidden=forbidden,
                        non_translatable=non_translatable
                    )
                    count += 1

            self.log(f"Imported {count} terms into termbase {termbase_id}")
            return count
        except Exception as e:
            self.log(f"Error importing termbase: {e}")
            return 0
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""
|
|
2
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
3
|
+
Image Extractor Module for Supervertaler
|
|
4
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
5
|
+
|
|
6
|
+
Purpose:
|
|
7
|
+
Extract images from DOCX files and save them as sequentially numbered PNG files.
|
|
8
|
+
Integrated into the Reference Images tab under Translation Resources.
|
|
9
|
+
|
|
10
|
+
Features:
|
|
11
|
+
- Extract all images from DOCX documents
|
|
12
|
+
- Save as PNG files with sequential naming (Fig. 1.png, Fig. 2.png, etc.)
|
|
13
|
+
- Support for various image formats embedded in DOCX
|
|
14
|
+
- Progress feedback during extraction
|
|
15
|
+
- Can be used as standalone tool or within Translation Resources workflow
|
|
16
|
+
|
|
17
|
+
Author: Supervertaler Development Team
|
|
18
|
+
Created: 2025-11-17
|
|
19
|
+
Last Modified: 2025-11-17
|
|
20
|
+
|
|
21
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
import os
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import List, Tuple, Optional
|
|
27
|
+
from zipfile import ZipFile
|
|
28
|
+
from io import BytesIO
|
|
29
|
+
from PIL import Image
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ImageExtractor:
    """Extract images from DOCX files and save as PNG"""

    def __init__(self):
        # Input formats this extractor knows how to open.
        self.supported_formats = ['.docx']

    @staticmethod
    def _to_rgb(img):
        """Return *img* converted to RGB, flattening any transparency onto white.

        Transparent/palette modes are composited onto a white background so
        transparent areas don't render black; other non-RGB modes are
        converted directly.
        """
        if img.mode in ('RGBA', 'LA', 'P'):
            # Create white background and composite the image onto it.
            background = Image.new('RGB', img.size, (255, 255, 255))
            if img.mode == 'P':
                img = img.convert('RGBA')
            background.paste(img, mask=img.split()[-1] if img.mode in ('RGBA', 'LA') else None)
            return background
        if img.mode != 'RGB':
            return img.convert('RGB')
        return img

    def _save_media_entry(self, zip_ref, member, output_dir, prefix, number, warn_label):
        """Convert one word/media archive member to a numbered PNG file.

        Args:
            zip_ref: Open ZipFile of the DOCX archive
            member: Archive member name (e.g. 'word/media/image1.png')
            output_dir: Destination directory (must already exist)
            prefix: Filename prefix (e.g. "Fig.")
            number: Sequential number used in the output filename
            warn_label: Text inserted into the warning message on failure

        Returns:
            The output file path, or None if the member could not be processed.
        """
        img_data = zip_ref.read(member)
        try:
            img = self._to_rgb(Image.open(BytesIO(img_data)))
            output_path = os.path.join(output_dir, f"{prefix} {number}.png")
            img.save(output_path, 'PNG', optimize=True)
            return output_path
        except Exception as e:
            print(f"Warning: Could not process image {warn_label}: {e}")
            return None

    def extract_images_from_docx(self, docx_path: str, output_dir: str,
                                 prefix: str = "Fig.") -> Tuple[int, List[str]]:
        """
        Extract all images from a DOCX file and save as PNG files.

        Args:
            docx_path: Path to the DOCX file
            output_dir: Directory where images will be saved
            prefix: Prefix for output filenames (default: "Fig.")

        Returns:
            Tuple of (number of images extracted, list of output file paths)

        Raises:
            FileNotFoundError: If docx_path does not exist
            ValueError: If docx_path is not a .docx file
            Exception: If the archive cannot be read
        """
        # Validate input
        if not os.path.exists(docx_path):
            raise FileNotFoundError(f"DOCX file not found: {docx_path}")

        if not docx_path.lower().endswith('.docx'):
            raise ValueError("File must be a DOCX document")

        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        extracted_files: List[str] = []

        try:
            # DOCX files are ZIP archives; embedded images live in word/media/
            with ZipFile(docx_path, 'r') as zip_ref:
                image_files = [f for f in zip_ref.namelist()
                               if f.startswith('word/media/')]

                for img_file in image_files:
                    # BUG FIX: number and count successful conversions only,
                    # so the returned count always equals len(extracted_files)
                    # and numbering has no gaps (consistent with
                    # extract_from_multiple_docx).
                    output_path = self._save_media_entry(
                        zip_ref, img_file, output_dir, prefix,
                        len(extracted_files) + 1, img_file)
                    if output_path is not None:
                        extracted_files.append(output_path)

        except Exception as e:
            raise Exception(f"Error extracting images: {e}") from e

        return len(extracted_files), extracted_files

    def extract_from_multiple_docx(self, docx_paths: List[str], output_dir: str,
                                   prefix: str = "Fig.") -> Tuple[int, List[str]]:
        """
        Extract images from multiple DOCX files.

        Numbering continues sequentially across all input files. Files that
        cannot be opened are skipped with a warning instead of aborting the
        whole batch.

        Args:
            docx_paths: List of paths to DOCX files
            output_dir: Directory where images will be saved
            prefix: Prefix for output filenames (default: "Fig.")

        Returns:
            Tuple of (total number of images extracted, list of output file paths)
        """
        all_extracted_files: List[str] = []

        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        for docx_path in docx_paths:
            try:
                with ZipFile(docx_path, 'r') as zip_ref:
                    image_files = [f for f in zip_ref.namelist()
                                   if f.startswith('word/media/')]

                    for img_file in image_files:
                        # Sequential numbering continues across all files.
                        output_path = self._save_media_entry(
                            zip_ref, img_file, output_dir, prefix,
                            len(all_extracted_files) + 1,
                            f"from {docx_path}")
                        if output_path is not None:
                            all_extracted_files.append(output_path)

            except Exception as e:
                print(f"Warning: Could not process file {docx_path}: {e}")
                continue

        return len(all_extracted_files), all_extracted_files
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# Demo entry point for running the module directly.
if __name__ == "__main__":
    demo_source = "example.docx"
    demo_target = "extracted_images"

    if not os.path.exists(demo_source):
        print(f"File not found: {demo_source}")
    else:
        total, image_paths = ImageExtractor().extract_images_from_docx(
            demo_source, demo_target)
        print(f"Extracted {total} images:")
        for image_path in image_paths:
            print(f"  - {image_path}")
|