supervertaler 1.9.163__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. Supervertaler.py +48473 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1911 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +351 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1176 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.163.dist-info/METADATA +906 -0
  81. supervertaler-1.9.163.dist-info/RECORD +85 -0
  82. supervertaler-1.9.163.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.163.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1280 @@
1
+ """
2
+ Translation Memory Manager for Supervertaler Qt
3
+ Provides comprehensive TM management features:
4
+ - Browse all TM entries
5
+ - Concordance search
6
+ - Import/Export TMX files
7
+ - Delete entries
8
+ - View statistics
9
+ """
10
+
11
+ from PyQt6.QtWidgets import (QDialog, QVBoxLayout, QHBoxLayout, QTabWidget,
12
+ QTableWidget, QTableWidgetItem, QLineEdit, QPushButton,
13
+ QLabel, QMessageBox, QFileDialog, QHeaderView,
14
+ QGroupBox, QTextEdit, QComboBox, QSpinBox, QCheckBox,
15
+ QProgressBar, QWidget, QStyle, QStyledItemDelegate)
16
+ from PyQt6.QtCore import Qt, QThread, pyqtSignal, QSize
17
+ from PyQt6.QtGui import QColor, QFont, QPalette
18
+ import xml.etree.ElementTree as ET
19
+ from datetime import datetime
20
+ from pathlib import Path
21
+ from typing import List, Dict, Optional, Callable
22
+
23
+
24
class TMXImportThread(QThread):
    """Background thread that imports translation units from a TMX file.

    Signals:
        progress(int, str): progress percentage and a status message.
        finished(bool, str, int): success flag, message, number of entries imported.
    """
    progress = pyqtSignal(int, str)  # progress percentage, status message
    # NOTE(review): this reuses the name of QThread's built-in ``finished``
    # signal with a different signature; kept as-is because callers connect to it.
    finished = pyqtSignal(bool, str, int)  # success, message, entries_imported

    # Fully qualified name of the ``xml:lang`` attribute as ElementTree reports it.
    _XML_LANG = '{http://www.w3.org/XML/1998/namespace}lang'

    def __init__(self, tmx_path: str, db_manager, source_lang: str, target_lang: str, tm_id: str = 'imported'):
        """Store the import parameters; the work itself happens in run().

        Args:
            tmx_path: path of the TMX file to parse.
            db_manager: object providing ``add_translation_unit(...)``.
            source_lang: source language passed through to the database.
            target_lang: target language passed through to the database.
            tm_id: identifier of the TM the entries are stored under.
        """
        super().__init__()
        self.tmx_path = tmx_path
        self.db_manager = db_manager
        self.source_lang = source_lang
        self.target_lang = target_lang
        self.tm_id = tm_id

    @staticmethod
    def _lang_score(tuv_lang, wanted) -> int:
        """Rate how well a <tuv> language matches: 2 exact, 1 same base language, 0 none."""
        have = (tuv_lang or '').strip().lower().replace('_', '-')
        want = (wanted or '').strip().lower().replace('_', '-')
        if not have or not want:
            return 0
        if have == want:
            return 2
        return 1 if have.split('-')[0] == want.split('-')[0] else 0

    def _best_segment(self, segments, wanted, skip=None):
        """Return the index of the segment whose language best matches *wanted*, or None."""
        best_idx, best_score = None, 0
        for i, (lang, _text) in enumerate(segments):
            if i == skip:
                continue
            score = self._lang_score(lang, wanted)
            if score > best_score:
                best_idx, best_score = i, score
        return best_idx

    def _extract_pair(self, tu):
        """Return ``(source_text, target_text)`` for one <tu>; either may be None.

        Segments are picked by language when both the source and the target
        language can be identified among the <tuv> elements. Otherwise the
        original positional rule applies: first non-empty segment is the
        source, the last following segment is the target.
        """
        tuvs = tu.findall('tuv')
        if len(tuvs) < 2:
            return None, None

        segments = []  # (language, text) in document order
        for tuv in tuvs:
            seg = tuv.find('seg')
            if seg is None or seg.text is None:
                continue
            lang = tuv.get(self._XML_LANG, tuv.get('lang', ''))
            segments.append((lang, seg.text))

        # Language-aware selection, so a target-first TMX is not imported reversed.
        src_idx = self._best_segment(segments, self.source_lang)
        if src_idx is not None:
            tgt_idx = self._best_segment(segments, self.target_lang, skip=src_idx)
            if tgt_idx is not None:
                return segments[src_idx][1], segments[tgt_idx][1]

        # Positional fallback (languages unknown or not matching the project).
        source_text = None
        target_text = None
        for _lang, text in segments:
            if not source_text:
                source_text = text
            else:
                target_text = text
        return source_text, target_text

    def run(self):
        """Import TMX file in background, reporting through the two signals."""
        try:
            root = ET.parse(self.tmx_path).getroot()

            # Find body element
            body = root.find('.//body')
            if body is None:
                self.finished.emit(False, "Invalid TMX file: no body element found", 0)
                return

            # Get all translation units
            tus = body.findall('tu')
            total = len(tus)
            imported = 0

            for idx, tu in enumerate(tus):
                source_text, target_text = self._extract_pair(tu)

                # Add to TM only if both sides are present and non-empty
                if source_text and target_text:
                    self.db_manager.add_translation_unit(
                        source=source_text,
                        target=target_text,
                        source_lang=self.source_lang,
                        target_lang=self.target_lang,
                        tm_id=self.tm_id,
                        save_mode='all'  # Always use 'all' mode for imports
                    )
                    imported += 1

                # Update progress every 10 entries
                if idx % 10 == 0:
                    progress_pct = int((idx / total) * 100)
                    self.progress.emit(progress_pct, f"Importing... {idx}/{total}")

            self.finished.emit(True, f"Successfully imported {imported} entries", imported)

        except Exception as e:
            # Thread boundary: report any failure to the UI instead of raising.
            self.finished.emit(False, f"Import failed: {str(e)}", 0)
97
+
98
+
99
class HighlightDelegate(QStyledItemDelegate):
    """Custom delegate to render HTML with highlighted text in table cells"""

    def __init__(self, parent=None):
        super().__init__(parent)
        self.search_term = ""

    def set_search_term(self, term: str):
        """Set the term to highlight"""
        self.search_term = term

    @staticmethod
    def _to_html(text, term: str) -> str:
        """Return *text* as HTML-escaped markup with every occurrence of *term* highlighted.

        The cell text is plain text, so it is escaped piece by piece; only the
        highlight <span> is real markup. Matching is case-insensitive.
        """
        from html import escape
        import re

        text = str(text)
        if not term:
            return escape(text, quote=False)

        pattern = re.compile(re.escape(term), re.IGNORECASE)
        parts = []
        pos = 0
        for match in pattern.finditer(text):
            parts.append(escape(text[pos:match.start()], quote=False))
            parts.append(
                "<span style='background-color: #FFD54F; font-weight: bold;'>"
                + escape(match.group(), quote=False)
                + "</span>"
            )
            pos = match.end()
        parts.append(escape(text[pos:], quote=False))
        return "".join(parts)

    def paint(self, painter, option, index):
        """Paint the cell with HTML rendering for highlighting"""
        from PyQt6.QtGui import QTextDocument, QAbstractTextDocumentLayout
        from PyQt6.QtCore import QRectF

        # Get the text; empty cells use the default painting
        text = index.data(Qt.ItemDataRole.DisplayRole)
        if not text:
            super().paint(painter, option, index)
            return

        html_text = self._to_html(text, self.search_term)

        # Setup painter
        painter.save()

        # Draw selection background if selected
        selected = bool(option.state & QStyle.StateFlag.State_Selected)
        if selected:
            painter.fillRect(option.rect, option.palette.highlight())

        # Create document for HTML rendering
        doc = QTextDocument()
        doc.setDefaultFont(option.font)
        doc.setHtml(html_text)
        doc.setTextWidth(option.rect.width() - 6)  # Some padding

        # Translate to cell position
        painter.translate(option.rect.left() + 3, option.rect.top() + 2)

        # Clip to the padded cell area so long text cannot spill outside the cell
        clip = QRectF(0, 0, option.rect.width() - 6, option.rect.height() - 4)

        # Draw the document
        ctx = QAbstractTextDocumentLayout.PaintContext()
        ctx.clip = clip
        if selected:
            ctx.palette.setColor(QPalette.ColorRole.Text, option.palette.highlightedText().color())
        doc.documentLayout().draw(painter, ctx)

        painter.restore()

    def sizeHint(self, option, index):
        """Return size hint based on content"""
        from PyQt6.QtGui import QTextDocument

        text = index.data(Qt.ItemDataRole.DisplayRole)
        if not text:
            return super().sizeHint(option, index)

        # Measure the same markup that paint() renders.
        doc = QTextDocument()
        doc.setDefaultFont(option.font)
        doc.setHtml(self._to_html(text, self.search_term))
        doc.setTextWidth(option.rect.width() if option.rect.width() > 0 else 400)

        return QSize(int(doc.idealWidth()), max(int(doc.size().height()) + 8, 50))
173
+
174
+
175
class ConcordanceSearchDialog(QDialog):
    """
    Lightweight Concordance Search dialog for Ctrl+K.
    Focused on quick concordance search without other TM management features.
    Results are shown in two tabs: TM matches (text search through the
    database manager) and Supermemory (semantic search, when the parent app
    exposes an engine).
    """

    def __init__(self, parent, db_manager, log_callback: Optional[Callable] = None, initial_query: Optional[str] = None):
        """Create the dialog.

        Args:
            parent: parent widget; also used to look up languages, the current
                project and the Supermemory widget.
            db_manager: object providing ``concordance_search(text)``.
            log_callback: optional callable receiving log messages.
            initial_query: optional text to search for as soon as the dialog is shown.
        """
        super().__init__(parent)
        self.db_manager = db_manager
        self.log = log_callback if log_callback else lambda x: None
        self.parent_app = parent
        self.current_results = []  # TM results of the last search
        self.current_semantic_results = []  # Supermemory results of the last search
        self.current_search_term = ""
        self._updating_heights = False  # Flag to prevent recursive updates
        self._initial_query = initial_query  # Store for after show

        # Get language names from parent app
        self.source_lang_name = getattr(parent, 'source_language', 'Source')
        self.target_lang_name = getattr(parent, 'target_language', 'Target')

        self.setWindowTitle("Concordance Search")
        self.setMinimumSize(800, 600)

        self.setup_ui()

    def exec(self):
        """Override exec to restore saved geometry or match parent window"""
        # Try to restore saved geometry from project
        geometry_restored = False
        if hasattr(self.parent_app, 'current_project') and self.parent_app.current_project:
            project = self.parent_app.current_project
            if hasattr(project, 'concordance_geometry') and project.concordance_geometry:
                geom = project.concordance_geometry
                self.setGeometry(geom['x'], geom['y'], geom['width'], geom['height'])
                geometry_restored = True

        # If no saved geometry, match parent window size and position
        if not geometry_restored and self.parent_app:
            parent_geom = self.parent_app.geometry()
            self.setGeometry(parent_geom)

        self.show()

        # Set initial query and search if provided
        if self._initial_query:
            self.search_input.setText(self._initial_query)
            self.do_search()

        return super().exec()

    def _save_geometry(self):
        """Store the current window geometry on the parent's current project, if any."""
        if hasattr(self.parent_app, 'current_project') and self.parent_app.current_project:
            geom = self.geometry()
            self.parent_app.current_project.concordance_geometry = {
                'x': geom.x(),
                'y': geom.y(),
                'width': geom.width(),
                'height': geom.height()
            }

    def done(self, result):
        """Save geometry before the dialog finishes.

        accept()/reject() (Close button, Esc) end the dialog through done()
        without delivering a close event, so closeEvent alone would miss them.
        """
        self._save_geometry()
        super().done(result)

    def closeEvent(self, event):
        """Save window geometry to project when closing"""
        self._save_geometry()
        super().closeEvent(event)

    def setup_ui(self):
        """Setup the UI with TM and Supermemory tabs"""
        layout = QVBoxLayout()
        layout.setContentsMargins(15, 15, 15, 15)

        # Header
        header = QLabel("🔍 Concordance Search")
        header_font = QFont()
        header_font.setPointSize(14)
        header_font.setBold(True)
        header.setFont(header_font)
        layout.addWidget(header)

        # Description
        desc = QLabel("Search across translation memories (exact match) and Supermemory (semantic/meaning-based)")
        desc.setStyleSheet("color: #666; margin-bottom: 10px;")
        layout.addWidget(desc)

        # Search controls
        search_layout = QHBoxLayout()
        search_layout.addWidget(QLabel("Search:"))
        self.search_input = QLineEdit()
        self.search_input.setPlaceholderText("Enter text to search...")
        self.search_input.returnPressed.connect(self.do_search)
        self.search_input.setStyleSheet("padding: 8px; font-size: 13px;")
        search_layout.addWidget(self.search_input)

        search_btn = QPushButton("🔍 Search")
        search_btn.clicked.connect(self.do_search)
        search_btn.setStyleSheet("padding: 8px 16px;")
        search_layout.addWidget(search_btn)

        layout.addLayout(search_layout)

        # Tab widget for TM vs Supermemory
        self.view_tabs = QTabWidget()

        # Tab 1: TM Concordance (exact/fuzzy text matching)
        self.tm_tab = QWidget()
        tm_layout = QVBoxLayout(self.tm_tab)
        tm_layout.setContentsMargins(0, 10, 0, 0)

        self.search_results = QTextEdit()
        self.search_results.setReadOnly(True)
        self.search_results.setFont(QFont("Segoe UI", 10))
        self.search_results.setStyleSheet("background-color: #fafafa; border: 1px solid #ddd; border-radius: 4px;")
        tm_layout.addWidget(self.search_results)

        # Tab 2: Supermemory (semantic search)
        self.supermemory_tab = QWidget()
        supermemory_layout = QVBoxLayout(self.supermemory_tab)
        supermemory_layout.setContentsMargins(0, 10, 0, 0)

        self.supermemory_results = QTextEdit()
        self.supermemory_results.setReadOnly(True)
        self.supermemory_results.setFont(QFont("Segoe UI", 10))
        self.supermemory_results.setStyleSheet("background-color: #f8f5ff; border: 1px solid #d0c4e8; border-radius: 4px;")
        supermemory_layout.addWidget(self.supermemory_results)

        # Add tabs with result counts (will be updated after search)
        self.view_tabs.addTab(self.tm_tab, "📋 TM Matches")
        self.view_tabs.addTab(self.supermemory_tab, "🧠 Supermemory")

        layout.addWidget(self.view_tabs)

        # Status bar
        status_layout = QHBoxLayout()
        self.status_label = QLabel("Enter a search term and press Search or Enter")
        self.status_label.setStyleSheet("color: #666;")
        status_layout.addWidget(self.status_label)
        status_layout.addStretch()

        # Close button
        close_btn = QPushButton("Close")
        close_btn.clicked.connect(self.accept)
        close_btn.setStyleSheet("padding: 6px 20px;")
        status_layout.addWidget(close_btn)

        layout.addLayout(status_layout)

        self.setLayout(layout)

        # Focus on search input
        self.search_input.setFocus()

        # Check if Supermemory is available
        # The engine is stored in supermemory_widget.engine
        self.supermemory = None
        if hasattr(self.parent_app, 'supermemory_widget') and self.parent_app.supermemory_widget:
            if hasattr(self.parent_app.supermemory_widget, 'engine'):
                self.supermemory = self.parent_app.supermemory_widget.engine

    def do_search(self):
        """Perform both TM concordance and Supermemory semantic search"""
        search_text = self.search_input.text().strip()
        if not search_text:
            self.status_label.setText("⚠️ Please enter a search term")
            return

        self.status_label.setText("🔍 Searching...")
        self.search_results.clear()
        self.supermemory_results.clear()
        self.current_search_term = search_text
        # The query is user input and goes into HTML below, so escape it once.
        safe_query = self._escape(search_text)

        tm_count = 0
        supermemory_count = 0

        # Search TM (concordance)
        try:
            results = self.db_manager.concordance_search(search_text)
            self.current_results = results if results else []
            tm_count = len(self.current_results)

            if not results:
                self.search_results.setHtml(
                    f"<p style='color: #666; padding: 20px; text-align: center;'>"
                    f"No TM matches found for '<b>{safe_query}</b>'</p>"
                )
            else:
                self.update_tm_view()

        except Exception as e:
            self.search_results.setHtml(f"<p style='color: red; padding: 20px;'>TM Search Error: {self._escape(e)}</p>")
            self.log(f"TM Concordance search error: {e}")

        # Search Supermemory (semantic)
        try:
            if self.supermemory and self.supermemory.is_initialized():
                # Get only active TM IDs for filtering
                active_tm_ids = self.supermemory.get_active_tm_ids()

                # Search with active TM filter
                semantic_results = self.supermemory.search(
                    search_text,
                    n_results=25,
                    tm_ids=active_tm_ids if active_tm_ids else None  # None = search all
                )
                self.current_semantic_results = semantic_results if semantic_results else []
                supermemory_count = len(self.current_semantic_results)

                if not semantic_results:
                    self.supermemory_results.setHtml(
                        f"<p style='color: #666; padding: 20px; text-align: center;'>"
                        f"No semantic matches found for '<b>{safe_query}</b>'</p>"
                    )
                else:
                    self.update_supermemory_view()
            else:
                self.current_semantic_results = []
                self.supermemory_results.setHtml(
                    "<p style='color: #888; padding: 20px; text-align: center;'>"
                    "<b>🧠 Supermemory not available</b><br><br>"
                    "Supermemory provides semantic search (find by meaning, not just text).<br><br>"
                    "To enable: Go to <b>Resources → Supermemory</b> and index your TMX files."
                    "</p>"
                )

        except Exception as e:
            self.supermemory_results.setHtml(f"<p style='color: red; padding: 20px;'>Supermemory Error: {self._escape(e)}</p>")
            self.log(f"Supermemory search error: {e}")

        # Update tab titles with counts
        self.view_tabs.setTabText(0, f"📋 TM Matches ({tm_count})")
        self.view_tabs.setTabText(1, f"🧠 Supermemory ({supermemory_count})")

        # Update status
        total = tm_count + supermemory_count
        if total > 0:
            self.status_label.setText(f"✓ Found {tm_count} TM + {supermemory_count} semantic matches")
            self.log(f"Concordance: Found {tm_count} TM + {supermemory_count} semantic matches for '{search_text}'")
        else:
            self.status_label.setText("No matches found")

    def update_tm_view(self):
        """Update the TM concordance view with current results"""
        if not self.current_results:
            return

        search_text = self.current_search_term
        results = self.current_results
        esc = self._escape
        source_label = esc(self.source_lang_name)
        target_label = esc(self.target_lang_name)

        # Format results with highlighting; fragments are joined once at the end
        parts = [
            f"<h3 style='color: #333; margin-bottom: 15px;'>Found {len(results)} TM matches for "
            f"'<span style='color: #2196F3;'>{esc(search_text)}</span>'</h3>"
        ]

        for idx, match in enumerate(results, 1):
            source = match.get('source_text', '')
            target = match.get('target_text', '')
            tm_id = match.get('tm_id', 'Unknown')
            usage_count = match.get('usage_count', 0)
            modified_date = match.get('modified_date', 'Unknown')

            # Highlight search term in source and target (text is escaped there)
            highlighted_source = self._highlight_term(source, search_text)
            highlighted_target = self._highlight_term(target, search_text)

            # Alternating background colors for better visibility
            bg_color = '#f5f5f5' if idx % 2 == 0 else '#ffffff'

            parts.append(f"""
            <div style='background-color: {bg_color}; padding: 10px 8px; margin: 0;'>
            <div style='color: #555; font-size: 11px; margin-bottom: 6px;'>
            #{idx} - TM: <b>{esc(tm_id)}</b> - Used: {esc(usage_count)} times - Modified: {esc(modified_date)}
            </div>
            <div style='margin-bottom: 4px;'>
            <b style='color: #1976D2;'>{source_label}:</b> {highlighted_source}
            </div>
            <div>
            <b style='color: #388E3C;'>{target_label}:</b> {highlighted_target}
            </div>
            </div>
            <hr style='border: none; border-top: 2px solid #666; margin: 0;'>
            """)

        self.search_results.setHtml("".join(parts))

    def update_supermemory_view(self):
        """Update the Supermemory semantic search view"""
        if not self.current_semantic_results:
            return

        search_text = self.current_search_term
        results = self.current_semantic_results
        esc = self._escape
        source_label = esc(self.source_lang_name)
        target_label = esc(self.target_lang_name)

        # Format results with similarity scores
        parts = [f"""<h3 style='color: #5e35b1; margin-bottom: 15px;'>
        Found {len(results)} semantic matches for '<span style='color: #7c4dff;'>{esc(search_text)}</span>'
        </h3>
        <p style='color: #666; font-size: 11px; margin-bottom: 15px;'>
        Semantic search finds translations with similar <i>meaning</i>, even if the exact words differ.
        </p>"""]

        for result in results:
            entry = result.entry
            similarity = result.similarity
            rank = result.rank

            source = entry.source
            target = entry.target
            tm_name = entry.tm_name
            domain = entry.domain or "General"

            # Color-coded similarity
            if similarity >= 0.8:
                sim_color = '#2e7d32'  # Green - high
                sim_label = 'High'
            elif similarity >= 0.6:
                sim_color = '#f57c00'  # Orange - medium
                sim_label = 'Medium'
            else:
                sim_color = '#757575'  # Gray - low
                sim_label = 'Low'

            # Alternating background colors with purple tint
            bg_color = '#f3e5f5' if rank % 2 == 0 else '#ffffff'

            parts.append(f"""
            <div style='background-color: {bg_color}; padding: 10px 8px; margin: 0;'>
            <div style='color: #555; font-size: 11px; margin-bottom: 6px;'>
            #{rank} -
            <span style='color: {sim_color}; font-weight: bold;'>
            {similarity:.0%} {sim_label}
            </span>
            - TM: <b>{esc(tm_name)}</b>
            - Domain: <span style='color: #7c4dff;'>{esc(domain)}</span>
            </div>
            <div style='margin-bottom: 4px;'>
            <b style='color: #5e35b1;'>{source_label}:</b> {esc(source)}
            </div>
            <div>
            <b style='color: #00897b;'>{target_label}:</b> {esc(target)}
            </div>
            </div>
            <hr style='border: none; border-top: 2px solid #9575cd; margin: 0;'>
            """)

        self.supermemory_results.setHtml("".join(parts))

    @staticmethod
    def _escape(value) -> str:
        """Return ``str(value)`` with ``&``, ``<`` and ``>`` escaped for use as HTML text."""
        from html import escape
        return escape(str(value), quote=False)

    def _highlight_term(self, text: str, search_term: str) -> str:
        """Return *text* as escaped HTML with *search_term* highlighted (yellow/orange background).

        Matching is case-insensitive. The text is escaped piece by piece so
        that characters such as ``<`` or ``&`` in a segment are shown
        literally instead of being parsed as markup.
        """
        if not text:
            return ""
        if not search_term:
            return self._escape(text)

        import re
        text = str(text)
        pattern = re.compile(re.escape(search_term), re.IGNORECASE)
        parts = []
        pos = 0
        for match in pattern.finditer(text):
            parts.append(self._escape(text[pos:match.start()]))
            parts.append(
                "<span style='background-color: #FFD54F; padding: 1px 3px; border-radius: 2px; font-weight: bold;'>"
                + self._escape(match.group())
                + "</span>"
            )
            pos = match.end()
        parts.append(self._escape(text[pos:]))
        return "".join(parts)
527
+
528
+
529
+ class TMManagerDialog(QDialog):
530
+ """Translation Memory Manager dialog"""
531
+
532
def __init__(self, parent, db_manager, log_callback: Optional[Callable] = None, tm_ids: list = None):
    """Build the manager dialog and load the data for its tabs.

    Args:
        parent: parent widget; also kept as ``parent_app``.
        db_manager: database manager queried by the tabs.
        log_callback: optional callable receiving log messages; a no-op is
            used when it is omitted.
        tm_ids: optional list of TM IDs; when given, only entries from these
            TMs are shown.
    """
    super().__init__(parent)

    self.parent_app = parent
    self.db_manager = db_manager
    self.filter_tm_ids = tm_ids
    self.log = log_callback or (lambda x: None)

    self.resize(1000, 700)
    self.setWindowTitle("Translation Memory Manager")

    self.setup_ui()
    self.load_initial_data()
544
+
545
def setup_ui(self):
    """Assemble the dialog: title label, tabbed pages and a Close button."""
    root_layout = QVBoxLayout()

    # Title
    title_font = QFont()
    title_font.setPointSize(14)
    title_font.setBold(True)
    title = QLabel("📚 Translation Memory Manager")
    title.setFont(title_font)
    root_layout.addWidget(title)

    # Pages are created first, then added in display order
    self.tabs = QTabWidget()
    self.browser_tab = self.create_browser_tab()
    self.search_tab = self.create_search_tab()
    self.import_export_tab = self.create_import_export_tab()
    self.stats_tab = self.create_stats_tab()

    pages = (
        (self.browser_tab, "📋 Browse"),
        (self.search_tab, "🔍 Concordance"),
        (self.import_export_tab, "📥 Import/Export"),
        (self.stats_tab, "📊 Statistics"),
    )
    for page, caption in pages:
        self.tabs.addTab(page, caption)

    # Maintenance page (cleaning) goes last
    self.maintenance_tab = self.create_maintenance_tab()
    self.tabs.addTab(self.maintenance_tab, "🧹 Maintenance")

    root_layout.addWidget(self.tabs)

    # Close button accepts the dialog
    close_button = QPushButton("Close")
    close_button.clicked.connect(self.accept)
    root_layout.addWidget(close_button)

    self.setLayout(root_layout)
583
+
584
def create_browser_tab(self):
    """Build and return the Browse page: filter row, entry table and action row."""
    page = QGroupBox()
    page_layout = QVBoxLayout()

    # --- Filter row: text filter, row limit, refresh button
    self.browser_filter = QLineEdit()
    self.browser_filter.setPlaceholderText("Type to filter entries...")
    self.browser_filter.textChanged.connect(self.filter_browser_entries)

    self.browser_limit = QSpinBox()
    self.browser_limit.setRange(100, 10000)
    self.browser_limit.setValue(500)
    self.browser_limit.setSingleStep(100)
    self.browser_limit.setPrefix("Show: ")
    self.browser_limit.setSuffix(" entries")

    refresh_button = QPushButton("🔄 Refresh")
    refresh_button.clicked.connect(self.refresh_browser)

    filter_row = QHBoxLayout()
    for item in (QLabel("Filter:"), self.browser_filter, self.browser_limit, refresh_button):
        filter_row.addWidget(item)
    page_layout.addLayout(filter_row)

    # --- Entry table: six columns, the two text columns stretch
    table = QTableWidget()
    table.setColumnCount(6)
    table.setHorizontalHeaderLabels(["ID", "Source", "Target", "TM", "Usage", "Modified"])
    header = table.horizontalHeader()
    for column in (1, 2):
        header.setSectionResizeMode(column, QHeaderView.ResizeMode.Stretch)
    table.setSelectionBehavior(QTableWidget.SelectionBehavior.SelectRows)
    table.setAlternatingRowColors(True)
    # Clicking a column header sorts the table
    table.setSortingEnabled(True)
    self.browser_table = table
    page_layout.addWidget(table)

    # --- Action row: delete button on the left, status text on the right
    delete_button = QPushButton("🗑️ Delete Selected")
    delete_button.clicked.connect(self.delete_selected_entry)
    self.browser_status = QLabel("Ready")

    action_row = QHBoxLayout()
    action_row.addWidget(delete_button)
    action_row.addStretch()
    action_row.addWidget(self.browser_status)
    page_layout.addLayout(action_row)

    page.setLayout(page_layout)
    return page
639
+
640
def create_search_tab(self):
    """Build and return the Concordance page: search row, results pane, status line."""
    page = QGroupBox()
    page_layout = QVBoxLayout()

    # Search row; Enter in the field and the button both start a search
    self.search_input = QLineEdit()
    self.search_input.setPlaceholderText("Enter text to search in source and target...")
    self.search_input.returnPressed.connect(self.do_concordance_search)

    search_button = QPushButton("🔍 Search")
    search_button.clicked.connect(self.do_concordance_search)

    search_row = QHBoxLayout()
    for item in (QLabel("Search:"), self.search_input, search_button):
        search_row.addWidget(item)
    page_layout.addLayout(search_row)

    # Read-only results pane
    results_view = QTextEdit()
    results_view.setReadOnly(True)
    results_view.setFont(QFont("Segoe UI", 10))
    self.search_results = results_view
    page_layout.addWidget(results_view)

    # Status line
    self.search_status = QLabel("Enter a search term and press Search")
    page_layout.addWidget(self.search_status)

    page.setLayout(page_layout)
    return page
671
+
672
def create_import_export_tab(self):
    """Build and return the Import/Export page with one group box per direction."""
    page = QGroupBox()
    page_layout = QVBoxLayout()

    # ---------------- Import ----------------
    import_box = QGroupBox("Import TMX")
    import_box_layout = QVBoxLayout()

    import_text = QLabel(
        "Import translation units from a TMX file into your database.\n"
        "All entries will be added to a new TM or merged with an existing one."
    )
    import_text.setWordWrap(True)
    import_box_layout.addWidget(import_text)

    # Target TM identifier plus the button that starts the import
    self.import_tm_id = QLineEdit("imported")
    self.import_tm_id.setPlaceholderText("Enter TM identifier")
    import_button = QPushButton("📂 Select and Import TMX...")
    import_button.clicked.connect(self.import_tmx)

    import_row = QHBoxLayout()
    for item in (QLabel("TM ID:"), self.import_tm_id, import_button):
        import_row.addWidget(item)
    import_box_layout.addLayout(import_row)

    # Progress bar starts hidden
    self.import_progress = QProgressBar()
    self.import_progress.setVisible(False)
    import_box_layout.addWidget(self.import_progress)

    self.import_status = QLabel("")
    import_box_layout.addWidget(self.import_status)

    import_box.setLayout(import_box_layout)
    page_layout.addWidget(import_box)

    # ---------------- Export ----------------
    export_box = QGroupBox("Export TMX")
    export_box_layout = QVBoxLayout()

    export_text = QLabel(
        "Export your translation memory to a standard TMX file.\n"
        "The TMX file can be used in other CAT tools or shared with colleagues."
    )
    export_text.setWordWrap(True)
    export_box_layout.addWidget(export_text)

    # Selector entries carry their scope as item data ("all" / "project")
    self.export_tm_selector = QComboBox()
    for caption, scope in (("All TMs", "all"), ("Project TM only", "project")):
        self.export_tm_selector.addItem(caption, scope)
    export_button = QPushButton("💾 Export to TMX...")
    export_button.clicked.connect(self.export_tmx)

    export_row = QHBoxLayout()
    for item in (QLabel("TM to export:"), self.export_tm_selector, export_button):
        export_row.addWidget(item)
    export_box_layout.addLayout(export_row)

    self.export_status = QLabel("")
    export_box_layout.addWidget(self.export_status)

    export_box.setLayout(export_box_layout)
    page_layout.addWidget(export_box)

    # Keep both groups at the top of the page
    page_layout.addStretch()

    page.setLayout(page_layout)
    return page
742
+
743
def create_stats_tab(self):
    """Build and return the Statistics page: a read-only text pane and a refresh button."""
    page = QGroupBox()
    page_layout = QVBoxLayout()

    # Read-only pane in a monospaced font
    stats_view = QTextEdit()
    stats_view.setReadOnly(True)
    stats_view.setFont(QFont("Courier New", 10))
    self.stats_display = stats_view
    page_layout.addWidget(stats_view)

    refresh_button = QPushButton("🔄 Refresh Statistics")
    refresh_button.clicked.connect(self.refresh_stats)
    page_layout.addWidget(refresh_button)

    page.setLayout(page_layout)
    return page
759
+
760
def load_initial_data(self):
    """Populate the browser table and the statistics pane, in that order."""
    for refresh in (self.refresh_browser, self.refresh_stats):
        refresh()
764
+
765
def refresh_browser(self):
    """Reload the TM browser table from the database.

    With filter text, entries come from ``db_manager.concordance_search``;
    otherwise the most recently modified rows of ``translation_units`` are
    read, up to the limit in the spin box. In both cases the optional
    ``filter_tm_ids`` restriction is applied. Errors are shown in the status
    label and logged rather than raised.
    """
    def as_text(value):
        # QTableWidgetItem needs a str; None becomes an empty cell.
        return '' if value is None else str(value)

    table = self.browser_table
    sorting_was_enabled = table.isSortingEnabled()
    try:
        limit = self.browser_limit.value()
        filter_text = self.browser_filter.text().strip()

        # Get entries from database
        if filter_text:
            entries = self.db_manager.concordance_search(filter_text) or []
            # Apply TM filter to concordance results
            if self.filter_tm_ids:
                wanted_ids = set(self.filter_tm_ids)
                entries = [e for e in entries if e.get('tm_id') in wanted_ids]
        else:
            # Get recent entries; every value, including the limit, is bound
            query = "SELECT * FROM translation_units"
            params = []
            if self.filter_tm_ids:
                placeholders = ','.join('?' * len(self.filter_tm_ids))
                query += f" WHERE tm_id IN ({placeholders})"
                params.extend(self.filter_tm_ids)
            query += " ORDER BY modified_date DESC LIMIT ?"
            params.append(limit)
            self.db_manager.cursor.execute(query, params)
            entries = [dict(row) for row in self.db_manager.cursor.fetchall()]

        # Populate table with sorting switched off: inserting items while
        # sorting is enabled lets rows move mid-insert and scrambles the cells.
        table.setSortingEnabled(False)
        table.setRowCount(len(entries))
        for row, entry in enumerate(entries):
            cells = (
                str(entry['id']),
                as_text(entry['source_text'])[:100],
                as_text(entry['target_text'])[:100],
                as_text(entry['tm_id']),
                str(entry.get('usage_count', 0)),
                as_text(entry.get('modified_date', ''))[:16],
            )
            for column, value in enumerate(cells):
                table.setItem(row, column, QTableWidgetItem(value))

        self.browser_status.setText(f"Showing {len(entries)} entries")
        self.log(f"TM Browser: Loaded {len(entries)} entries")

    except Exception as e:
        # UI boundary: report the problem instead of crashing the dialog.
        self.browser_status.setText(f"Error: {str(e)}")
        self.log(f"Error refreshing TM browser: {e}")
    finally:
        # Restore header-click sorting to whatever it was before the reload.
        table.setSortingEnabled(sorting_was_enabled)
807
+
808
def filter_browser_entries(self):
    """Hook for filtering browser entries as the user types.

    Currently a deliberate no-op: the auto-refresh on filter change (with
    debouncing) has not been implemented yet.
    """
    return None
812
+
813
def delete_selected_entry(self):
    """Delete the TM entry selected in the browser table.

    Asks for confirmation, deletes the row from ``translation_units`` by id,
    commits, then refreshes the browser and the statistics. On failure the
    pending transaction is rolled back and the error is logged and shown.
    """
    selected_items = self.browser_table.selectedItems()
    if not selected_items:
        QMessageBox.warning(self, "No Selection", "Please select an entry to delete")
        return

    # Take the row from the selection itself: the table's *current* row is
    # tracked separately by Qt and can point at a row that is not selected.
    row = selected_items[0].row()
    id_item = self.browser_table.item(row, 0)
    if id_item is None:
        QMessageBox.warning(self, "No Selection", "Please select an entry to delete")
        return

    entry_id = int(id_item.text())
    source_item = self.browser_table.item(row, 1)
    target_item = self.browser_table.item(row, 2)
    source = source_item.text() if source_item is not None else ""
    target = target_item.text() if target_item is not None else ""

    # Confirm deletion
    reply = QMessageBox.question(
        self, "Confirm Deletion",
        f"Delete this TM entry?\n\nSource: {source}\nTarget: {target}\n\nThis cannot be undone.",
        QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
    )
    if reply != QMessageBox.StandardButton.Yes:
        return

    try:
        self.db_manager.cursor.execute("DELETE FROM translation_units WHERE id = ?", (entry_id,))
        self.db_manager.connection.commit()
    except Exception as e:
        # Do not leave a half-applied delete pending on the shared connection.
        try:
            self.db_manager.connection.rollback()
        except Exception as rollback_error:
            self.log(f"Rollback after failed delete also failed: {rollback_error}")
        self.log(f"Error deleting TM entry {entry_id}: {e}")
        QMessageBox.critical(self, "Error", f"Failed to delete entry: {str(e)}")
        return

    self.log(f"Deleted TM entry {entry_id}")
    self.refresh_browser()
    # Entry counts changed, so the statistics view would otherwise be stale.
    self.refresh_stats()
    QMessageBox.information(self, "Success", "Entry deleted successfully")
841
+
842
def do_concordance_search(self):
    """Run a concordance search and render the matches as HTML.

    Reads the query from ``self.search_input``, calls
    ``self.db_manager.concordance_search`` and writes one bordered block per
    match into ``self.search_results``, with every exact occurrence of the
    query highlighted in yellow. All TM text and the query itself are
    HTML-escaped, so segments containing ``<``, ``>`` or ``&`` are shown
    literally instead of being interpreted as markup.

    Failures are reported in ``self.search_status`` and the log rather than
    raised.
    """
    import html as html_lib  # local import; only this method needs it

    query = self.search_input.text().strip()
    if not query:
        self.search_status.setText("Please enter a search term")
        return

    safe_query = html_lib.escape(query)
    marker = f"<span style='background-color: yellow;'>{safe_query}</span>"

    def highlight(text):
        # Split on the raw query and escape each piece separately, so that
        # escaping can neither break a match nor create a spurious one.
        pieces = (text or "").split(query)
        return marker.join(html_lib.escape(piece) for piece in pieces)

    try:
        results = self.db_manager.concordance_search(query)

        # Display results
        self.search_results.clear()
        parts = [f"<h3>Found {len(results)} matches for '{safe_query}'</h3>"]

        for idx, match in enumerate(results, 1):
            tm_label = html_lib.escape(str(match['tm_id']))
            modified = html_lib.escape(str(match.get('modified_date', 'N/A')))
            parts.append(
                "\n<div style='border: 1px solid #ccc; padding: 10px; margin: 10px 0;'>\n"
                f"<p><strong>#{idx}</strong> - TM: {tm_label} - "
                f"Used: {match.get('usage_count', 0)} times</p>\n"
                f"<p><strong>Source:</strong> {highlight(match['source_text'])}</p>\n"
                f"<p><strong>Target:</strong> {highlight(match['target_text'])}</p>\n"
                f"<p style='color: #888; font-size: 9pt;'>Modified: {modified}</p>\n"
                "</div>\n"
            )

        self.search_results.setHtml("".join(parts))
        self.search_status.setText(f"Found {len(results)} matches")
        self.log(f"Concordance search: {len(results)} matches for '{query}'")

    except Exception as e:
        self.search_status.setText(f"Error: {str(e)}")
        self.log(f"Error in concordance search: {e}")
880
+
881
def import_tmx(self):
    """Import a TMX file chosen by the user on a background thread.

    Prompts for a file, takes the target TM id from ``self.import_tm_id``
    (falling back to "imported"), takes the language pair from the parent
    app's current project (falling back to "en"/"de" when there is none),
    then starts a ``TMXImportThread`` whose ``progress`` and ``finished``
    signals drive ``on_import_progress`` and ``on_import_finished``.
    """
    file_path, _ = QFileDialog.getOpenFileName(
        self, "Select TMX File", "",
        "TMX Files (*.tmx);;All Files (*.*)"
    )

    if not file_path:
        return

    tm_id = self.import_tm_id.text().strip() or "imported"

    # Get source and target languages from the parent app. The attribute can
    # exist while holding None (no project open), so test the value itself.
    project = getattr(self.parent_app, 'current_project', None)
    if project is not None:
        source_lang = project.source_lang
        target_lang = project.target_lang
    else:
        source_lang = "en"
        target_lang = "de"

    # Show progress bar
    self.import_progress.setValue(0)
    self.import_progress.setVisible(True)
    self.import_status.setText("Importing...")

    # Start import thread; keep it on self so it is not garbage-collected
    # while it is still running.
    self.import_thread = TMXImportThread(file_path, self.db_manager, source_lang, target_lang, tm_id)
    self.import_thread.progress.connect(self.on_import_progress)
    self.import_thread.finished.connect(self.on_import_finished)
    self.import_thread.start()
911
+
912
def on_import_progress(self, percent, message):
    """Show import progress.

    Args:
        percent: Value to set on the import progress bar.
        message: Text to show in the import status label.
    """
    progress_bar = self.import_progress
    progress_bar.setValue(percent)

    status_label = self.import_status
    status_label.setText(message)
916
+
917
def on_import_finished(self, success, message, count):
    """Handle completion of a TMX import.

    Hides the progress bar and shows ``message`` in the status label. On
    success an information box reports ``count`` and the browser and
    statistics are refreshed; on failure a critical box shows ``message``.
    """
    self.import_progress.setVisible(False)
    self.import_status.setText(message)

    if not success:
        QMessageBox.critical(self, "Import Failed", message)
        return

    summary = f"{message}\n\nTotal entries: {count}"
    QMessageBox.information(self, "Import Complete", summary)
    self.refresh_browser()
    self.refresh_stats()
928
+
929
def export_tmx(self):
    """Export translation units to a TMX 1.4 file chosen by the user.

    The selection in ``self.export_tm_selector`` decides the scope: "all"
    exports every row of ``translation_units``, any other value is used as
    the ``tm_id`` to export. Each row becomes one ``<tu>`` with a source and
    a target ``<tuv>``.

    Failures are shown in ``self.export_status``, a message box and the log
    rather than raised.
    """
    from datetime import timezone  # local import; only this method needs it

    file_path, _ = QFileDialog.getSaveFileName(
        self, "Save TMX File", "",
        "TMX Files (*.tmx);;All Files (*.*)"
    )

    if not file_path:
        return

    def lang_of(entry, key, fallback):
        # A missing column and a NULL value both fall back; ElementTree
        # cannot serialize None as an attribute value.
        return entry.get(key) or fallback

    try:
        tm_filter = self.export_tm_selector.currentData()

        # Get entries to export
        cursor = self.db_manager.cursor
        if tm_filter == "all":
            cursor.execute("SELECT * FROM translation_units")
        else:
            cursor.execute("SELECT * FROM translation_units WHERE tm_id = ?", (tm_filter,))

        entries = [dict(row) for row in cursor.fetchall()]

        if not entries:
            QMessageBox.warning(self, "No Entries", "No translation units to export")
            return

        # The header's srclang must describe the data: use the single source
        # language when there is one, otherwise TMX's "*all*" wildcard.
        source_langs = {lang_of(entry, 'source_lang', 'en') for entry in entries}
        header_srclang = source_langs.pop() if len(source_langs) == 1 else '*all*'

        # Create TMX
        tmx = ET.Element('tmx')
        tmx.set('version', '1.4')

        header = ET.SubElement(tmx, 'header')
        # The trailing "Z" denotes UTC, so the timestamp must be taken in UTC.
        header.set('creationdate', datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ'))
        header.set('srclang', header_srclang)
        header.set('adminlang', 'en')
        header.set('segtype', 'sentence')
        header.set('creationtool', 'Supervertaler')
        header.set('creationtoolversion', '4.0')
        header.set('datatype', 'plaintext')
        # o-tmf is a required header attribute in TMX 1.4.
        header.set('o-tmf', 'Supervertaler')

        body = ET.SubElement(tmx, 'body')

        for entry in entries:
            tu = ET.SubElement(body, 'tu')

            # Source
            tuv_src = ET.SubElement(tu, 'tuv')
            tuv_src.set('xml:lang', lang_of(entry, 'source_lang', 'en'))
            seg_src = ET.SubElement(tuv_src, 'seg')
            seg_src.text = entry['source_text']

            # Target
            tuv_tgt = ET.SubElement(tu, 'tuv')
            tuv_tgt.set('xml:lang', lang_of(entry, 'target_lang', 'de'))
            seg_tgt = ET.SubElement(tuv_tgt, 'seg')
            seg_tgt.text = entry['target_text']

        # Write to file
        tree = ET.ElementTree(tmx)
        ET.indent(tree, space=" ")
        tree.write(file_path, encoding='utf-8', xml_declaration=True)

        self.export_status.setText(f"Exported {len(entries)} entries to {Path(file_path).name}")
        QMessageBox.information(self, "Export Complete",
                                f"Successfully exported {len(entries)} translation units")
        self.log(f"Exported {len(entries)} entries to {file_path}")

    except Exception as e:
        self.export_status.setText(f"Error: {str(e)}")
        QMessageBox.critical(self, "Export Failed", f"Failed to export TMX:\n{str(e)}")
        self.log(f"Error exporting TMX: {e}")
998
+
999
def refresh_stats(self):
    """Refresh the TM statistics view.

    Queries ``translation_units`` (restricted to ``self.filter_tm_ids`` when
    that is non-empty) for the total row count, the number of distinct TM
    ids, the per-TM row counts and the average source/target text lengths,
    then writes a plain-text report to ``self.stats_display``.

    Failures are written to the statistics view and the log rather than
    raised.
    """
    try:
        # Build TM filter clause
        tm_filter = ""
        params = []
        if self.filter_tm_ids:
            placeholders = ','.join('?' * len(self.filter_tm_ids))
            tm_filter = f" WHERE tm_id IN ({placeholders})"
            params = list(self.filter_tm_ids)

        cursor = self.db_manager.cursor

        # Get various statistics
        cursor.execute(f"SELECT COUNT(*) FROM translation_units{tm_filter}", params)
        total_entries = cursor.fetchone()[0]

        cursor.execute(f"SELECT COUNT(DISTINCT tm_id) FROM translation_units{tm_filter}", params)
        tm_count = cursor.fetchone()[0]

        cursor.execute(
            f"""
            SELECT tm_id, COUNT(*) as count
            FROM translation_units{tm_filter}
            GROUP BY tm_id
            ORDER BY count DESC
            """,
            params,
        )
        tm_breakdown = cursor.fetchall()

        cursor.execute(
            f"""
            SELECT AVG(LENGTH(source_text)), AVG(LENGTH(target_text))
            FROM translation_units{tm_filter}
            """,
            params,
        )
        avg_lengths = cursor.fetchone()

        # Handle empty TM (AVG returns None)
        avg_source = avg_lengths[0] if avg_lengths[0] is not None else 0
        avg_target = avg_lengths[1] if avg_lengths[1] is not None else 0

        # Format statistics
        report = [
            "",
            "═══════════════════════════════════════════════",
            "TRANSLATION MEMORY STATISTICS",
            "═══════════════════════════════════════════════",
            "",
            f"Total Translation Units: {total_entries:,}",
            f"Number of TMs: {tm_count}",
            "",
            f"Average Source Length: {avg_source:.1f} characters",
            f"Average Target Length: {avg_target:.1f} characters",
            "",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
            "BREAKDOWN BY TM",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
            "",
        ]
        for tm_id, count in tm_breakdown:
            pct = (count / total_entries * 100) if total_entries > 0 else 0
            # tm_id can be NULL or non-string; the "s" format spec accepts
            # only str, so convert before padding.
            tm_label = "(none)" if tm_id is None else str(tm_id)
            report.append(f"{tm_label:20s} {count:8,} entries ({pct:5.1f}%)")
        report.append("")  # keeps the trailing newline after the last row

        self.stats_display.setPlainText("\n".join(report))
        self.log("TM statistics refreshed")

    except Exception as e:
        self.stats_display.setPlainText(f"Error loading statistics:\n{str(e)}")
        self.log(f"Error refreshing stats: {e}")
1064
+
1065
def create_maintenance_tab(self):
    """Build the maintenance/cleaning tab.

    Lays out a header, an explanatory note, a "Duplicate Entry Cleaning"
    group with one row per cleaning action, and a read-only results area
    stored on ``self.maintenance_results``.

    Returns:
        The QGroupBox containing the tab's widgets.
    """
    def build_cleaning_row(description_html, on_click):
        # One row: word-wrapped description taking the stretch, then a
        # fixed-width "Clean" button wired to the given handler.
        row = QHBoxLayout()
        description = QLabel(description_html)
        description.setWordWrap(True)
        row.addWidget(description, 1)

        button = QPushButton("🗑️ Clean")
        button.setFixedWidth(100)
        button.clicked.connect(on_click)
        row.addWidget(button)
        return row

    tab_layout = QVBoxLayout()

    # Header
    tab_layout.addWidget(QLabel("<h3>🧹 TM Maintenance & Cleaning</h3>"))

    intro = QLabel(
        "Clean up your translation memory by removing duplicates and redundant entries.\n"
        "This helps keep your TM efficient and reduces clutter."
    )
    intro.setWordWrap(True)
    intro.setStyleSheet("color: #666; margin-bottom: 15px;")
    tab_layout.addWidget(intro)

    # Duplicate cleaning section
    cleaning_layout = QVBoxLayout()

    # Option 1: Identical source + target
    cleaning_layout.addLayout(build_cleaning_row(
        "<b>Remove identical source + target pairs:</b><br>"
        "Deletes entries where both source and target text are exactly the same.<br>"
        "<i>Example: 'Hello' → 'Hello' (untranslated entries)</i>",
        self.clean_identical_source_target,
    ))

    cleaning_layout.addWidget(QLabel(""))  # Spacer

    # Option 2: Identical source (keep newest)
    cleaning_layout.addLayout(build_cleaning_row(
        "<b>Remove duplicate sources (keep newest only):</b><br>"
        "For entries with identical source text, keeps only the most recent translation.<br>"
        "<i>Useful for removing outdated translations of the same source.</i>",
        self.clean_duplicate_sources,
    ))

    cleaning_group = QGroupBox("Duplicate Entry Cleaning")
    cleaning_group.setLayout(cleaning_layout)
    tab_layout.addWidget(cleaning_group)

    # Results display
    results_view = QTextEdit()
    results_view.setReadOnly(True)
    results_view.setMaximumHeight(200)
    results_view.setPlaceholderText("Cleaning results will appear here...")
    self.maintenance_results = results_view
    tab_layout.addWidget(results_view)

    tab_layout.addStretch()

    container = QGroupBox()
    container.setLayout(tab_layout)
    return container
1134
+
1135
def clean_identical_source_target(self):
    """Delete TM entries whose source and target text are identical.

    Asks for confirmation, counts the matching rows (restricted to
    ``self.filter_tm_ids`` when that is non-empty), deletes them, commits,
    reports the count in ``self.maintenance_results`` and refreshes the
    statistics and the browser table.

    On failure the pending transaction is rolled back and the error is
    logged, written to the results area and shown in a message box.
    """
    try:
        # Confirm with user
        reply = QMessageBox.question(
            self, "Confirm Cleaning",
            "This will delete all TM entries where the source and target text are identical.\n\n"
            "This action cannot be undone. Continue?",
            QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
        )

        if reply != QMessageBox.StandardButton.Yes:
            return

        # Build TM filter clause
        tm_filter = ""
        params = []
        if self.filter_tm_ids:
            placeholders = ','.join('?' * len(self.filter_tm_ids))
            tm_filter = f" AND tm_id IN ({placeholders})"
            params = list(self.filter_tm_ids)

        condition = f"source_text = target_text{tm_filter}"
        cursor = self.db_manager.cursor

        # Find and count identical entries
        cursor.execute(f"SELECT COUNT(*) FROM translation_units WHERE {condition}", params)
        count_before = cursor.fetchone()[0]

        if count_before == 0:
            self.maintenance_results.setPlainText("✅ No identical source/target entries found. TM is clean!")
            return

        # Delete identical entries
        cursor.execute(f"DELETE FROM translation_units WHERE {condition}", params)
        self.db_manager.connection.commit()

        # Report results
        result_text = "\n".join([
            "",
            "✅ Cleaning Complete!",
            "",
            f"Removed {count_before:,} entries where source = target",
            "",
            "These were likely untranslated entries or placeholders.",
            "Your TM is now cleaner and more efficient.",
            "",
        ])
        self.maintenance_results.setPlainText(result_text)
        self.log(f"Cleaned {count_before} identical source/target entries from TM")

        # Both views show data that just changed.
        self.refresh_stats()
        self.refresh_browser()

    except Exception as e:
        # Do not leave a half-applied delete pending on the shared connection.
        try:
            self.db_manager.connection.rollback()
        except Exception as rollback_error:
            self.log(f"Rollback after failed cleaning also failed: {rollback_error}")
        self.log(f"Error cleaning identical source/target entries: {e}")
        error_msg = f"❌ Error during cleaning:\n{str(e)}"
        self.maintenance_results.setPlainText(error_msg)
        QMessageBox.critical(self, "Cleaning Error", str(e))
1190
+
1191
def clean_duplicate_sources(self):
    """Remove duplicate sources, keeping only the newest translation.

    Asks for confirmation, finds every ``source_hash`` that occurs more than
    once (restricted to ``self.filter_tm_ids`` when that is non-empty) and,
    for each, deletes all rows except the one with the latest
    ``modified_date``. Everything is committed once at the end; the outcome
    is written to ``self.maintenance_results`` and the statistics and the
    browser table are refreshed.

    On failure the pending deletions are rolled back and the error is
    logged, written to the results area and shown in a message box.
    """
    # NOTE(review): grouping is by source_hash only, so with no TM filter the
    # same source in different TMs counts as a duplicate - confirm intended.
    try:
        # Confirm with user
        reply = QMessageBox.question(
            self, "Confirm Cleaning",
            "This will find entries with identical source text and keep only the most recent translation.\n\n"
            "Older translations of the same source will be deleted.\n"
            "This action cannot be undone. Continue?",
            QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No
        )

        if reply != QMessageBox.StandardButton.Yes:
            return

        # Build the TM filter once; it is reused as a WHERE clause for the
        # grouping query and as an AND clause for the per-hash lookups.
        tm_ids = list(self.filter_tm_ids) if self.filter_tm_ids else []
        tm_condition = ""
        if tm_ids:
            tm_condition = f"tm_id IN ({','.join('?' * len(tm_ids))})"
        where_filter = f" WHERE {tm_condition}" if tm_condition else ""
        and_filter = f" AND {tm_condition}" if tm_condition else ""

        cursor = self.db_manager.cursor

        # Find duplicate sources
        cursor.execute(
            f"""
            SELECT source_hash, COUNT(*) as cnt
            FROM translation_units{where_filter}
            GROUP BY source_hash
            HAVING cnt > 1
            """,
            tm_ids,
        )
        duplicates = cursor.fetchall()

        if not duplicates:
            self.maintenance_results.setPlainText("✅ No duplicate sources found. TM is clean!")
            return

        total_deleted = 0

        # For each duplicate source, keep only the newest
        for source_hash, _count in duplicates:
            # Newest first; id breaks ties so the surviving row is
            # deterministic when modification dates are equal.
            cursor.execute(
                f"""
                SELECT id FROM translation_units
                WHERE source_hash = ?{and_filter}
                ORDER BY modified_date DESC, id DESC
                """,
                [source_hash, *tm_ids],
            )
            ids = [row[0] for row in cursor.fetchall()]

            # Keep the first (newest), delete the rest. One bound id per
            # statement avoids an unbounded IN (...) placeholder list.
            ids_to_delete = ids[1:]
            if ids_to_delete:
                cursor.executemany(
                    "DELETE FROM translation_units WHERE id = ?",
                    [(entry_id,) for entry_id in ids_to_delete],
                )
                total_deleted += len(ids_to_delete)

        self.db_manager.connection.commit()

        # Report results
        result_text = "\n".join([
            "",
            "✅ Cleaning Complete!",
            "",
            f"Found {len(duplicates):,} sources with multiple translations",
            f"Removed {total_deleted:,} older translations",
            "Kept the most recent translation for each source",
            "",
            "Your TM now has only the latest translations.",
            "",
        ])
        self.maintenance_results.setPlainText(result_text)
        self.log(f"Cleaned {total_deleted} duplicate source entries from TM (kept newest)")

        # Both views show data that just changed.
        self.refresh_stats()
        self.refresh_browser()

    except Exception as e:
        # Deletions issued before the failure are still pending; roll them
        # back so a later, unrelated commit cannot apply them silently.
        try:
            self.db_manager.connection.rollback()
        except Exception as rollback_error:
            self.log(f"Rollback after failed cleaning also failed: {rollback_error}")
        self.log(f"Error cleaning duplicate sources: {e}")
        error_msg = f"❌ Error during cleaning:\n{str(e)}"
        self.maintenance_results.setPlainText(error_msg)
        QMessageBox.critical(self, "Cleaning Error", str(e))