supervertaler 1.9.163__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85):
  1. Supervertaler.py +48473 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1911 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +351 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1176 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.163.dist-info/METADATA +906 -0
  81. supervertaler-1.9.163.dist-info/RECORD +85 -0
  82. supervertaler-1.9.163.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.163.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1176 @@
1
+ """
2
+ Termview Widget - RYS-style Inline Terminology Display
3
+
4
+ Displays source text with termbase translations shown directly underneath each word/phrase.
5
+ Inspired by the RYS Trados plugin's inline term visualization.
6
+
7
+ Features:
8
+ - Visual mapping: translations appear under their source terms
9
+ - Hover tooltips: show synonyms/alternatives
10
+ - Click to insert: click any translation to insert into target
11
+ - Multi-word term support: handles both single words and phrases
12
+ """
13
+
14
+ from PyQt6.QtWidgets import (QWidget, QVBoxLayout, QLabel, QFrame, QScrollArea,
15
+ QHBoxLayout, QPushButton, QToolTip, QLayout, QLayoutItem, QSizePolicy, QStyle,
16
+ QMenu, QMessageBox)
17
+ from PyQt6.QtCore import Qt, QPoint, pyqtSignal, QRect, QSize
18
+ from PyQt6.QtGui import QFont, QCursor, QAction
19
+ from typing import Dict, List, Optional, Tuple
20
+ import re
21
+
22
+
23
class FlowLayout(QLayout):
    """Layout that places items left to right and wraps to a new row when a row is full.

    Args:
        parent: Optional parent widget or layout, forwarded to ``QLayout``.
        margin: Contents margin in pixels, applied to all four sides.
        spacing: Gap in pixels between items, used both horizontally and
            vertically. A negative value defers to the parent (see
            ``smartSpacing``).
    """

    # Gap used when neither an explicit nor a parent-derived spacing is available.
    _FALLBACK_SPACING = 5

    def __init__(self, parent=None, margin=0, spacing=-1):
        super().__init__(parent)
        self.itemList = []
        self.m_hSpace = spacing
        self.m_vSpace = spacing
        self.setContentsMargins(margin, margin, margin, margin)

    def __del__(self):
        # Drain the item list so no layout items remain referenced by this layout.
        item = self.takeAt(0)
        while item:
            item = self.takeAt(0)

    def addItem(self, item):
        """Append a layout item to the end of the flow."""
        self.itemList.append(item)

    def horizontalSpacing(self):
        """Return the explicit horizontal gap, or the parent-derived one if unset."""
        if self.m_hSpace >= 0:
            return self.m_hSpace
        return self.smartSpacing(QStyle.PixelMetric.PM_LayoutHorizontalSpacing)

    def verticalSpacing(self):
        """Return the explicit vertical gap, or the parent-derived one if unset."""
        if self.m_vSpace >= 0:
            return self.m_vSpace
        return self.smartSpacing(QStyle.PixelMetric.PM_LayoutVerticalSpacing)

    def count(self):
        """Return the number of items managed by this layout."""
        return len(self.itemList)

    def itemAt(self, index):
        """Return the item at ``index``, or ``None`` when out of range."""
        if 0 <= index < len(self.itemList):
            return self.itemList[index]
        return None

    def takeAt(self, index):
        """Remove and return the item at ``index``, or ``None`` when out of range."""
        if 0 <= index < len(self.itemList):
            return self.itemList.pop(index)
        return None

    def expandingDirections(self):
        """Report that the layout does not want to grow in either direction."""
        return Qt.Orientation(0)

    def hasHeightForWidth(self):
        """The height depends on the width because items wrap."""
        return True

    def heightForWidth(self, width):
        """Return the height needed to lay out all items within ``width`` pixels."""
        return self.doLayout(QRect(0, 0, width, 0), True)

    def setGeometry(self, rect):
        """Apply ``rect`` to the layout and position every item inside it."""
        super().setGeometry(rect)
        self.doLayout(rect, False)

    def sizeHint(self):
        """The preferred size equals the minimum size."""
        return self.minimumSize()

    def minimumSize(self):
        """Return the largest single-item minimum size plus the contents margins."""
        size = QSize()
        for item in self.itemList:
            size = size.expandedTo(item.minimumSize())
        margins = self.contentsMargins()
        size += QSize(margins.left() + margins.right(),
                      margins.top() + margins.bottom())
        return size

    def doLayout(self, rect, testOnly):
        """Flow the items inside ``rect`` and return the total height used.

        Args:
            rect: Area available to the layout, including the contents margins.
            testOnly: When true, only measure; item geometries are not changed.

        Returns:
            Height in pixels consumed by the rows plus top and bottom margins.
        """
        # Lay items out inside the margins so the margin passed to the
        # constructor (and counted by minimumSize) is actually visible.
        margins = self.contentsMargins()
        area = rect.adjusted(margins.left(), margins.top(),
                             -margins.right(), -margins.bottom())
        x = area.x()
        y = area.y()
        line_height = 0

        space_x = self.horizontalSpacing()
        if space_x < 0:
            space_x = self._FALLBACK_SPACING
        # Rows are separated by the vertical spacing, not the horizontal one.
        space_y = self.verticalSpacing()
        if space_y < 0:
            space_y = self._FALLBACK_SPACING

        for item in self.itemList:
            hint = item.sizeHint()
            next_x = x + hint.width() + space_x
            # Wrap only if the row already holds something; an oversized first
            # item stays on its own row instead of wrapping forever.
            if next_x - space_x > area.right() and line_height > 0:
                x = area.x()
                y = y + line_height + space_y
                next_x = x + hint.width() + space_x
                line_height = 0

            if not testOnly:
                item.setGeometry(QRect(QPoint(x, y), hint))

            x = next_x
            line_height = max(line_height, hint.height())

        return y + line_height - rect.y() + margins.bottom()

    def smartSpacing(self, pm):
        """Derive a spacing from the parent.

        Returns -1 without a parent, the style's pixel metric ``pm`` for a
        widget parent, and the parent's own spacing for a layout parent.
        """
        parent = self.parent()
        if not parent:
            return -1
        if parent.isWidgetType():
            return parent.style().pixelMetric(pm, None, parent)
        return parent.spacing()
127
+
128
+
129
class TermBlock(QWidget):
    """One source word/phrase with its best termbase translation shown underneath.

    Clicking the translation (or its shortcut badge) emits ``term_clicked``.
    Right-clicking the source label offers Edit/Delete for the first
    translation's glossary entry when that entry carries IDs.
    """

    term_clicked = pyqtSignal(str, str)  # source_term, target_term
    edit_requested = pyqtSignal(int, int)  # term_id, termbase_id
    delete_requested = pyqtSignal(int, int, str, str)  # term_id, termbase_id, source_term, target_term

    def __init__(self, source_text: str, translations: List[Dict], parent=None, theme_manager=None, font_size: int = 10, font_family: str = "Segoe UI", font_bold: bool = False, shortcut_number: Optional[int] = None):
        """
        Args:
            source_text: Source word/phrase
            translations: List of dicts; keys read here are 'target_term' (or
                'target'), 'termbase_name', 'ranking', 'is_project_termbase',
                'term_id', 'termbase_id' and 'notes'. The first entry is
                treated as the primary translation.
            parent: Optional parent widget
            theme_manager: Optional theme manager for dark mode support
            font_size: Base font size in points (default 10)
            font_family: Font family name (default "Segoe UI")
            font_bold: Whether to use bold font (default False)
            shortcut_number: Optional shortcut slot. 1-9 show as Alt+N, 11-19
                as a double-tap badge; 0 and 10 are never displayed.
        """
        super().__init__(parent)
        self.source_text = source_text
        self.translations = translations
        self.theme_manager = theme_manager
        self.font_size = font_size
        self.font_family = font_family
        self.font_bold = font_bold
        self.shortcut_number = shortcut_number
        # Store first translation's IDs for context menu (if available)
        self.term_id = None
        self.termbase_id = None
        self.target_term = None
        if translations:
            first_trans = translations[0]
            self.term_id = first_trans.get('term_id')
            self.termbase_id = first_trans.get('termbase_id')
            self.target_term = first_trans.get('target_term', first_trans.get('target', ''))
        self.init_ui()

    @staticmethod
    def _escape(value) -> str:
        """Escape ``<``, ``>`` and ``&`` so glossary text is safe inside rich-text tooltips."""
        import html  # local import: keeps this block independent of the file's import header
        return html.escape(str(value), quote=False)

    def _make_font(self) -> QFont:
        """Build the font shared by the source and target labels."""
        font = QFont(self.font_family)
        font.setPointSize(self.font_size)
        font.setBold(self.font_bold)
        return font

    def _shortcut_tooltip_hint(self) -> str:
        """Return the HTML shortcut hint for the tooltip, or '' when none applies."""
        number = self.shortcut_number
        # Alt+0 (and Alt+0,0) are reserved for the Compare Panel, so slots
        # 0 and 10 are never advertised.
        if number is None or number > 19 or number in (0, 10):
            return ""
        if number <= 9:
            return f"<br><i>Press Alt+{number} to insert</i>"
        # Slots 11-19 are double-tap shortcuts (Alt+1,1 ... Alt+9,9).
        digit = number - 10
        return f"<br><i>Press Alt+{digit},{digit} to insert</i>"

    def _build_target_tooltip(self, primary: Dict, target_text: str, termbase_name: str) -> str:
        """Compose the rich-text tooltip for the primary translation label."""
        esc = self._escape
        hint = self._shortcut_tooltip_hint()
        notes = primary.get('notes', '')

        if len(self.translations) > 1:
            lines = [f"<b>{esc(target_text)}</b> (click to insert){hint}<br>"]
            if notes:
                lines.append(f"<br><i>Note: {esc(notes)}</i><br>")
            lines.append("<br><b>Alternatives:</b>")
            for i, trans in enumerate(self.translations[1:], 1):
                alt_target = trans.get('target_term', trans.get('target', ''))
                alt_termbase = trans.get('termbase_name', '')
                lines.append(f"{i}. {esc(alt_target)} ({esc(alt_termbase)})")
            return "<br>".join(lines)

        text = f"<b>{esc(target_text)}</b><br>From: {esc(termbase_name)}{hint}"
        if notes:
            text += f"<br><i>Note: {esc(notes)}</i>"
        text += "<br>(click to insert)"
        return text

    def _build_shortcut_badge(self, target_text: str) -> Optional[QLabel]:
        """Create the clickable shortcut badge, or return None when no badge is shown."""
        number = self.shortcut_number
        # Slots 0 and 10 (Alt+0 / Alt+0,0) belong to the Compare Panel: no badge.
        if number is None or number >= 20 or number in (0, 10):
            return None

        if number < 10:
            badge_text = str(number)
            shortcut_hint = f"Alt+{number}"
            badge_width = 14
        else:
            digit = number - 10
            badge_text = str(digit) * 2  # "11", "22", ...
            shortcut_hint = f"Alt+{digit},{digit}"
            badge_width = 20  # Wider for 2 digits

        badge_label = QLabel(badge_text)
        badge_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        badge_label.setFixedSize(badge_width, 14)
        badge_label.setStyleSheet("""
            QLabel {
                background-color: #1976D2;
                color: white;
                font-size: 9px;
                font-weight: bold;
                border-radius: 7px;
                padding: 0px;
            }
        """)
        badge_label.setToolTip(f"Press {shortcut_hint} to insert")
        badge_label.setCursor(Qt.CursorShape.PointingHandCursor)
        badge_label.mousePressEvent = lambda e: self.on_translation_clicked(target_text)
        return badge_label

    def _build_count_label(self) -> QLabel:
        """Create the small "+N" label showing how many alternatives exist."""
        count_label = QLabel(f"+{len(self.translations) - 1}")
        count_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        count_label.setStyleSheet("""
            QLabel {
                color: #999;
                font-size: 7px;
            }
        """)
        return count_label

    def init_ui(self):
        """Create the visual layout for this term block - COMPACT RYS-style"""
        layout = QVBoxLayout(self)
        layout.setContentsMargins(1, 0, 1, 1)
        layout.setSpacing(0)

        # Get theme colors
        is_dark = self.theme_manager and self.theme_manager.current_theme.name == "Dark"
        separator_color = "#555555" if is_dark else "#CCCCCC"
        source_text_color = "#E0E0E0" if is_dark else "#333"
        no_match_color = "#666666" if is_dark else "#ddd"
        no_match_bg = "#2A2A2A" if is_dark else "#F5F5F5"

        # Add thin gray separator line at top (like RYS)
        separator = QFrame()
        separator.setFrameShape(QFrame.Shape.HLine)
        separator.setFixedHeight(1)
        separator.setStyleSheet(f"background-color: {separator_color}; border: none;")
        layout.addWidget(separator)

        primary_translation = self.translations[0] if self.translations else None
        if primary_translation is not None:
            is_project = primary_translation.get('is_project_termbase', False)
            ranking = primary_translation.get('ranking', None)
            # IMPORTANT: Treat ranking #1 as project termbase (matches main app logic)
            self.is_effective_project = is_project or (ranking == 1)
            # Pink for project termbase, light blue for regular termbase
            self.bg_color = "#FFE5F0" if self.is_effective_project else "#D6EBFF"
        else:
            self.bg_color = no_match_bg  # Theme-aware for no matches
            self.is_effective_project = False

        # Source text (top) - compact
        self.source_label = QLabel(self.source_text)
        self.source_label.setFont(self._make_font())
        self.source_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        self.source_label.setStyleSheet(f"""
            QLabel {{
                color: {source_text_color};
                padding: 1px 3px;
                background-color: transparent;
                border: none;
            }}
        """)
        # Context menu for edit/delete is only offered when the entry has an ID
        if self.translations and self.term_id is not None:
            self.source_label.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)
            self.source_label.customContextMenuRequested.connect(self._show_context_menu)
        layout.addWidget(self.source_label)

        if primary_translation is None:
            # No translation found - very subtle (theme-aware) placeholder dot
            no_match_label = QLabel("·")
            no_match_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
            no_match_label.setStyleSheet(f"color: {no_match_color}; font-size: 8px;")
            layout.addWidget(no_match_label)
            return

        # Target translation (bottom) - show first/best match - COMPACT
        target_text = primary_translation.get('target_term', primary_translation.get('target', ''))
        termbase_name = primary_translation.get('termbase_name', '')
        hover_color = "#FFD0E8" if self.is_effective_project else "#BBDEFB"  # Slightly darker on hover

        # The background lives on the container so it covers both text and badge
        target_container = QWidget()
        target_container.setStyleSheet(f"""
            QWidget {{
                background-color: {self.bg_color};
                border-radius: 3px;
            }}
            QWidget:hover {{
                background-color: {hover_color};
            }}
        """)
        target_layout = QHBoxLayout(target_container)
        target_layout.setContentsMargins(3, 1, 3, 1)
        target_layout.setSpacing(3)

        target_label = QLabel(target_text)
        target_label.setFont(self._make_font())  # Same size as source
        target_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        target_label.setStyleSheet("""
            QLabel {
                color: #0052A3;
                padding: 0px;
                background-color: transparent;
                border: none;
            }
        """)
        target_label.setCursor(Qt.CursorShape.PointingHandCursor)
        target_label.mousePressEvent = lambda e: self.on_translation_clicked(target_text)
        target_label.setToolTip(
            self._build_target_tooltip(primary_translation, target_text, termbase_name)
        )
        target_layout.addWidget(target_label)

        badge_label = self._build_shortcut_badge(target_text)
        if badge_label is not None:
            target_layout.addWidget(badge_label)

        layout.addWidget(target_container)

        # Show count if multiple translations - very compact
        if len(self.translations) > 1:
            layout.addWidget(self._build_count_label())

    def on_translation_clicked(self, target_text: str):
        """Handle click on translation to insert into target"""
        self.term_clicked.emit(self.source_text, target_text)

    def _show_context_menu(self, pos: QPoint):
        """Show context menu with Edit/Delete options for glossary entry"""
        if not self.term_id or not self.termbase_id:
            return

        menu = QMenu(self)

        # Edit entry action
        edit_action = QAction("✏️ Edit Glossary Entry", menu)
        edit_action.triggered.connect(self._edit_entry)
        menu.addAction(edit_action)

        # Delete entry action
        delete_action = QAction("🗑️ Delete Glossary Entry", menu)
        delete_action.triggered.connect(self._delete_entry)
        menu.addAction(delete_action)

        # pos is relative to the source label, so map it from there
        menu.exec(self.source_label.mapToGlobal(pos))

    def _edit_entry(self):
        """Emit signal to edit glossary entry"""
        if self.term_id and self.termbase_id:
            self.edit_requested.emit(self.term_id, self.termbase_id)

    def _delete_entry(self):
        """Emit signal to delete glossary entry"""
        if self.term_id and self.termbase_id:
            self.delete_requested.emit(self.term_id, self.termbase_id, self.source_text, self.target_term or '')
408
+
409
+
410
class NTBlock(QWidget):
    """Non-translatable block showing source word with pastel yellow styling.

    Clicking the "NT" indicator emits ``nt_clicked`` with the source text so it
    can be inserted unchanged.
    """

    nt_clicked = pyqtSignal(str)  # Emits NT text to insert as-is

    def __init__(self, source_text: str, list_name: str = "", parent=None, theme_manager=None, font_size: int = 10, font_family: str = "Segoe UI", font_bold: bool = False):
        """
        Args:
            source_text: Non-translatable word/phrase
            list_name: Name of the NT list it comes from
            parent: Optional parent widget
            theme_manager: Optional theme manager for dark mode support
            font_size: Base font size in points (default 10)
            font_family: Font family name (default "Segoe UI")
            font_bold: Whether to use bold font (default False)
        """
        super().__init__(parent)
        self.source_text = source_text
        self.list_name = list_name
        self.theme_manager = theme_manager
        self.font_size = font_size
        self.font_family = font_family
        self.font_bold = font_bold
        self.init_ui()

    def init_ui(self):
        """Create the visual layout for this NT block - pastel yellow styling"""
        import html  # local import: keeps this block independent of the file's import header

        layout = QVBoxLayout(self)
        layout.setContentsMargins(1, 1, 1, 1)
        layout.setSpacing(0)

        # Get theme colors
        is_dark = self.theme_manager and self.theme_manager.current_theme.name == "Dark"
        source_text_color = "#E0E0E0" if is_dark else "#5D4E37"

        # Pastel yellow border for non-translatables
        border_color = "#E6C200"  # Darker yellow for border

        self.setStyleSheet(f"""
            QWidget {{
                border-top: 2px solid {border_color};
                border-radius: 0px;
            }}
        """)

        # Source text (top)
        self.source_label = QLabel(self.source_text)
        source_font = QFont(self.font_family)
        source_font.setPointSize(self.font_size)
        source_font.setBold(self.font_bold)
        self.source_label.setFont(source_font)
        self.source_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        self.source_label.setStyleSheet(f"""
            QLabel {{
                color: {source_text_color};
                padding: 1px 3px;
                background-color: transparent;
            }}
        """)
        layout.addWidget(self.source_label)

        # "Do not translate" indicator with pastel yellow background
        nt_label = QLabel("🚫 NT")
        nt_font = QFont()
        nt_font.setPointSize(7)
        nt_label.setFont(nt_font)
        nt_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        # No "cursor" rule here: Qt Style Sheets have no such property. The
        # pointing-hand cursor is applied with setCursor() below.
        nt_label.setStyleSheet("""
            QLabel {
                color: #5D4E37;
                padding: 1px 3px;
                background-color: #FFFDD0;
                border-radius: 2px;
            }
            QLabel:hover {
                background-color: #FFF9B0;
            }
        """)
        nt_label.setCursor(Qt.CursorShape.PointingHandCursor)
        nt_label.mousePressEvent = lambda e: self.on_nt_clicked()

        # The tooltip is rich text, so escape user-provided strings to keep
        # characters such as "<" or "&" from being parsed as markup.
        safe_source = html.escape(str(self.source_text), quote=False)
        safe_list = html.escape(str(self.list_name), quote=False)
        tooltip = f"<b>🚫 Non-Translatable</b><br>{safe_source}<br><br>From: {safe_list}<br>(click to insert as-is)"
        nt_label.setToolTip(tooltip)

        layout.addWidget(nt_label)

    def on_nt_clicked(self):
        """Handle click on NT to insert source text as-is"""
        self.nt_clicked.emit(self.source_text)
499
+
500
+
501
+ class TermviewWidget(QWidget):
502
+ """Main Termview widget showing inline terminology for current segment"""
503
+
504
+ term_insert_requested = pyqtSignal(str) # Emits target text to insert
505
+ edit_entry_requested = pyqtSignal(int, int) # term_id, termbase_id
506
+ delete_entry_requested = pyqtSignal(int, int, str, str) # term_id, termbase_id, source, target
507
+
508
def __init__(self, parent=None, db_manager=None, log_callback=None, theme_manager=None):
    """Store collaborators, set default state, then build the UI.

    Args:
        parent: Optional parent widget.
        db_manager: Stored as ``self.db_manager``; not used in this method.
        log_callback: Callable used for logging; falls back to ``print``.
        theme_manager: Optional theme manager read by the UI-building code.
    """
    super().__init__(parent)

    # Collaborators handed in by the caller
    self.db_manager = db_manager
    self.theme_manager = theme_manager
    self.log = log_callback or print

    # State describing the segment currently displayed
    self.current_source = ""
    self.current_source_lang = None
    self.current_target_lang = None
    self.current_project_id = None  # Project ID for termbase priority lookup

    # Verbose tokenization logging stays off by default (performance)
    self.debug_tokenize = False

    # Font defaults (will be updated from main app settings)
    self.current_font_family = "Segoe UI"
    self.current_font_size = 10
    self.current_font_bold = False

    # Shortcut number -> translation text, e.g. {1: "translation1", ...}
    self.shortcut_terms = {}

    self.init_ui()
530
+
531
def init_ui(self):
    """Build the widget: hidden header, scrollable term area and info label."""
    root_layout = QVBoxLayout(self)
    root_layout.setContentsMargins(5, 5, 5, 5)
    root_layout.setSpacing(5)

    # Resolve colors from the theme, or use fixed fallbacks without one
    if self.theme_manager:
        active_theme = self.theme_manager.current_theme
        bg_color = active_theme.base
        border_color = active_theme.border
        header_bg = active_theme.panel_info
        header_text = active_theme.button_info
        info_text = active_theme.text_disabled
    else:
        bg_color, border_color = "white", "#ddd"
        header_bg, header_text = "#E3F2FD", "#1565C0"
        info_text = "#999"

    # Header label: empty because the tab already shows the name, and hidden
    # to save space. It is kept so apply_theme() can restyle it.
    header_label = QLabel("")
    header_label.setStyleSheet(f"""
        QLabel {{
            font-weight: bold;
            font-size: 12px;
            color: {header_text};
            padding: 5px;
            background-color: {header_bg};
            border-radius: 4px;
        }}
    """)
    header_label.hide()
    root_layout.addWidget(header_label)

    # Scroll area for term blocks: vertical scrolling only
    scroll_area = QScrollArea()
    scroll_area.setWidgetResizable(True)
    scroll_area.setHorizontalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAlwaysOff)
    scroll_area.setVerticalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAsNeeded)
    scroll_area.setStyleSheet(f"""
        QScrollArea {{
            border: 1px solid {border_color};
            border-radius: 4px;
            background-color: {bg_color};
        }}
    """)

    # Container whose flow layout wraps term blocks onto new rows
    self.terms_container = QWidget()
    self.terms_layout = FlowLayout(self.terms_container, margin=5, spacing=4)
    scroll_area.setWidget(self.terms_container)
    root_layout.addWidget(scroll_area)

    # Info label; the dark theme gets a slightly brighter text color
    self.info_label = QLabel("No segment selected")
    self.info_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
    is_dark = self.theme_manager and self.theme_manager.current_theme.name == "Dark"
    if is_dark:
        info_label_color = "#909090"
    else:
        info_label_color = info_text
    self.info_label.setStyleSheet(f"color: {info_label_color}; font-size: 10px; padding: 5px;")
    root_layout.addWidget(self.info_label)

    # Keep references for theme refresh
    self.header = header_label
    self.scroll = scroll_area
599
+
600
def apply_theme(self):
    """Re-apply theme colors to the header, scroll area and info label.

    Does nothing when no theme manager is set.
    """
    if not self.theme_manager:
        return

    active_theme = self.theme_manager.current_theme
    bg_color = active_theme.base
    border_color = active_theme.border
    header_bg = active_theme.panel_info
    header_text = active_theme.button_info
    info_text = active_theme.text_disabled

    # Header
    if hasattr(self, 'header'):
        header_style = f"""
            QLabel {{
                font-weight: bold;
                font-size: 12px;
                color: {header_text};
                padding: 5px;
                background-color: {header_bg};
                border-radius: 4px;
            }}
        """
        self.header.setStyleSheet(header_style)

    # Scroll area
    if hasattr(self, 'scroll'):
        scroll_style = f"""
            QScrollArea {{
                border: 1px solid {border_color};
                border-radius: 4px;
                background-color: {bg_color};
            }}
        """
        self.scroll.setStyleSheet(scroll_style)

    # Info label: slightly brighter text for better visibility in dark mode
    if hasattr(self, 'info_label'):
        if active_theme.name == "Dark":
            info_label_color = "#909090"
        else:
            info_label_color = info_text
        self.info_label.setStyleSheet(f"color: {info_label_color}; font-size: 10px; padding: 5px;")
640
+
641
def set_font_settings(self, font_family: str = "Segoe UI", font_size: int = 10, bold: bool = False):
    """Update font settings for Termview and restyle the blocks already shown.

    Args:
        font_family: Font family name
        font_size: Font size in points
        bold: Whether to use bold font
    """
    self.current_font_family = font_family
    self.current_font_size = font_size
    self.current_font_bold = bold

    # Nothing is displayed without a current segment, so there is nothing to restyle
    if not getattr(self, 'current_source', ''):
        return

    def build_font(point_size: int) -> QFont:
        font = QFont(self.current_font_family)
        font.setPointSize(point_size)
        font.setBold(self.current_font_bold)
        return font

    main_size = self.current_font_size
    small_size = max(6, self.current_font_size - 2)

    for index in range(self.terms_layout.count()):
        item = self.terms_layout.itemAt(index)
        block = item.widget() if item else None
        if not isinstance(block, (TermBlock, NTBlock)):
            continue

        if hasattr(block, 'source_label'):
            block.source_label.setFont(build_font(main_size))

        # NT blocks only have their source label restyled
        if not isinstance(block, TermBlock):
            continue

        block_layout = block.layout()
        if not block_layout:
            continue

        for child_index in range(block_layout.count()):
            child_item = block_layout.itemAt(child_index)
            child = child_item.widget() if child_item else None
            if child is None:
                continue

            if isinstance(child, QLabel):
                # Direct label children other than the source label
                # (alternative count, no-match dot) use the smaller font.
                if child != getattr(block, 'source_label', None):
                    child.setFont(build_font(small_size))
                continue

            # TermBlock puts the translation label inside a container widget,
            # so reach into the container's layout. Its first item is the
            # translation label, which TermBlock creates at the source size.
            # The fixed-size shortcut badge that may follow is left untouched.
            inner_layout = child.layout()
            if inner_layout is not None and inner_layout.count() > 0:
                first_item = inner_layout.itemAt(0)
                target_label = first_item.widget() if first_item else None
                if isinstance(target_label, QLabel):
                    target_label.setFont(build_font(main_size))
696
+
697
+ def update_with_matches(self, source_text: str, termbase_matches: List[Dict], nt_matches: List[Dict] = None):
698
+ """
699
+ Update the termview display with pre-computed termbase and NT matches
700
+
701
+ RYS-STYLE DISPLAY: Show source text as tokens with translations underneath
702
+
703
+ Args:
704
+ source_text: Source segment text
705
+ termbase_matches: List of termbase match dicts from Translation Results
706
+ nt_matches: Optional list of NT match dicts with 'text', 'start', 'end', 'list_name' keys
707
+ """
708
+ self.current_source = source_text
709
+
710
+ # Clear existing blocks and shortcut mappings
711
+ self.clear_terms()
712
+ self.shortcut_terms = {} # Reset shortcut mappings
713
+
714
+ if not source_text or not source_text.strip():
715
+ self.info_label.setText("No segment selected")
716
+ return
717
+
718
+ # Strip HTML/XML tags from source text for display in TermView
719
+ # This handles CAT tool tags like <b>, </b>, <i>, </i>, <u>, </u>, <bi>, <sub>, <sup>, <li-o>, <li-b>
720
+ # as well as memoQ tags {1}, [2}, {3], Trados tags <1>, </1>, and Déjà Vu tags {00001}
721
+ display_text = re.sub(r'</?(?:b|i|u|bi|sub|sup|li-[ob]|\d+)/?>', '', source_text) # HTML/XML tags
722
+ display_text = re.sub(r'[\[{]\d+[}\]]', '', display_text) # memoQ/Phrase numeric tags: {1}, [2}, {3]
723
+ display_text = re.sub(r'\{\d{5}\}', '', display_text) # Déjà Vu tags: {00001}
724
+ # memoQ content tags: [uicontrol id="..."} or {uicontrol] or [tagname ...} or {tagname]
725
+ display_text = re.sub(r'\[[^\[\]]*\}', '', display_text) # Opening: [anything}
726
+ display_text = re.sub(r'\{[^\{\}]*\]', '', display_text) # Closing: {anything]
727
+ display_text = display_text.strip()
728
+
729
+ # If stripping tags leaves nothing, fall back to original
730
+ if not display_text:
731
+ display_text = source_text
732
+
733
+ has_termbase = termbase_matches and len(termbase_matches) > 0
734
+ has_nt = nt_matches and len(nt_matches) > 0
735
+
736
+ if not has_termbase and not has_nt:
737
+ self.info_label.setText("No terminology or NT matches for this segment")
738
+ return
739
+
740
+ # Convert termbase matches to dict for easy lookup: {source_term.lower(): [translations]}
741
+ matches_dict = {}
742
+ if termbase_matches:
743
+ for match in termbase_matches:
744
+ source_term = match.get('source_term', match.get('source', ''))
745
+ target_term = match.get('target_term', match.get('translation', ''))
746
+
747
+ # Ensure source_term and target_term are strings
748
+ if not isinstance(source_term, str):
749
+ source_term = str(source_term) if source_term else ''
750
+ if not isinstance(target_term, str):
751
+ target_term = str(target_term) if target_term else ''
752
+
753
+ if not source_term or not target_term:
754
+ continue
755
+
756
+ # Strip punctuation from key to match lookup normalization
757
+ # This ensures "ca." in glossary matches "ca." token stripped to "ca"
758
+ PUNCT_CHARS_FOR_KEY = '.,;:!?\"\'\u201C\u201D\u201E\u00AB\u00BB\u2018\u2019\u201A\u2039\u203A()[]'
759
+ key = source_term.lower().strip(PUNCT_CHARS_FOR_KEY)
760
+ if key not in matches_dict:
761
+ matches_dict[key] = []
762
+
763
+ # Add main target term (include term_id and termbase_id for edit/delete context menu)
764
+ matches_dict[key].append({
765
+ 'target_term': target_term,
766
+ 'termbase_name': match.get('termbase_name', ''),
767
+ 'ranking': match.get('ranking', 99),
768
+ 'is_project_termbase': match.get('is_project_termbase', False),
769
+ 'term_id': match.get('term_id'),
770
+ 'termbase_id': match.get('termbase_id'),
771
+ 'notes': match.get('notes', '')
772
+ })
773
+
774
+ # Add synonyms as additional translations
775
+ target_synonyms = match.get('target_synonyms', [])
776
+ for synonym in target_synonyms:
777
+ matches_dict[key].append({
778
+ 'target_term': synonym,
779
+ 'termbase_name': match.get('termbase_name', '') + ' (syn)',
780
+ 'ranking': match.get('ranking', 99) + 1, # Slightly lower priority
781
+ 'is_project_termbase': match.get('is_project_termbase', False)
782
+ })
783
+
784
+ # Convert NT matches to dict: {text.lower(): list_name}
785
+ nt_dict = {}
786
+ if nt_matches:
787
+ for match in nt_matches:
788
+ nt_text = match.get('text', '')
789
+ if nt_text:
790
+ nt_dict[nt_text.lower()] = match.get('list_name', 'Non-Translatables')
791
+
792
+ # Combine all known multi-word terms for tokenization
793
+ all_terms_dict = dict(matches_dict)
794
+ for nt_key in nt_dict:
795
+ if nt_key not in all_terms_dict:
796
+ all_terms_dict[nt_key] = [] # Empty list = NT only
797
+
798
+ # Tokenize the tag-stripped display text, respecting multi-word terms
799
+ tokens = self.tokenize_with_multiword_terms(display_text, all_terms_dict)
800
+
801
+ if not tokens:
802
+ self.info_label.setText("No words to analyze")
803
+ return
804
+
805
+ # Create blocks for each token
806
+ blocks_with_translations = 0
807
+ blocks_with_nt = 0
808
+ shortcut_counter = 0 # Track shortcut numbers for terms with translations
809
+
810
+ # Comprehensive set of quote and punctuation characters to strip
811
+ # Using Unicode escapes to avoid encoding issues
812
+ # Include brackets for terms like "(typisch)" to match "typisch"
813
+ PUNCT_CHARS = '.,;:!?\"\'\u201C\u201D\u201E\u00AB\u00BB\u2018\u2019\u201A\u2039\u203A()[]'
814
+
815
+ # Track which terms have already been assigned shortcuts (avoid duplicates)
816
+ assigned_shortcuts = set()
817
+
818
+ for token in tokens:
819
+ # Strip leading and trailing punctuation/quotes for lookup
820
+ token_clean = token.rstrip(PUNCT_CHARS)
821
+ token_clean = token_clean.lstrip(PUNCT_CHARS)
822
+ lookup_key = token_clean.lower()
823
+
824
+ # Check if this is a non-translatable
825
+ if lookup_key in nt_dict:
826
+ nt_block = NTBlock(token, nt_dict[lookup_key], self, theme_manager=self.theme_manager,
827
+ font_size=self.current_font_size, font_family=self.current_font_family,
828
+ font_bold=self.current_font_bold)
829
+ nt_block.nt_clicked.connect(self.on_term_insert_requested)
830
+ self.terms_layout.addWidget(nt_block)
831
+ blocks_with_nt += 1
832
+ else:
833
+ # Get termbase translations for this token
834
+ translations = matches_dict.get(lookup_key, [])
835
+
836
+ # Assign shortcut number only to first occurrence of each term with translations.
837
+ # TermView numbering starts at 1 (Alt+1..Alt+9), because Alt+0 is reserved for the Compare Panel.
838
+ # After 1-9, we support 11-99 via double-tap Alt+N,N (internally 11-19).
839
+ shortcut_num = None
840
+ if translations and lookup_key not in assigned_shortcuts:
841
+ if shortcut_counter < 18: # Support up to 18 terms (1-9 + 11-99)
842
+ # Map 0-8 -> 1-9, 9-17 -> 11-19
843
+ shortcut_num = shortcut_counter + 1 if shortcut_counter < 9 else shortcut_counter + 2
844
+ # Store the first translation for Alt+N insertion
845
+ first_trans = translations[0]
846
+ if isinstance(first_trans, dict):
847
+ self.shortcut_terms[shortcut_num] = first_trans.get('target_term', '')
848
+ else:
849
+ self.shortcut_terms[shortcut_num] = str(first_trans)
850
+ shortcut_counter += 1
851
+ assigned_shortcuts.add(lookup_key)
852
+
853
+ # Create term block (even if no translation - shows source word)
854
+ term_block = TermBlock(token, translations, self, theme_manager=self.theme_manager,
855
+ font_size=self.current_font_size, font_family=self.current_font_family,
856
+ font_bold=self.current_font_bold, shortcut_number=shortcut_num)
857
+ term_block.term_clicked.connect(self.on_term_insert_requested)
858
+ term_block.edit_requested.connect(self._on_edit_entry_requested)
859
+ term_block.delete_requested.connect(self._on_delete_entry_requested)
860
+ self.terms_layout.addWidget(term_block)
861
+
862
+ if translations:
863
+ blocks_with_translations += 1
864
+
865
+ info_parts = []
866
+ if blocks_with_translations > 0:
867
+ info_parts.append(f"{blocks_with_translations} terms")
868
+ if blocks_with_nt > 0:
869
+ info_parts.append(f"{blocks_with_nt} NTs")
870
+
871
+ if info_parts:
872
+ self.info_label.setText(f"✓ Found {', '.join(info_parts)} in {len(tokens)} words")
873
+ else:
874
+ self.info_label.setText(f"No matches in {len(tokens)} words")
875
+
876
def get_all_termbase_matches(self, text: str) -> Dict[str, List[Dict]]:
    """
    Collect termbase matches for every word and phrase found in ``text``.

    Each single token and each 2- to 8-word phrase of ``text`` is sent to
    ``self.db_manager.search_termbases``. A returned entry is kept only if
    its source term really occurs in ``text`` as a complete word/phrase,
    which filters out hits such as "het gebruik van" when only "het" is
    present in the segment.

    Args:
        text: Source text

    Returns:
        Dict mapping source term (lowercase) to list of translation dicts.
        An empty dict is returned when no database manager or language pair
        is configured, when ``text`` is empty, or when the lookup fails.
    """
    if not self.db_manager or not self.current_source_lang or not self.current_target_lang:
        return {}
    if not text:
        return {}

    matches = {}

    try:
        # Same token pattern as the one used for display.
        # NOTE: inside the character class "%-/" is a *range* (U+0025..U+002F),
        # so besides % - . , / it also admits & ' ( ) * +. Tokens such as
        # "(typisch)" or "don't" therefore stay in one piece; the pattern is
        # deliberately left untouched to keep tokenization unchanged.
        token_pattern = re.compile(r'(?<!\w)[\w.,%-/]+(?!\w)', re.UNICODE)
        tokens = [found.group() for found in token_pattern.finditer(text)]

        # n-grams (2 to 8 words) for multi-word term detection
        words = re.findall(r'\b[\w-]+\b', text, re.UNICODE)
        phrases_to_check = [
            ' '.join(words[start:start + size])
            for size in range(2, min(9, len(words) + 1))
            for start in range(len(words) - size + 1)
        ]

        all_search_terms = set(tokens + phrases_to_check)

        # Loop-invariant normalisation, computed once instead of per result.
        # Using Unicode escapes to avoid encoding issues.
        quote_chars = '\"\'\u201C\u201D\u201E\u00AB\u00BB\u2018\u2019\u201A\u2039\u203A'
        punct_chars = '.,;:!?' + quote_chars
        text_lower = text.lower()
        # Replace ALL quote variants with spaces
        normalized_text = text_lower
        for quote_char in quote_chars:
            normalized_text = normalized_text.replace(quote_char, ' ')

        for search_term in all_search_terms:
            if not search_term or len(search_term) < 2:
                continue

            # Strip trailing punctuation for search (but keep internal
            # punctuation like "gew.%"): "edelmetalen." -> "edelmetalen"
            search_term_clean = search_term.rstrip('.,;:!?')
            if not search_term_clean or len(search_term_clean) < 2:
                continue

            results = self.db_manager.search_termbases(
                search_term=search_term_clean,
                source_lang=self.current_source_lang,
                target_lang=self.current_target_lang,
                project_id=self.current_project_id,
                min_length=2
            )

            for result in results:
                source_term = result.get('source_term', '')
                if not source_term:
                    continue

                source_lower = source_term.lower()

                # Strip surrounding punctuation/quotes from the glossary term
                # so that an entry like "...problemen." still matches text in
                # which the period was split off.
                normalized_term = source_lower.strip(punct_chars)
                if not normalized_term:
                    # Punctuation-only entry: an empty pattern would match
                    # any text, so it can never be verified.
                    continue

                # Whole-word/phrase check. Lookarounds are used instead of \b
                # because \b cannot match next to a non-word character, which
                # made terms like "gew.%" (followed by a space) unmatchable.
                pattern = r'(?<!\w)' + re.escape(normalized_term) + r'(?!\w)'

                # Try matching on normalized text first, then original
                if not re.search(pattern, normalized_text) and not re.search(pattern, text_lower):
                    continue  # Skip - term not actually in segment

                bucket = matches.setdefault(source_lower, [])

                # Deduplicate by target_term to avoid duplicate translations
                target_term = result.get('target_term', '')
                if not any(existing.get('target_term', '') == target_term for existing in bucket):
                    bucket.append(result)

        return matches
    except Exception as e:
        # Best-effort lookup: report the problem and show no matches rather
        # than letting a database error propagate to the caller.
        self.log(f"✗ Error getting termbase matches: {e}")
        import traceback
        traceback.print_exc()
        return {}
989
+
990
def tokenize_with_multiword_terms(self, text: str, matches: Dict[str, List[Dict]]) -> List[str]:
    """
    Tokenize text, preserving multi-word terms found in termbase

    The text is scanned twice. The first pass claims every occurrence of a
    multi-word key of ``matches`` (longest keys first, no overlaps). The
    second pass adds every remaining word/number/punctuation combination
    that does not overlap an already claimed span. Tokens are returned in
    text order and keep the casing of ``text``.

    Args:
        text: Source text
        matches: Dict whose keys are lowercase terms; only the keys are used

    Returns:
        List of tokens (words/phrases/numbers), with multi-word terms kept together
    """
    if not text:
        return []

    debug = getattr(self, 'debug_tokenize', False)

    # Longest first so that a long phrase wins over a shorter phrase inside it.
    # (sorted() is stable, so ties keep the dict's insertion order.)
    multi_word_terms = sorted((key for key in matches if ' ' in key), key=len, reverse=True)

    if multi_word_terms and debug:
        self.log(f"🔍 Tokenize: Looking for {len(multi_word_terms)} multi-word terms:")
        for term in multi_word_terms[:3]:
            self.log(f" - '{term}'")

    # Offsets found while searching are used to slice ``text``. Searching the
    # lowercased copy is only safe when lowercasing keeps the length (it does
    # not for e.g. "İ", which lowercases to two code points). In that rare
    # case search ``text`` itself case-insensitively so offsets stay valid.
    text_lower = text.lower()
    if len(text_lower) == len(text):
        haystack, flags = text_lower, 0
    else:
        haystack, flags = text, re.IGNORECASE

    used_positions = set()
    tokens_with_positions = []

    # First pass: find multi-word terms with proper word boundary checking
    for term in multi_word_terms:
        term_escaped = re.escape(term)

        # \b cannot anchor next to punctuation, so punctuated terms use
        # lookarounds instead.
        if any(char in term for char in '.%,-/'):
            pattern = r'(?<!\w)' + term_escaped + r'(?!\w)'
        else:
            pattern = r'\b' + term_escaped + r'\b'
        compiled = re.compile(pattern, flags)

        if debug:
            found = compiled.search(haystack)
            self.log(f"🔍 Tokenize: Pattern '{pattern}' for '{term}' → {'FOUND' if found else 'NOT FOUND'}")
            if found:
                self.log(f" Match at position {found.span()}: '{text[found.start():found.end()]}'")

        for match in compiled.finditer(haystack):
            start, end = match.span()
            span = range(start, end)
            # Skip occurrences that overlap an already claimed term
            if used_positions.isdisjoint(span):
                original_term = text[start:end]  # original-case version
                tokens_with_positions.append((start, end - start, original_term))
                used_positions.update(span)
                if debug:
                    self.log(f" ✅ Added multi-word token: '{original_term}' covering positions {start}-{end}")

    if debug and matches and ' ' in max(matches, key=len):
        self.log(f"🔍 After first pass: {len(used_positions)} positions marked as used")
        self.log(f" Used positions: {sorted(used_positions)[:20]}...")

    # Second pass: fill in gaps with ALL words/numbers/punctuation combos such
    # as "gew.%", "0,1", "kg/l". (?<!\w)/(?!\w) are used instead of \b to
    # handle punctuation properly.
    # NOTE: "%-/" inside the class is a range (U+0025..U+002F) and therefore
    # also admits & ' ( ) * +; kept as-is so tokens like "(typisch)" and
    # "don't" are not split.
    token_pattern = re.compile(r'(?<!\w)[\w.,%-/]+(?!\w)', re.UNICODE)

    for match in token_pattern.finditer(text):
        start, end = match.span()
        span = range(start, end)
        # Only add if not already covered by a multi-word term
        if used_positions.isdisjoint(span):
            token = match.group()
            # Include ALL tokens - no filtering by length
            tokens_with_positions.append((start, len(token), token))
            used_positions.update(span)

    # Sort by position and extract tokens
    tokens_with_positions.sort(key=lambda entry: entry[0])
    return [token for _pos, _length, token in tokens_with_positions]
1077
+
1078
def search_term(self, term: str) -> List[Dict]:
    """
    Search termbases for a specific term

    The raw results of ``self.db_manager.search_termbases`` are filtered so
    that only entries whose source term occurs in the current segment
    (``self.current_source``) as a complete word/phrase are returned. This
    prevents "het gebruik van" from showing when searching "het" if the
    phrase isn't in the segment.

    Args:
        term: Source term to search

    Returns:
        List of translation dicts (filtered to only include terms that exist
        in current segment). Empty when no database manager or language pair
        is configured, when there is no current segment, or when the search
        fails.
    """
    if not self.db_manager or not self.current_source_lang or not self.current_target_lang:
        return []

    # Without a segment nothing can pass the filter below.
    segment_lower = (self.current_source or '').lower()
    if not segment_lower:
        return []

    try:
        results = self.db_manager.search_termbases(
            search_term=term,
            source_lang=self.current_source_lang,
            target_lang=self.current_target_lang,
            project_id=self.current_project_id,
            min_length=2
        )

        filtered_results = []
        for result in results:
            source_term = result.get('source_term', '')
            if not source_term:
                continue

            # Whole-word/phrase check. Lookarounds are used instead of \b
            # because \b cannot match next to a non-word character, which
            # made terms such as "gew.%" or "problemen." unmatchable when
            # followed by a space or the end of the segment.
            pattern = r'(?<!\w)' + re.escape(source_term.lower()) + r'(?!\w)'
            if re.search(pattern, segment_lower):
                filtered_results.append(result)

        return filtered_results
    except Exception as e:
        # Best-effort lookup: report the failure and return no results.
        self.log(f"✗ Error searching term '{term}': {e}")
        return []
1129
+
1130
def clear_terms(self):
    """Empty the terms layout and schedule each removed widget for deletion."""
    layout = self.terms_layout
    # Always take index 0 until the layout reports no remaining items.
    while layout.count() > 0:
        entry = layout.takeAt(0)
        if not entry:
            continue
        widget = entry.widget()
        if widget:
            widget.deleteLater()
1137
+
1138
def on_term_insert_requested(self, source_term: str, target_term: str):
    """Log the requested insertion and emit ``term_insert_requested`` with the target text."""
    message = f"💡 Termview: Inserting '{target_term}' for '{source_term}'"
    self.log(message)
    # Only the translation is forwarded; the source term is used for logging.
    self.term_insert_requested.emit(target_term)
1142
+
1143
+ def _on_edit_entry_requested(self, term_id: int, termbase_id: int):
1144
+ """Forward edit request to parent (main application)"""
1145
+ self.log(f"✏️ Termview: Edit requested for term_id={term_id}, termbase_id={termbase_id}")
1146
+ self.edit_entry_requested.emit(term_id, termbase_id)
1147
+
1148
+ def _on_delete_entry_requested(self, term_id: int, termbase_id: int, source_term: str, target_term: str):
1149
+ """Forward delete request to parent (main application)"""
1150
+ self.log(f"🗑️ Termview: Delete requested for term_id={term_id}, termbase_id={termbase_id}")
1151
+ self.delete_entry_requested.emit(term_id, termbase_id, source_term, target_term)
1152
+
1153
def insert_term_by_number(self, number: int) -> bool:
    """Insert the translation registered under a shortcut number.

    The number is looked up in ``self.shortcut_terms``. When a non-empty
    translation is stored there, it is logged and emitted through
    ``term_insert_requested``.

    TermView numbering starts at 1:
    - Alt+1..Alt+9 insert 1..9
    - Double-tap Alt+N,N inserts 11..99 (internally 11..19)

    Args:
        number: Shortcut number (typically 1-9 or 11-19)

    Returns:
        True if term was inserted, False if no term at that number
    """
    target_text = self.shortcut_terms.get(number)
    if not target_text:
        return False

    # Badge shown in the log: the number itself below 10, otherwise the
    # doubled value of (number - 10), e.g. 11 -> "11", 19 -> "99".
    badge = str(number) if number < 10 else str(number - 10) * 2
    self.log(f"💡 Termview: Inserting term [{badge}]: '{target_text}'")
    self.term_insert_requested.emit(target_text)
    return True