supervertaler 1.9.153__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of supervertaler might be problematic. Click here for more details.

Files changed (85) hide show
  1. Supervertaler.py +47886 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1878 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +333 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1172 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.153.dist-info/METADATA +896 -0
  81. supervertaler-1.9.153.dist-info/RECORD +85 -0
  82. supervertaler-1.9.153.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.153.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.153.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.153.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1172 @@
1
+ """
2
+ Termview Widget - RYS-style Inline Terminology Display
3
+
4
+ Displays source text with termbase translations shown directly underneath each word/phrase.
5
+ Inspired by the RYS Trados plugin's inline term visualization.
6
+
7
+ Features:
8
+ - Visual mapping: translations appear under their source terms
9
+ - Hover tooltips: show synonyms/alternatives
10
+ - Click to insert: click any translation to insert into target
11
+ - Multi-word term support: handles both single words and phrases
12
+ """
13
+
14
+ from PyQt6.QtWidgets import (QWidget, QVBoxLayout, QLabel, QFrame, QScrollArea,
15
+ QHBoxLayout, QPushButton, QToolTip, QLayout, QLayoutItem, QSizePolicy, QStyle,
16
+ QMenu, QMessageBox)
17
+ from PyQt6.QtCore import Qt, QPoint, pyqtSignal, QRect, QSize
18
+ from PyQt6.QtGui import QFont, QCursor, QAction
19
+ from typing import Dict, List, Optional, Tuple
20
+ import re
21
+
22
+
23
class FlowLayout(QLayout):
    """Layout that places items left to right and wraps onto a new row when full.

    Items are stored in insertion order in ``itemList``. Horizontal and
    vertical gaps come from the ``spacing`` constructor argument; a negative
    value defers to the parent's style (see ``smartSpacing``).
    """

    # Gap used when neither an explicit spacing nor a style metric is available.
    _FALLBACK_SPACING = 5

    def __init__(self, parent=None, margin=0, spacing=-1):
        """
        Args:
            parent: Optional parent widget or layout.
            margin: Contents margin applied to all four sides, in pixels.
            spacing: Gap between items in pixels; negative means "ask the style".
        """
        super().__init__(parent)
        self.itemList = []
        self.m_hSpace = spacing
        self.m_vSpace = spacing
        self.setContentsMargins(margin, margin, margin, margin)

    def __del__(self):
        # Drop every item reference we still hold.
        item = self.takeAt(0)
        while item is not None:
            item = self.takeAt(0)

    def addItem(self, item):
        """Append a layout item (called by Qt when a widget is added)."""
        self.itemList.append(item)

    def horizontalSpacing(self):
        """Return the explicit horizontal gap, or the style default if unset."""
        if self.m_hSpace >= 0:
            return self.m_hSpace
        return self.smartSpacing(QStyle.PixelMetric.PM_LayoutHorizontalSpacing)

    def verticalSpacing(self):
        """Return the explicit vertical gap, or the style default if unset."""
        if self.m_vSpace >= 0:
            return self.m_vSpace
        return self.smartSpacing(QStyle.PixelMetric.PM_LayoutVerticalSpacing)

    def count(self):
        """Return the number of items managed by this layout."""
        return len(self.itemList)

    def itemAt(self, index):
        """Return the item at ``index`` or None when out of range."""
        if 0 <= index < len(self.itemList):
            return self.itemList[index]
        return None

    def takeAt(self, index):
        """Remove and return the item at ``index`` or None when out of range."""
        if 0 <= index < len(self.itemList):
            return self.itemList.pop(index)
        return None

    def expandingDirections(self):
        """This layout does not ask for extra space in either direction."""
        return Qt.Orientation(0)

    def hasHeightForWidth(self):
        """Height depends on width because rows wrap."""
        return True

    def heightForWidth(self, width):
        """Return the height needed to lay out all items within ``width``."""
        return self.doLayout(QRect(0, 0, width, 0), True)

    def setGeometry(self, rect):
        """Apply ``rect`` and position every item inside it."""
        super().setGeometry(rect)
        self.doLayout(rect, False)

    def sizeHint(self):
        """The preferred size equals the minimum size."""
        return self.minimumSize()

    def minimumSize(self):
        """Return the size of the largest single item plus the contents margins."""
        size = QSize()
        for item in self.itemList:
            size = size.expandedTo(item.minimumSize())
        margins = self.contentsMargins()
        size += QSize(margins.left() + margins.right(),
                      margins.top() + margins.bottom())
        return size

    def doLayout(self, rect, testOnly):
        """Place items row by row inside ``rect`` and return the height used.

        Args:
            rect: Area available to the layout (margins are applied inside it).
            testOnly: When True only compute the height; do not move any item.

        Returns:
            Total height consumed, including top and bottom contents margins.
        """
        # Honour the contents margins set in __init__; minimumSize() already
        # accounts for them, so the actual placement has to as well.
        margins = self.contentsMargins()
        area = rect.adjusted(margins.left(), margins.top(),
                             -margins.right(), -margins.bottom())

        spaceX = self.horizontalSpacing()
        if spaceX < 0:
            spaceX = self._FALLBACK_SPACING
        spaceY = self.verticalSpacing()
        if spaceY < 0:
            spaceY = self._FALLBACK_SPACING

        x = area.x()
        y = area.y()
        lineHeight = 0

        for item in self.itemList:
            hint = item.sizeHint()
            nextX = x + hint.width() + spaceX
            # Wrap only if the row already holds something; an oversized item
            # alone on a row stays where it is.
            if nextX - spaceX > area.right() and lineHeight > 0:
                x = area.x()
                y = y + lineHeight + spaceY
                nextX = x + hint.width() + spaceX
                lineHeight = 0

            if not testOnly:
                item.setGeometry(QRect(QPoint(x, y), hint))

            x = nextX
            lineHeight = max(lineHeight, hint.height())

        return y + lineHeight - rect.y() + margins.bottom()

    def smartSpacing(self, pm):
        """Return the style/parent-provided spacing for metric ``pm``, or -1 without a parent."""
        parent = self.parent()
        if not parent:
            return -1
        if parent.isWidgetType():
            return parent.style().pixelMetric(pm, None, parent)
        return parent.spacing()
127
+
128
+
129
class TermBlock(QWidget):
    """Individual term block showing a source word/phrase and its translation(s).

    The first entry of ``translations`` is shown under the source text; further
    entries are listed in the tooltip and summarised by a "+N" counter.
    """

    term_clicked = pyqtSignal(str, str)  # source_term, target_term
    edit_requested = pyqtSignal(int, int)  # term_id, termbase_id
    delete_requested = pyqtSignal(int, int, str, str)  # term_id, termbase_id, source_term, target_term

    # Shortcut numbers whose Alt+N binding is reserved for the Compare Panel
    # (Alt+0 and the double-tap Alt+0,0); no hint or badge is shown for them.
    _RESERVED_SHORTCUTS = (0, 10)

    def __init__(self, source_text: str, translations: List[Dict], parent=None, theme_manager=None,
                 font_size: int = 10, font_family: str = "Segoe UI", font_bold: bool = False,
                 shortcut_number: Optional[int] = None):
        """
        Args:
            source_text: Source word/phrase
            translations: List of dicts with keys: 'target', 'termbase_name', 'priority', 'term_id', 'termbase_id', etc.
            theme_manager: Optional theme manager for dark mode support
            font_size: Base font size in points (default 10)
            font_family: Font family name (default "Segoe UI")
            font_bold: Whether to use bold font (default False)
            shortcut_number: Optional shortcut index; 1-9 are advertised as Alt+N,
                11-19 as the double-tap Alt+D,D (0 and 10 are reserved and hidden)
        """
        super().__init__(parent)
        self.source_text = source_text
        self.translations = translations
        self.theme_manager = theme_manager
        self.font_size = font_size
        self.font_family = font_family
        self.font_bold = font_bold
        self.shortcut_number = shortcut_number
        # Store first translation's IDs for context menu (if available)
        self.term_id = None
        self.termbase_id = None
        self.target_term = None
        if translations:
            first_trans = translations[0]
            self.term_id = first_trans.get('term_id')
            self.termbase_id = first_trans.get('termbase_id')
            self.target_term = first_trans.get('target_term', first_trans.get('target', ''))
        self.init_ui()

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _esc(value) -> str:
        """Escape ``value`` for safe interpolation into a rich-text tooltip."""
        from html import escape  # local import keeps this class self-contained
        return escape(str(value), quote=False)

    def _is_dark_theme(self) -> bool:
        """Return True when a theme manager is present and its theme is named "Dark"."""
        return bool(self.theme_manager and self.theme_manager.current_theme.name == "Dark")

    def _has_entry_ids(self) -> bool:
        """Return True when both IDs needed for edit/delete are known (0 is a valid ID)."""
        return self.term_id is not None and self.termbase_id is not None

    def _make_font(self) -> QFont:
        """Build the font shared by the source and target labels."""
        font = QFont(self.font_family)
        font.setPointSize(self.font_size)
        font.setBold(self.font_bold)
        return font

    def _tooltip_shortcut_hint(self) -> str:
        """Return the "Press Alt+..." tooltip fragment, or "" when no shortcut is advertised."""
        number = self.shortcut_number
        if number is None or number > 19 or number in self._RESERVED_SHORTCUTS:
            return ""
        if number <= 9:
            return f"<br><i>Press Alt+{number} to insert</i>"
        # Double-tap shortcuts (10-19 displayed as 00, 11, 22, etc.)
        double_digit = number - 10
        return f"<br><i>Press Alt+{double_digit},{double_digit} to insert</i>"

    def _build_tooltip(self, primary: Dict, target_text: str) -> str:
        """Compose the rich-text tooltip for the primary translation."""
        shortcut_hint = self._tooltip_shortcut_hint()
        notes = primary.get('notes', '')
        safe_target = self._esc(target_text)

        if len(self.translations) > 1:
            lines = [f"<b>{safe_target}</b> (click to insert){shortcut_hint}<br>"]
            if notes:
                lines.append(f"<br><i>Note: {self._esc(notes)}</i><br>")
            lines.append("<br><b>Alternatives:</b>")
            for i, trans in enumerate(self.translations[1:], 1):
                alt_target = trans.get('target_term', trans.get('target', ''))
                alt_termbase = trans.get('termbase_name', '')
                lines.append(f"{i}. {self._esc(alt_target)} ({self._esc(alt_termbase)})")
            return "<br>".join(lines)

        termbase_name = primary.get('termbase_name', '')
        text = f"<b>{safe_target}</b><br>From: {self._esc(termbase_name)}{shortcut_hint}"
        if notes:
            text += f"<br><i>Note: {self._esc(notes)}</i>"
        return text + "<br>(click to insert)"

    def _create_badge(self, target_text: str) -> QLabel:
        """Create the small round shortcut badge shown next to the translation."""
        # Badge text: 0-9 for first 10 terms, 00/11/22/.../99 for terms 11-20
        if self.shortcut_number < 10:
            badge_text = str(self.shortcut_number)
            shortcut_hint = f"Alt+{self.shortcut_number}"
            badge_width = 14
        else:
            digit = self.shortcut_number - 10
            badge_text = str(digit) * 2  # "11", "22", etc.
            shortcut_hint = f"Alt+{digit},{digit}"
            badge_width = 20  # Wider for 2 digits

        badge_label = QLabel(badge_text)
        badge_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        badge_label.setFixedSize(badge_width, 14)
        badge_label.setStyleSheet("""
            QLabel {
                background-color: #1976D2;
                color: white;
                font-size: 9px;
                font-weight: bold;
                border-radius: 7px;
                padding: 0px;
            }
        """)
        badge_label.setToolTip(f"Press {shortcut_hint} to insert")
        badge_label.setCursor(Qt.CursorShape.PointingHandCursor)
        badge_label.mousePressEvent = lambda e: self.on_translation_clicked(target_text)
        return badge_label

    def _create_count_label(self) -> QLabel:
        """Create the very compact "+N" label counting alternative translations."""
        count_label = QLabel(f"+{len(self.translations) - 1}")
        count_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        count_label.setStyleSheet("""
            QLabel {
                color: #999;
                font-size: 7px;
            }
        """)
        return count_label

    def _add_target_row(self, layout: QVBoxLayout, primary: Dict) -> None:
        """Add the clickable translation row (label, optional badge, optional counter)."""
        target_text = primary.get('target_term', primary.get('target', ''))
        # Slightly darker shade of the block background while hovered
        hover_color = "#FFD0E8" if self.is_effective_project else "#BBDEFB"

        # Background goes on the container so it covers both text and badge
        target_container = QWidget()
        target_container.setStyleSheet(f"""
            QWidget {{
                background-color: {self.bg_color};
                border-radius: 3px;
            }}
            QWidget:hover {{
                background-color: {hover_color};
            }}
        """)
        target_layout = QHBoxLayout(target_container)
        target_layout.setContentsMargins(3, 1, 3, 1)
        target_layout.setSpacing(3)

        target_label = QLabel(target_text)
        target_label.setFont(self._make_font())  # Same size as source
        target_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        target_label.setStyleSheet("""
            QLabel {
                color: #0052A3;
                padding: 0px;
                background-color: transparent;
                border: none;
            }
        """)
        target_label.setCursor(Qt.CursorShape.PointingHandCursor)
        target_label.mousePressEvent = lambda e: self.on_translation_clicked(target_text)
        target_label.setToolTip(self._build_tooltip(primary, target_text))
        target_layout.addWidget(target_label)

        number = self.shortcut_number
        if number is not None and number < 20 and number not in self._RESERVED_SHORTCUTS:
            target_layout.addWidget(self._create_badge(target_text))

        layout.addWidget(target_container)

        if len(self.translations) > 1:
            layout.addWidget(self._create_count_label())

    # ----------------------------------------------------------------------- UI

    def init_ui(self):
        """Create the visual layout for this term block - COMPACT RYS-style"""
        layout = QVBoxLayout(self)
        layout.setContentsMargins(1, 0, 1, 1)
        layout.setSpacing(0)

        # Get theme colors
        is_dark = self._is_dark_theme()
        separator_color = "#555555" if is_dark else "#CCCCCC"
        source_text_color = "#E0E0E0" if is_dark else "#333"

        # Add thin gray separator line at top (like RYS)
        separator = QFrame()
        separator.setFrameShape(QFrame.Shape.HLine)
        separator.setFixedHeight(1)
        separator.setStyleSheet(f"background-color: {separator_color}; border: none;")
        layout.addWidget(separator)

        primary_translation = self.translations[0] if self.translations else None
        if self.translations:
            is_project = primary_translation.get('is_project_termbase', False)
            ranking = primary_translation.get('ranking', None)
            # IMPORTANT: Treat ranking #1 as project termbase (matches main app logic)
            self.is_effective_project = is_project or (ranking == 1)
            # Background color: pink for project termbase, blue for regular termbase
            self.bg_color = "#FFE5F0" if self.is_effective_project else "#D6EBFF"
        else:
            self.bg_color = "#2A2A2A" if is_dark else "#F5F5F5"  # Theme-aware for no matches
            self.is_effective_project = False

        # Source text (top) - compact
        self.source_label = QLabel(self.source_text)
        self.source_label.setFont(self._make_font())
        self.source_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        self.source_label.setStyleSheet(f"""
            QLabel {{
                color: {source_text_color};
                padding: 1px 3px;
                background-color: transparent;
                border: none;
            }}
        """)
        # Enable context menu on source label for edit/delete actions (only if we have translations with IDs)
        if self.translations and self.term_id is not None:
            self.source_label.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)
            self.source_label.customContextMenuRequested.connect(self._show_context_menu)
        layout.addWidget(self.source_label)

        if self.translations:
            # Target translation (bottom) - show first/best match - COMPACT
            self._add_target_row(layout, primary_translation)
        else:
            # No translation found - very subtle (theme-aware)
            no_match_dot_color = "#666666" if is_dark else "#ddd"
            no_match_label = QLabel("·")
            no_match_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
            no_match_label.setStyleSheet(f"color: {no_match_dot_color}; font-size: 8px;")
            layout.addWidget(no_match_label)

    def on_translation_clicked(self, target_text: str):
        """Handle click on translation to insert into target"""
        self.term_clicked.emit(self.source_text, target_text)

    def _show_context_menu(self, pos: QPoint):
        """Show context menu with Edit/Delete options for glossary entry"""
        if not self._has_entry_ids():
            return

        menu = QMenu(self)

        # Edit entry action
        edit_action = QAction("✏️ Edit Glossary Entry", menu)
        edit_action.triggered.connect(self._edit_entry)
        menu.addAction(edit_action)

        # Delete entry action
        delete_action = QAction("🗑️ Delete Glossary Entry", menu)
        delete_action.triggered.connect(self._delete_entry)
        menu.addAction(delete_action)

        menu.exec(self.source_label.mapToGlobal(pos))

    def _edit_entry(self):
        """Emit signal to edit glossary entry"""
        if self._has_entry_ids():
            self.edit_requested.emit(self.term_id, self.termbase_id)

    def _delete_entry(self):
        """Emit signal to delete glossary entry"""
        if self._has_entry_ids():
            self.delete_requested.emit(self.term_id, self.termbase_id, self.source_text, self.target_term or '')
408
+
409
+
410
class NTBlock(QWidget):
    """Non-translatable block showing source word with pastel yellow styling.

    Clicking the "NT" indicator emits ``nt_clicked`` with the source text so the
    caller can insert it unchanged.
    """

    nt_clicked = pyqtSignal(str)  # Emits NT text to insert as-is

    def __init__(self, source_text: str, list_name: str = "", parent=None, theme_manager=None,
                 font_size: int = 10, font_family: str = "Segoe UI", font_bold: bool = False):
        """
        Args:
            source_text: Non-translatable word/phrase
            list_name: Name of the NT list it comes from
            theme_manager: Optional theme manager for dark mode support
            font_size: Base font size in points (default 10)
            font_family: Font family name (default "Segoe UI")
            font_bold: Whether to use bold font (default False)
        """
        super().__init__(parent)
        self.source_text = source_text
        self.list_name = list_name
        self.theme_manager = theme_manager
        self.font_size = font_size
        self.font_family = font_family
        self.font_bold = font_bold
        self.init_ui()

    def init_ui(self):
        """Create the visual layout for this NT block - pastel yellow styling"""
        from html import escape  # local import keeps this class self-contained

        layout = QVBoxLayout(self)
        layout.setContentsMargins(1, 1, 1, 1)
        layout.setSpacing(0)

        # Get theme colors
        is_dark = self.theme_manager and self.theme_manager.current_theme.name == "Dark"
        source_text_color = "#E0E0E0" if is_dark else "#5D4E37"

        # Pastel yellow border for non-translatables
        border_color = "#E6C200"  # Darker yellow for border

        self.setStyleSheet(f"""
            QWidget {{
                border-top: 2px solid {border_color};
                border-radius: 0px;
            }}
        """)

        # Source text (top)
        self.source_label = QLabel(self.source_text)
        source_font = QFont(self.font_family)
        source_font.setPointSize(self.font_size)
        source_font.setBold(self.font_bold)
        self.source_label.setFont(source_font)
        self.source_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        self.source_label.setStyleSheet(f"""
            QLabel {{
                color: {source_text_color};
                padding: 1px 3px;
                background-color: transparent;
            }}
        """)
        layout.addWidget(self.source_label)

        # "Do not translate" indicator with pastel yellow background
        nt_label = QLabel("🚫 NT")
        nt_font = QFont()
        nt_font.setPointSize(7)
        nt_label.setFont(nt_font)
        nt_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
        # NOTE: Qt Style Sheets have no `cursor` property, so the pointing-hand
        # cursor is applied with setCursor() below rather than in the stylesheet.
        nt_label.setStyleSheet("""
            QLabel {
                color: #5D4E37;
                padding: 1px 3px;
                background-color: #FFFDD0;
                border-radius: 2px;
            }
            QLabel:hover {
                background-color: #FFF9B0;
            }
        """)
        nt_label.setCursor(Qt.CursorShape.PointingHandCursor)
        nt_label.mousePressEvent = lambda e: self.on_nt_clicked()

        # The tooltip is rich text; escape user-provided text so characters such
        # as "<" or "&" in an NT entry are shown literally instead of as markup.
        safe_source = escape(str(self.source_text), quote=False)
        safe_list = escape(str(self.list_name), quote=False)
        tooltip = f"<b>🚫 Non-Translatable</b><br>{safe_source}<br><br>From: {safe_list}<br>(click to insert as-is)"
        nt_label.setToolTip(tooltip)

        layout.addWidget(nt_label)

    def on_nt_clicked(self):
        """Handle click on NT to insert source text as-is"""
        self.nt_clicked.emit(self.source_text)
499
+
500
+
501
+ class TermviewWidget(QWidget):
502
+ """Main Termview widget showing inline terminology for current segment"""
503
+
504
+ term_insert_requested = pyqtSignal(str) # Emits target text to insert
505
+ edit_entry_requested = pyqtSignal(int, int) # term_id, termbase_id
506
+ delete_entry_requested = pyqtSignal(int, int, str, str) # term_id, termbase_id, source, target
507
+
508
def __init__(self, parent=None, db_manager=None, log_callback=None, theme_manager=None):
    """Set up widget state and build the UI.

    Args:
        parent: Optional parent widget.
        db_manager: Database manager stored on the instance as ``db_manager``.
        log_callback: Callable used for logging; falls back to ``print``.
        theme_manager: Optional theme manager used to pick colors.
    """
    super().__init__(parent)

    # Collaborators handed in by the caller
    self.db_manager = db_manager
    self.theme_manager = theme_manager
    self.log = log_callback or print

    # Segment currently shown and its language/project context
    self.current_source = ""
    self.current_source_lang = None
    self.current_target_lang = None
    self.current_project_id = None  # used for termbase priority lookup

    # Font defaults; replaced later through the main app settings
    self.current_font_family = "Segoe UI"
    self.current_font_size = 10
    self.current_font_bold = False

    # Shortcut number -> translation text, e.g. {1: "translation1", 2: "translation2"}
    self.shortcut_terms = {}

    self.init_ui()
527
+
528
def init_ui(self):
    """Build the widget tree: hidden header, scrollable term area and info label."""
    # Resolve the palette first: theme values when available, otherwise fixed fallbacks
    theme = self.theme_manager.current_theme if self.theme_manager else None
    if theme is not None:
        bg_color, border_color = theme.base, theme.border
        header_bg, header_text = theme.panel_info, theme.button_info
        info_text = theme.text_disabled
    else:
        bg_color, border_color = "white", "#ddd"
        header_bg, header_text = "#E3F2FD", "#1565C0"
        info_text = "#999"

    outer = QVBoxLayout(self)
    outer.setContentsMargins(5, 5, 5, 5)
    outer.setSpacing(5)

    # Header: kept for theme refresh but empty and hidden (the tab already shows the name)
    self.header = QLabel("")
    self.header.setStyleSheet(f"""
        QLabel {{
            font-weight: bold;
            font-size: 12px;
            color: {header_text};
            padding: 5px;
            background-color: {header_bg};
            border-radius: 4px;
        }}
    """)
    self.header.hide()
    outer.addWidget(self.header)

    # Scroll area hosting the term blocks; vertical scrolling only
    self.scroll = QScrollArea()
    self.scroll.setWidgetResizable(True)
    self.scroll.setHorizontalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAlwaysOff)
    self.scroll.setVerticalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAsNeeded)
    self.scroll.setStyleSheet(f"""
        QScrollArea {{
            border: 1px solid {border_color};
            border-radius: 4px;
            background-color: {bg_color};
        }}
    """)

    # Term blocks live in a wrapping flow layout inside the scroll area
    self.terms_container = QWidget()
    self.terms_layout = FlowLayout(self.terms_container, margin=5, spacing=4)
    self.scroll.setWidget(self.terms_container)
    outer.addWidget(self.scroll)

    # Status line under the scroll area; a brighter gray is used for the dark theme
    dark_mode = self.theme_manager and self.theme_manager.current_theme.name == "Dark"
    if dark_mode:
        info_label_color = "#909090"
    else:
        info_label_color = info_text
    self.info_label = QLabel("No segment selected")
    self.info_label.setAlignment(Qt.AlignmentFlag.AlignCenter)
    self.info_label.setStyleSheet(f"color: {info_label_color}; font-size: 10px; padding: 5px;")
    outer.addWidget(self.info_label)
596
+
597
def apply_theme(self):
    """Re-apply theme colors to the header, scroll area and info label.

    Does nothing when the widget has no theme manager.
    """
    if not self.theme_manager:
        return

    palette = self.theme_manager.current_theme

    # Header label
    if hasattr(self, 'header'):
        header_css = f"""
            QLabel {{
                font-weight: bold;
                font-size: 12px;
                color: {palette.button_info};
                padding: 5px;
                background-color: {palette.panel_info};
                border-radius: 4px;
            }}
        """
        self.header.setStyleSheet(header_css)

    # Scroll area frame and background
    if hasattr(self, 'scroll'):
        scroll_css = f"""
            QScrollArea {{
                border: 1px solid {palette.border};
                border-radius: 4px;
                background-color: {palette.base};
            }}
        """
        self.scroll.setStyleSheet(scroll_css)

    # Info label: a fixed brighter gray in the dark theme, the theme's disabled text otherwise
    if hasattr(self, 'info_label'):
        if palette.name == "Dark":
            info_label_color = "#909090"
        else:
            info_label_color = palette.text_disabled
        self.info_label.setStyleSheet(f"color: {info_label_color}; font-size: 10px; padding: 5px;")
637
+
638
def set_font_settings(self, font_family: str = "Segoe UI", font_size: int = 10, bold: bool = False):
    """Update font settings for Termview and restyle the blocks already shown.

    Args:
        font_family: Font family name
        font_size: Font size in points
        bold: Whether to use bold font
    """
    self.current_font_family = font_family
    self.current_font_size = font_size
    self.current_font_bold = bold

    # Nothing is displayed yet, so there is nothing to restyle
    if not getattr(self, 'current_source', ''):
        return

    def make_font(point_size: int) -> QFont:
        font = QFont(self.current_font_family)
        font.setPointSize(point_size)
        font.setBold(self.current_font_bold)
        return font

    # Secondary labels (e.g. the "+N" counter) are drawn two points smaller, never below 6
    secondary_size = max(6, self.current_font_size - 2)

    for index in range(self.terms_layout.count()):
        item = self.terms_layout.itemAt(index)
        block = item.widget() if item is not None else None
        if not isinstance(block, (TermBlock, NTBlock)):
            continue

        source_label = getattr(block, 'source_label', None)
        if source_label is None:
            continue
        source_label.setFont(make_font(self.current_font_size))

        # NT blocks only expose the source label; their "NT" tag keeps its fixed font
        if not isinstance(block, TermBlock):
            continue

        block_layout = block.layout()
        if block_layout is None:
            continue

        for pos in range(block_layout.count()):
            child_item = block_layout.itemAt(pos)
            child = child_item.widget() if child_item is not None else None
            if child is None or child is source_label:
                continue

            if isinstance(child, QLabel):
                # Direct label children: counter / no-match marker
                child.setFont(make_font(secondary_size))
                continue

            # The translation label is nested in a container widget whose layout
            # holds the label first (optionally followed by the shortcut badge).
            # TermBlock draws it at the same size as the source text.
            inner_layout = child.layout()
            if inner_layout is None or inner_layout.count() == 0:
                continue
            first_item = inner_layout.itemAt(0)
            target_label = first_item.widget() if first_item is not None else None
            if isinstance(target_label, QLabel):
                target_label.setFont(make_font(self.current_font_size))
693
+
694
+ def update_with_matches(self, source_text: str, termbase_matches: List[Dict], nt_matches: List[Dict] = None):
695
+ """
696
+ Update the termview display with pre-computed termbase and NT matches
697
+
698
+ RYS-STYLE DISPLAY: Show source text as tokens with translations underneath
699
+
700
+ Args:
701
+ source_text: Source segment text
702
+ termbase_matches: List of termbase match dicts from Translation Results
703
+ nt_matches: Optional list of NT match dicts with 'text', 'start', 'end', 'list_name' keys
704
+ """
705
+ self.current_source = source_text
706
+
707
+ # Clear existing blocks and shortcut mappings
708
+ self.clear_terms()
709
+ self.shortcut_terms = {} # Reset shortcut mappings
710
+
711
+ if not source_text or not source_text.strip():
712
+ self.info_label.setText("No segment selected")
713
+ return
714
+
715
+ # Strip HTML/XML tags from source text for display in TermView
716
+ # This handles CAT tool tags like <b>, </b>, <i>, </i>, <u>, </u>, <bi>, <sub>, <sup>, <li-o>, <li-b>
717
+ # as well as memoQ tags {1}, [2}, {3], Trados tags <1>, </1>, and Déjà Vu tags {00001}
718
+ display_text = re.sub(r'</?(?:b|i|u|bi|sub|sup|li-[ob]|\d+)/?>', '', source_text) # HTML/XML tags
719
+ display_text = re.sub(r'[\[{]\d+[}\]]', '', display_text) # memoQ/Phrase numeric tags: {1}, [2}, {3]
720
+ display_text = re.sub(r'\{\d{5}\}', '', display_text) # Déjà Vu tags: {00001}
721
+ # memoQ content tags: [uicontrol id="..."} or {uicontrol] or [tagname ...} or {tagname]
722
+ display_text = re.sub(r'\[[^\[\]]*\}', '', display_text) # Opening: [anything}
723
+ display_text = re.sub(r'\{[^\{\}]*\]', '', display_text) # Closing: {anything]
724
+ display_text = display_text.strip()
725
+
726
+ # If stripping tags leaves nothing, fall back to original
727
+ if not display_text:
728
+ display_text = source_text
729
+
730
+ has_termbase = termbase_matches and len(termbase_matches) > 0
731
+ has_nt = nt_matches and len(nt_matches) > 0
732
+
733
+ if not has_termbase and not has_nt:
734
+ self.info_label.setText("No terminology or NT matches for this segment")
735
+ return
736
+
737
+ # Convert termbase matches to dict for easy lookup: {source_term.lower(): [translations]}
738
+ matches_dict = {}
739
+ if termbase_matches:
740
+ for match in termbase_matches:
741
+ source_term = match.get('source_term', match.get('source', ''))
742
+ target_term = match.get('target_term', match.get('translation', ''))
743
+
744
+ # Ensure source_term and target_term are strings
745
+ if not isinstance(source_term, str):
746
+ source_term = str(source_term) if source_term else ''
747
+ if not isinstance(target_term, str):
748
+ target_term = str(target_term) if target_term else ''
749
+
750
+ if not source_term or not target_term:
751
+ continue
752
+
753
+ # Strip punctuation from key to match lookup normalization
754
+ # This ensures "ca." in glossary matches "ca." token stripped to "ca"
755
+ PUNCT_CHARS_FOR_KEY = '.,;:!?\"\'\u201C\u201D\u201E\u00AB\u00BB\u2018\u2019\u201A\u2039\u203A()[]'
756
+ key = source_term.lower().strip(PUNCT_CHARS_FOR_KEY)
757
+ if key not in matches_dict:
758
+ matches_dict[key] = []
759
+
760
+ # Add main target term (include term_id and termbase_id for edit/delete context menu)
761
+ matches_dict[key].append({
762
+ 'target_term': target_term,
763
+ 'termbase_name': match.get('termbase_name', ''),
764
+ 'ranking': match.get('ranking', 99),
765
+ 'is_project_termbase': match.get('is_project_termbase', False),
766
+ 'term_id': match.get('term_id'),
767
+ 'termbase_id': match.get('termbase_id'),
768
+ 'notes': match.get('notes', '')
769
+ })
770
+
771
+ # Add synonyms as additional translations
772
+ target_synonyms = match.get('target_synonyms', [])
773
+ for synonym in target_synonyms:
774
+ matches_dict[key].append({
775
+ 'target_term': synonym,
776
+ 'termbase_name': match.get('termbase_name', '') + ' (syn)',
777
+ 'ranking': match.get('ranking', 99) + 1, # Slightly lower priority
778
+ 'is_project_termbase': match.get('is_project_termbase', False)
779
+ })
780
+
781
+ # Convert NT matches to dict: {text.lower(): list_name}
782
+ nt_dict = {}
783
+ if nt_matches:
784
+ for match in nt_matches:
785
+ nt_text = match.get('text', '')
786
+ if nt_text:
787
+ nt_dict[nt_text.lower()] = match.get('list_name', 'Non-Translatables')
788
+
789
+ # Combine all known multi-word terms for tokenization
790
+ all_terms_dict = dict(matches_dict)
791
+ for nt_key in nt_dict:
792
+ if nt_key not in all_terms_dict:
793
+ all_terms_dict[nt_key] = [] # Empty list = NT only
794
+
795
+ # Tokenize the tag-stripped display text, respecting multi-word terms
796
+ tokens = self.tokenize_with_multiword_terms(display_text, all_terms_dict)
797
+
798
+ if not tokens:
799
+ self.info_label.setText("No words to analyze")
800
+ return
801
+
802
+ # Create blocks for each token
803
+ blocks_with_translations = 0
804
+ blocks_with_nt = 0
805
+ shortcut_counter = 0 # Track shortcut numbers for terms with translations
806
+
807
+ # Comprehensive set of quote and punctuation characters to strip
808
+ # Using Unicode escapes to avoid encoding issues
809
+ # Include brackets for terms like "(typisch)" to match "typisch"
810
+ PUNCT_CHARS = '.,;:!?\"\'\u201C\u201D\u201E\u00AB\u00BB\u2018\u2019\u201A\u2039\u203A()[]'
811
+
812
+ # Track which terms have already been assigned shortcuts (avoid duplicates)
813
+ assigned_shortcuts = set()
814
+
815
+ for token in tokens:
816
+ # Strip leading and trailing punctuation/quotes for lookup
817
+ token_clean = token.rstrip(PUNCT_CHARS)
818
+ token_clean = token_clean.lstrip(PUNCT_CHARS)
819
+ lookup_key = token_clean.lower()
820
+
821
+ # Check if this is a non-translatable
822
+ if lookup_key in nt_dict:
823
+ # Create NT block
824
+ nt_block = NTBlock(token, nt_dict[lookup_key], self, theme_manager=self.theme_manager,
825
+ font_size=self.current_font_size, font_family=self.current_font_family,
826
+ font_bold=self.current_font_bold)
827
+ nt_block.nt_clicked.connect(self.on_term_insert_requested)
828
+ self.terms_layout.addWidget(nt_block)
829
+ blocks_with_nt += 1
830
+ else:
831
+ # Get termbase translations for this token
832
+ translations = matches_dict.get(lookup_key, [])
833
+
834
+ # Assign shortcut number only to first occurrence of each term with translations.
835
+ # TermView numbering starts at 1 (Alt+1..Alt+9), because Alt+0 is reserved for the Compare Panel.
836
+ # After 1-9, we support 11-99 via double-tap Alt+N,N (internally 11-19).
837
+ shortcut_num = None
838
+ if translations and lookup_key not in assigned_shortcuts:
839
+ if shortcut_counter < 18: # Support up to 18 terms (1-9 + 11-99)
840
+ # Map 0-8 -> 1-9, 9-17 -> 11-19
841
+ shortcut_num = shortcut_counter + 1 if shortcut_counter < 9 else shortcut_counter + 2
842
+ # Store the first translation for Alt+N insertion
843
+ first_trans = translations[0]
844
+ if isinstance(first_trans, dict):
845
+ self.shortcut_terms[shortcut_num] = first_trans.get('target_term', '')
846
+ else:
847
+ self.shortcut_terms[shortcut_num] = str(first_trans)
848
+ shortcut_counter += 1
849
+ assigned_shortcuts.add(lookup_key)
850
+
851
+ # Create term block (even if no translation - shows source word)
852
+ term_block = TermBlock(token, translations, self, theme_manager=self.theme_manager,
853
+ font_size=self.current_font_size, font_family=self.current_font_family,
854
+ font_bold=self.current_font_bold, shortcut_number=shortcut_num)
855
+ term_block.term_clicked.connect(self.on_term_insert_requested)
856
+ term_block.edit_requested.connect(self._on_edit_entry_requested)
857
+ term_block.delete_requested.connect(self._on_delete_entry_requested)
858
+ self.terms_layout.addWidget(term_block)
859
+
860
+ if translations:
861
+ blocks_with_translations += 1
862
+
863
+ info_parts = []
864
+ if blocks_with_translations > 0:
865
+ info_parts.append(f"{blocks_with_translations} terms")
866
+ if blocks_with_nt > 0:
867
+ info_parts.append(f"{blocks_with_nt} NTs")
868
+
869
+ if info_parts:
870
+ self.info_label.setText(f"✓ Found {', '.join(info_parts)} in {len(tokens)} words")
871
+ else:
872
+ self.info_label.setText(f"No matches in {len(tokens)} words")
873
+
874
def get_all_termbase_matches(self, text: str) -> Dict[str, List[Dict]]:
    """
    Collect termbase entries whose source term literally occurs in ``text``.

    Every token and every 2- to 8-word phrase of ``text`` is looked up with
    ``self.db_manager.search_termbases``. A hit is kept only when its source
    term (with surrounding punctuation/quotes removed) can be found in the
    text as a complete word or phrase, which prevents entries such as
    "het gebruik van" from being reported when only "het" is present.

    Args:
        text: Source text

    Returns:
        Dict mapping source term (lowercase) to list of translation dicts.
        An empty dict is returned when no database manager or language pair
        is set, or when the lookup raises.
    """
    if not self.db_manager or not self.current_source_lang or not self.current_target_lang:
        return {}

    # Quote variants, written as Unicode escapes to avoid encoding issues.
    QUOTE_CHARS = '\"\'\u201C\u201D\u201E\u00AB\u00BB\u2018\u2019\u201A\u2039\u203A'
    # Punctuation stripped from both ends of a glossary term before matching,
    # so an entry like "...problemen." still matches the running text.
    PUNCT_CHARS = '.,;:!?' + QUOTE_CHARS

    matches = {}

    try:
        # Same token pattern as the display tokenizer in this file.
        # NOTE(review): inside the class, `%-/` is a RANGE ('%' through '/'),
        # so besides % , - . / it also admits & ' ( ) * +. Left unchanged here
        # because the display tokenizer uses the identical pattern.
        token_pattern = re.compile(r'(?<!\w)[\w.,%-/]+(?!\w)', re.UNICODE)
        tokens = [m.group() for m in token_pattern.finditer(text)]

        # N-grams (2 to 8 words) for multi-word term detection.
        words = re.findall(r'\b[\w-]+\b', text, re.UNICODE)
        phrases_to_check = [
            ' '.join(words[i:i + n])
            for n in range(2, min(9, len(words) + 1))
            for i in range(len(words) - n + 1)
        ]

        # Strip trailing punctuation ("edelmetalen." -> "edelmetalen") but keep
        # internal punctuation ("gew.%"). Collecting the cleaned forms in a set
        # avoids querying the database twice for the same string.
        search_terms = set()
        for candidate in tokens + phrases_to_check:
            cleaned = candidate.rstrip('.,;:!?')
            if len(cleaned) >= 2:
                search_terms.add(cleaned)

        # Loop-invariant views of the segment used for verification.
        text_lower = text.lower()
        normalized_text = text_lower.translate({ord(ch): ' ' for ch in QUOTE_CHARS})

        # Sorted so the order of translations per key is reproducible.
        for search_term in sorted(search_terms):
            results = self.db_manager.search_termbases(
                search_term=search_term,
                source_lang=self.current_source_lang,
                target_lang=self.current_target_lang,
                project_id=self.current_project_id,
                min_length=2
            )

            for result in results:
                source_term = result.get('source_term', '')
                if not source_term:
                    continue

                source_lower = source_term.lower()
                normalized_term = source_lower.strip(PUNCT_CHARS)
                if not normalized_term:
                    # Nothing but punctuation: an empty pattern would match
                    # anywhere and produce a false positive.
                    continue

                # Require a word boundary only on edges that are word
                # characters. A plain \b next to a non-word edge (e.g. the
                # '%' of "gew.%") would demand an adjacent word character
                # and reject the term even though it is in the text.
                prefix = r'(?<!\w)' if re.match(r'\w', normalized_term[0]) else ''
                suffix = r'(?!\w)' if re.match(r'\w', normalized_term[-1]) else ''
                pattern = prefix + re.escape(normalized_term) + suffix

                # Try the quote-normalized text first, then the raw text.
                if not re.search(pattern, normalized_text) and not re.search(pattern, text_lower):
                    continue  # Skip - term not actually in segment

                key = source_lower
                if key not in matches:
                    matches[key] = []

                # Deduplicate by target term so a translation is listed once.
                target_term = result.get('target_term', '')
                already_exists = any(
                    m.get('target_term', '') == target_term
                    for m in matches[key]
                )
                if not already_exists:
                    matches[key].append(result)

        return matches
    except Exception as e:
        # Best-effort lookup: report the failure and show no matches.
        self.log(f"✗ Error getting termbase matches: {e}")
        import traceback
        traceback.print_exc()
        return {}
987
+
988
def tokenize_with_multiword_terms(self, text: str, matches: Dict[str, List[Dict]]) -> List[str]:
    """
    Tokenize text, keeping known multi-word terms together as single tokens.

    Multi-word keys of ``matches`` are located first (longest first, so a
    longer phrase wins over a shorter one it contains). The remaining text is
    then split with a word/number/punctuation pattern, and every token that
    does not overlap an already claimed phrase is added.

    Args:
        text: Source text
        matches: Dict whose keys are lowercase terms; only the keys are used.
            May be empty.

    Returns:
        List of tokens (words/phrases/numbers) in text order, with multi-word
        terms kept together.
    """
    # Longest first, so longer phrases claim their characters before shorter ones.
    matched_terms = sorted(matches.keys(), key=len, reverse=True)
    multi_word_terms = [term for term in matched_terms if ' ' in term]

    # DEBUG: log the multi-word terms being searched for (three longest only).
    if multi_word_terms:
        self.log(f"🔍 Tokenize: Looking for {len(multi_word_terms)} multi-word terms:")
        for term in multi_word_terms[:3]:
            self.log(f" - '{term}'")

    # Matching is done on the lowercased text; tokens are sliced from the original.
    text_lower = text.lower()
    used_positions = set()
    tokens_with_positions = []

    # First pass: find multi-word terms with word boundary checking.
    for term in multi_word_terms:
        term_escaped = re.escape(term)

        # \b does not behave as intended next to punctuation, so terms that
        # contain punctuation use explicit lookarounds instead.
        if any(char in term for char in ['.', '%', ',', '-', '/']):
            pattern = r'(?<!\w)' + term_escaped + r'(?!\w)'
        else:
            pattern = r'\b' + term_escaped + r'\b'

        # Scan once; the first hit doubles as the debug "found" indicator.
        hits = list(re.finditer(pattern, text_lower))
        found = hits[0] if hits else None
        self.log(f"🔍 Tokenize: Pattern '{pattern}' for '{term}' → {'FOUND' if found else 'NOT FOUND'}")
        if found:
            self.log(f" Match at position {found.span()}: '{text[found.start():found.end()]}'")

        for match in hits:
            pos = match.start()

            # Skip occurrences that overlap an already claimed term.
            term_positions = set(range(pos, pos + len(term)))
            if not term_positions.intersection(used_positions):
                # Extract the original-case version.
                original_term = text[pos:pos + len(term)]
                tokens_with_positions.append((pos, len(term), original_term))
                used_positions.update(term_positions)
                self.log(f" ✅ Added multi-word token: '{original_term}' covering positions {pos}-{pos+len(term)}")

    # DEBUG: log used positions after the first pass. The emptiness guard
    # matters: indexing [0] on an empty key list raised IndexError.
    if matched_terms and ' ' in matched_terms[0]:
        self.log(f"🔍 After first pass: {len(used_positions)} positions marked as used")
        self.log(f" Used positions: {sorted(used_positions)[:20]}...")

    # Second pass: fill the gaps with words, numbers and combinations such as
    # "gew.%", "0,1" or "kg/l". (?<!\w)/(?!\w) are used instead of \b so that
    # punctuation at token edges is handled.
    # NOTE(review): inside the class, `%-/` is a RANGE ('%' through '/'), so
    # besides % , - . / it also admits & ' ( ) * +. Left unchanged because it
    # determines which characters end up in the displayed tokens.
    token_pattern = re.compile(r'(?<!\w)[\w.,%-/]+(?!\w)', re.UNICODE)

    for match in token_pattern.finditer(text):
        word_start = match.start()
        word_end = match.end()
        word_positions = set(range(word_start, word_end))

        # Only add if not already covered by a multi-word term.
        if not word_positions.intersection(used_positions):
            token = match.group()
            # Include ALL tokens - no filtering by length.
            tokens_with_positions.append((word_start, len(token), token))
            used_positions.update(word_positions)

    # Restore text order and drop the bookkeeping fields.
    tokens_with_positions.sort(key=lambda entry: entry[0])
    return [token for _pos, _length, token in tokens_with_positions]
1073
+
1074
def search_term(self, term: str) -> List[Dict]:
    """
    Search termbases for a specific term.

    The term is looked up with ``self.db_manager.search_termbases``. Results
    are then filtered so that only entries whose source term occurs in
    ``self.current_source`` as a complete word or phrase are returned; this
    keeps "het gebruik van" out when searching "het" unless the whole phrase
    is in the segment.

    Args:
        term: Source term to search

    Returns:
        List of translation dicts (filtered to only include terms that exist
        in the current segment). An empty list is returned when no database
        manager or language pair is set, or when the lookup raises.
    """
    if not self.db_manager or not self.current_source_lang or not self.current_target_lang:
        return []

    try:
        results = self.db_manager.search_termbases(
            search_term=term,
            source_lang=self.current_source_lang,
            target_lang=self.current_target_lang,
            project_id=self.current_project_id,
            min_length=2
        )

        segment_lower = self.current_source.lower()
        filtered_results = []

        for result in results:
            source_term = result.get('source_term', '')
            if not source_term:
                continue

            source_lower = source_term.lower()

            # Require a word boundary only on edges that are word characters.
            # A plain \b next to a non-word edge (e.g. the '%' of "gew.%")
            # would demand an adjacent word character and reject the term even
            # though it is present in the segment.
            prefix = r'(?<!\w)' if re.match(r'\w', source_lower[0]) else ''
            suffix = r'(?!\w)' if re.match(r'\w', source_lower[-1]) else ''
            pattern = prefix + re.escape(source_lower) + suffix

            if re.search(pattern, segment_lower):
                filtered_results.append(result)

        return filtered_results
    except Exception as e:
        # Best-effort lookup: report the failure and return no results.
        self.log(f"✗ Error searching term '{term}': {e}")
        return []
1125
+
1126
def clear_terms(self):
    """Empty ``self.terms_layout`` and schedule each removed widget for deletion."""
    layout = self.terms_layout
    # Always take index 0: each takeAt() shifts the remaining items down.
    while layout.count() > 0:
        entry = layout.takeAt(0)
        if not entry:
            continue
        widget = entry.widget()
        if widget:
            widget.deleteLater()
1133
+
1134
def on_term_insert_requested(self, source_term: str, target_term: str):
    """Log the chosen translation and re-emit it via ``term_insert_requested``."""
    message = f"💡 Termview: Inserting '{target_term}' for '{source_term}'"
    self.log(message)
    # Only the target text is forwarded; the source term appears in the log line only.
    self.term_insert_requested.emit(target_term)
1138
+
1139
+ def _on_edit_entry_requested(self, term_id: int, termbase_id: int):
1140
+ """Forward edit request to parent (main application)"""
1141
+ self.log(f"✏️ Termview: Edit requested for term_id={term_id}, termbase_id={termbase_id}")
1142
+ self.edit_entry_requested.emit(term_id, termbase_id)
1143
+
1144
+ def _on_delete_entry_requested(self, term_id: int, termbase_id: int, source_term: str, target_term: str):
1145
+ """Forward delete request to parent (main application)"""
1146
+ self.log(f"🗑️ Termview: Delete requested for term_id={term_id}, termbase_id={termbase_id}")
1147
+ self.delete_entry_requested.emit(term_id, termbase_id, source_term, target_term)
1148
+
1149
def insert_term_by_number(self, number: int) -> bool:
    """Insert the term registered under a shortcut number.

    The number is looked up in ``self.shortcut_terms``. When a non-empty
    target text is stored there, it is logged and emitted through
    ``term_insert_requested``.

    TermView numbering starts at 1:
    - Alt+1..Alt+9 insert 1..9
    - Double-tap Alt+N,N inserts 11..99 (internally 11..19)

    Args:
        number: Shortcut number (typically 1-9 or 11-19)

    Returns:
        True if term was inserted, False if no term at that number
    """
    target_text = self.shortcut_terms.get(number)
    if not target_text:
        # Unknown number, or an empty translation stored for it.
        return False

    # Badge shown in the log: single digit below 10, otherwise the doubled
    # digit (internal 11 -> "11", 12 -> "22", ...).
    badge = str(number) if number < 10 else str(number - 10) * 2
    self.log(f"💡 Termview: Inserting term [{badge}]: '{target_text}'")
    self.term_insert_requested.emit(target_text)
    return True