supervertaler-1.9.181-py3-none-any.whl → supervertaler-1.9.183-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of supervertaler might be problematic.
- Supervertaler.py +558 -364
- modules/extract_tm.py +518 -0
- modules/project_tm.py +320 -0
- modules/termbase_manager.py +0 -1
- modules/termview_widget.py +12 -11
- modules/translation_memory.py +3 -12
- modules/translation_results_panel.py +0 -7
- {supervertaler-1.9.181.dist-info → supervertaler-1.9.183.dist-info}/METADATA +1 -1
- {supervertaler-1.9.181.dist-info → supervertaler-1.9.183.dist-info}/RECORD +13 -11
- {supervertaler-1.9.181.dist-info → supervertaler-1.9.183.dist-info}/WHEEL +0 -0
- {supervertaler-1.9.181.dist-info → supervertaler-1.9.183.dist-info}/entry_points.txt +0 -0
- {supervertaler-1.9.181.dist-info → supervertaler-1.9.183.dist-info}/licenses/LICENSE +0 -0
- {supervertaler-1.9.181.dist-info → supervertaler-1.9.183.dist-info}/top_level.txt +0 -0
Supervertaler.py
CHANGED
@@ -32,9 +32,9 @@ License: MIT
 """
 
 # Version Information.
-__version__ = "1.9.
+__version__ = "1.9.183"
 __phase__ = "0.9"
-__release_date__ = "2026-01-
+__release_date__ = "2026-01-31"
 __edition__ = "Qt"
 
 import sys
@@ -1659,16 +1659,12 @@ class ReadOnlyGridTextEditor(QTextEdit):
 matches_dict: Dictionary of {term: {'translation': str, 'priority': int}} or {term: str}
 """
 from PyQt6.QtGui import QTextCursor, QTextCharFormat, QColor, QFont
-
-print(f"[HIGHLIGHT DEBUG] highlight_termbase_matches called with {len(matches_dict) if matches_dict else 0} matches")
-
+
 # Get the document and create a cursor
 doc = self.document()
 text = self.toPlainText()
 text_lower = text.lower()
 
-print(f"[HIGHLIGHT DEBUG] Widget text length: {len(text)}, text preview: {text[:60]}...")
-
 # IMPORTANT: Always clear all previous formatting first to prevent inconsistent highlighting
 cursor = QTextCursor(doc)
 cursor.select(QTextCursor.SelectionType.Document)
@@ -1677,7 +1673,6 @@ class ReadOnlyGridTextEditor(QTextEdit):
 
 # If no matches, we're done (highlighting has been cleared)
 if not matches_dict:
-    print(f"[HIGHLIGHT DEBUG] No matches, returning after clear")
     return
 
 # Get highlight style from main window settings
@@ -1695,9 +1690,7 @@ class ReadOnlyGridTextEditor(QTextEdit):
 dotted_color = settings.get('termbase_dotted_color', '#808080')
 break
 parent = parent.parent() if hasattr(parent, 'parent') else None
-
-print(f"[HIGHLIGHT DEBUG] Using style: {highlight_style}")
-
+
 # Sort matches by source term length (longest first) to avoid partial matches
 # Since dict keys are now term_ids, we need to extract source terms first
 term_entries = []
@@ -1706,11 +1699,7 @@ class ReadOnlyGridTextEditor(QTextEdit):
 source_term = match_info.get('source', '')
 if source_term:
     term_entries.append((source_term, term_id, match_info))
-
-print(f"[HIGHLIGHT DEBUG] Built {len(term_entries)} term entries from matches")
-if term_entries:
-    print(f"[HIGHLIGHT DEBUG] First few terms to search: {[t[0] for t in term_entries[:3]]}")
-
+
 # Sort by source term length (longest first)
 term_entries.sort(key=lambda x: len(x[0]), reverse=True)
 
@@ -1835,8 +1824,6 @@ class ReadOnlyGridTextEditor(QTextEdit):
 highlighted_ranges.append((idx, end_idx))
 
 start = end_idx
-
-print(f"[HIGHLIGHT DEBUG] Applied formatting to {found_count} term occurrences in text")
 
 def highlight_non_translatables(self, nt_matches: list, highlighted_ranges: list = None):
 """
@@ -6224,6 +6211,12 @@ class SupervertalerQt(QMainWindow):
 self.termbase_cache_lock = threading.Lock()  # Thread-safe cache access
 self.termbase_batch_worker_thread = None  # Background worker thread
 self.termbase_batch_stop_event = threading.Event()  # Signal to stop background worker
+
+# In-memory termbase index for instant lookups (v1.9.182)
+# Loaded once on project load, contains ALL terms from activated termbases
+# Structure: list of term dicts with pre-compiled regex patterns
+self.termbase_index = []
+self.termbase_index_lock = threading.Lock()
 
 # TM/MT/LLM prefetch cache for instant segment switching (like memoQ)
 # Maps segment ID → {"TM": [...], "MT": [...], "LLM": [...]}
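
Note: the two attributes added above (a plain list plus a threading.Lock) are the entire data structure behind the new index. A minimal, self-contained sketch of that holder pattern, with hypothetical helper names (replace_index, snapshot_index) that are not taken from Supervertaler:

import threading

# The index is a list of term dicts guarded by a lock; it is rebuilt
# wholesale, swapped in under the lock, and readers take a local reference.
termbase_index: list = []
termbase_index_lock = threading.Lock()

def replace_index(new_index: list) -> None:
    global termbase_index
    with termbase_index_lock:
        termbase_index = new_index

def snapshot_index() -> list:
    # Readers grab the reference under the lock, then iterate without holding it.
    with termbase_index_lock:
        return termbase_index

if __name__ == "__main__":
    replace_index([{"source_term_lower": "translation memory"}])
    print(len(snapshot_index()), "terms in index")
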
@@ -6237,9 +6230,9 @@
 self.idle_prefetch_timer = None  # QTimer for triggering prefetch after typing pause
 self.idle_prefetch_delay_ms = 1500  # Start prefetch 1.5s after user stops typing
 
-#
+# Cache kill switch for performance testing
 # When True, all caches are bypassed - direct lookups every time
-self.disable_all_caches =
+self.disable_all_caches = False  # v1.9.183: Default to False (caches ENABLED)
 
 # Undo/Redo stack for grid edits
 self.undo_stack = []  # List of (segment_id, old_target, new_target, old_status, new_status)
@@ -10335,12 +10328,9 @@
 
 # Superdocs removed (online GitBook will be used instead)
 
-print("[DEBUG] About to create SuperlookupTab...")
 lookup_tab = SuperlookupTab(self, user_data_path=self.user_data_path)
-print("[DEBUG] SuperlookupTab created successfully")
 self.lookup_tab = lookup_tab  # Store reference for later use
 modules_tabs.addTab(lookup_tab, "🔍 Superlookup")
-print("[DEBUG] Superlookup tab added to modules_tabs")
 
 # Supervoice - Voice Commands & Dictation
 supervoice_tab = self._create_voice_dictation_settings_tab()
@@ -12231,6 +12221,46 @@
 except Exception as e:
     self.log(f"Error updating Match Panel termview: {e}")
 
+def _update_termview_for_segment(self, segment):
+    """Explicitly update termview for a segment (v1.9.182).
+
+    This is called directly from Ctrl+Enter navigation to ensure
+    the termview updates immediately, bypassing the deferred timer approach.
+    """
+    if not segment or not hasattr(self, 'termview_widget'):
+        return
+
+    try:
+        # Use in-memory index for fast lookup
+        stored_matches = self.find_termbase_matches_in_source(segment.source)
+
+        # Convert dict format to list format for termview
+        termbase_matches = [
+            {
+                'source_term': match_data.get('source', ''),
+                'target_term': match_data.get('translation', ''),
+                'termbase_name': match_data.get('termbase_name', ''),
+                'ranking': match_data.get('ranking', 99),
+                'is_project_termbase': match_data.get('is_project_termbase', False),
+                'term_id': match_data.get('term_id'),
+                'termbase_id': match_data.get('termbase_id'),
+                'notes': match_data.get('notes', '')
+            }
+            for match_data in stored_matches.values()
+        ] if stored_matches else []
+
+        # Get NT matches
+        nt_matches = self.find_nt_matches_in_source(segment.source)
+
+        # Get status hint
+        status_hint = self._get_termbase_status_hint()
+
+        # Update both Termview widgets
+        self._update_both_termviews(segment.source, termbase_matches, nt_matches, status_hint)
+
+    except Exception as e:
+        self.log(f"Error in _update_termview_for_segment: {e}")
+
 def _get_termbase_status_hint(self) -> str:
 """Check termbase activation status and return appropriate hint.
 
@@ -12263,7 +12293,7 @@
 project_target = (self.current_project.target_lang or '').lower()
 
 # Get all termbases and check language pairs
-all_termbases = self.termbase_mgr.
+all_termbases = self.termbase_mgr.get_all_termbases()
 has_matching_language = False
 
 for tb in all_termbases:
@@ -12818,6 +12848,39 @@
 # Use term_id as key to avoid duplicates
 self.termbase_cache[segment_id][term_id] = new_match
 self.log(f"⚡ Added term directly to cache (instant update)")
+
+# v1.9.182: Also add to in-memory termbase index for future lookups
+import re
+source_lower = source_text.lower().strip()
+try:
+    if any(c in source_lower for c in '.%,/-'):
+        pattern = re.compile(r'(?<!\w)' + re.escape(source_lower) + r'(?!\w)')
+    else:
+        pattern = re.compile(r'\b' + re.escape(source_lower) + r'\b')
+except re.error:
+    pattern = None
+
+index_entry = {
+    'term_id': term_id,
+    'source_term': source_text,
+    'source_term_lower': source_lower,
+    'target_term': target_text,
+    'termbase_id': target_termbase['id'],
+    'priority': 99,
+    'domain': '',
+    'notes': '',
+    'project': '',
+    'client': '',
+    'forbidden': False,
+    'is_project_termbase': False,
+    'termbase_name': target_termbase['name'],
+    'ranking': glossary_rank,
+    'pattern': pattern,
+}
+with self.termbase_index_lock:
+    self.termbase_index.append(index_entry)
+    # Re-sort by length (longest first) for proper phrase matching
+    self.termbase_index.sort(key=lambda x: len(x['source_term_lower']), reverse=True)
 
 # Update TermView widget with the new term
 if hasattr(self, 'termview_widget') and self.termview_widget:
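
Note: the pattern-compilation step above chooses a lookaround-based boundary for terms containing punctuation (where \b behaves oddly) and a plain \b boundary otherwise. A minimal sketch of that decision, using a hypothetical compile_term_pattern helper rather than Supervertaler's own code:

import re

def compile_term_pattern(term: str):
    """Pre-compile a word-boundary pattern for one lowercased term."""
    term = term.lower().strip()
    try:
        if any(c in term for c in '.%,/-'):
            # \b sits awkwardly next to punctuation, so use explicit lookarounds
            return re.compile(r'(?<!\w)' + re.escape(term) + r'(?!\w)')
        return re.compile(r'\b' + re.escape(term) + r'\b')
    except re.error:
        return None  # caller falls back to plain substring matching

if __name__ == "__main__":
    assert compile_term_pattern("50%").search("a 50% discount")
    assert compile_term_pattern("cat").search("the cat sat")
    assert not compile_term_pattern("cat").search("concatenate")
    print("boundary patterns behave as expected")
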
@@ -13703,15 +13766,16 @@
 # Use 0 (global) when no project is loaded - allows Superlookup to work
 curr_proj = self.current_project if hasattr(self, 'current_project') else None
 curr_proj_id = curr_proj.id if (curr_proj and hasattr(curr_proj, 'id')) else 0  # 0 = global
-
+
 if checked:
     termbase_mgr.activate_termbase(tb_id, curr_proj_id)
 else:
     termbase_mgr.deactivate_termbase(tb_id, curr_proj_id)
-
-# Clear cache and
+
+# Clear cache and rebuild in-memory index (v1.9.182)
 with self.termbase_cache_lock:
     self.termbase_cache.clear()
+self._build_termbase_index()  # Rebuild index with new activation state
 refresh_termbase_list()
 
 read_checkbox.toggled.connect(on_read_toggle)
@@ -17040,7 +17104,7 @@
 
 # Cache kill switch
 disable_cache_cb = CheckmarkCheckBox("Disable ALL caches (direct lookups every time)")
-disable_cache_cb.setChecked(general_settings.get('disable_all_caches',
+disable_cache_cb.setChecked(general_settings.get('disable_all_caches', False))
 disable_cache_cb.setToolTip(
     "When enabled, ALL caching is bypassed:\n"
     "• Termbase cache\n"
@@ -19684,7 +19748,7 @@
 'results_compare_font_size': 9,
 'autohotkey_path': ahk_path_edit.text().strip() if ahk_path_edit is not None else existing_settings.get('autohotkey_path', ''),
 'enable_sound_effects': sound_effects_cb.isChecked() if sound_effects_cb is not None else existing_settings.get('enable_sound_effects', False),
-'disable_all_caches': disable_cache_cb.isChecked() if disable_cache_cb is not None else existing_settings.get('disable_all_caches',
+'disable_all_caches': disable_cache_cb.isChecked() if disable_cache_cb is not None else existing_settings.get('disable_all_caches', False)
 }
 
 # Keep a fast-access instance value
@@ -22252,7 +22316,154 @@
 except Exception as e:
     QMessageBox.critical(self, "Error", f"Failed to load project:\n{str(e)}")
     self.log(f"✗ Error loading project: {e}")
-
+
+def _build_termbase_index(self):
+    """
+    Build in-memory index of ALL terms from activated termbases (v1.9.182).
+
+    This is called ONCE on project load and replaces thousands of per-word
+    database queries with a single bulk load + fast in-memory lookups.
+
+    Performance: Reduces 349-segment termbase search from 365 seconds to <1 second.
+    """
+    import re
+    import time
+    start_time = time.time()
+
+    if not self.current_project or not hasattr(self, 'db_manager') or not self.db_manager:
+        return
+
+    project_id = self.current_project.id if hasattr(self.current_project, 'id') else None
+
+    # Query ALL terms from activated termbases in ONE query
+    # This replaces ~17,500 individual queries (349 segments × 50 words each)
+    query = """
+        SELECT
+            t.id, t.source_term, t.target_term, t.termbase_id, t.priority,
+            t.domain, t.notes, t.project, t.client, t.forbidden,
+            tb.is_project_termbase, tb.name as termbase_name,
+            COALESCE(ta.priority, tb.ranking) as ranking
+        FROM termbase_terms t
+        LEFT JOIN termbases tb ON CAST(t.termbase_id AS INTEGER) = tb.id
+        LEFT JOIN termbase_activation ta ON ta.termbase_id = tb.id
+            AND ta.project_id = ? AND ta.is_active = 1
+        WHERE (ta.is_active = 1 OR tb.is_project_termbase = 1)
+    """
+
+    new_index = []
+    try:
+        self.db_manager.cursor.execute(query, [project_id or 0])
+        rows = self.db_manager.cursor.fetchall()
+
+        for row in rows:
+            source_term = row[1]  # source_term
+            if not source_term:
+                continue
+
+            source_term_lower = source_term.lower().strip()
+            if len(source_term_lower) < 2:
+                continue
+
+            # Pre-compile regex pattern for word-boundary matching
+            # This avoids recompiling the same pattern thousands of times
+            try:
+                # Handle terms with punctuation differently
+                if any(c in source_term_lower for c in '.%,/-'):
+                    pattern = re.compile(r'(?<!\w)' + re.escape(source_term_lower) + r'(?!\w)')
+                else:
+                    pattern = re.compile(r'\b' + re.escape(source_term_lower) + r'\b')
+            except re.error:
+                # If regex fails, use simple substring matching
+                pattern = None
+
+            new_index.append({
+                'term_id': row[0],
+                'source_term': source_term,
+                'source_term_lower': source_term_lower,
+                'target_term': row[2],
+                'termbase_id': row[3],
+                'priority': row[4],
+                'domain': row[5],
+                'notes': row[6],
+                'project': row[7],
+                'client': row[8],
+                'forbidden': row[9],
+                'is_project_termbase': row[10],
+                'termbase_name': row[11],
+                'ranking': row[12],
+                'pattern': pattern,  # Pre-compiled regex
+            })
+
+        # Sort by term length (longest first) for better phrase matching
+        new_index.sort(key=lambda x: len(x['source_term_lower']), reverse=True)
+
+        # Thread-safe update of the index
+        with self.termbase_index_lock:
+            self.termbase_index = new_index
+
+        elapsed = time.time() - start_time
+        self.log(f"✅ Built termbase index: {len(new_index)} terms in {elapsed:.2f}s")
+
+    except Exception as e:
+        self.log(f"❌ Failed to build termbase index: {e}")
+        import traceback
+        self.log(traceback.format_exc())
+
+def _search_termbase_in_memory(self, source_text: str) -> dict:
+    """
+    Search termbase using in-memory index (v1.9.182).
+
+    This replaces _search_termbases_thread_safe() for batch operations.
+    Instead of N database queries (one per word), we do:
+    - 1 pass through the index (typically ~1000 terms)
+    - Fast string 'in' check + pre-compiled regex validation
+
+    Performance: <1ms per segment vs 1+ second per segment.
+    """
+    if not source_text:
+        return {}
+
+    with self.termbase_index_lock:
+        if not self.termbase_index:
+            return {}
+        index = self.termbase_index  # Local reference for thread safety
+
+    source_lower = source_text.lower()
+    matches = {}
+
+    for term in index:
+        term_lower = term['source_term_lower']
+
+        # Quick substring check first (very fast, implemented in C)
+        if term_lower not in source_lower:
+            continue
+
+        # Word boundary validation using pre-compiled pattern
+        pattern = term.get('pattern')
+        if pattern:
+            if not pattern.search(source_lower):
+                continue
+
+        # Term matches! Add to results
+        term_id = term['term_id']
+        matches[term_id] = {
+            'source': term['source_term'],
+            'translation': term['target_term'],
+            'term_id': term_id,
+            'termbase_id': term['termbase_id'],
+            'termbase_name': term['termbase_name'],
+            'priority': term['priority'],
+            'ranking': term['ranking'],
+            'is_project_termbase': term['is_project_termbase'],
+            'forbidden': term['forbidden'],
+            'domain': term['domain'],
+            'notes': term['notes'],
+            'project': term['project'],
+            'client': term['client'],
+        }
+
+    return matches
+
 def _start_termbase_batch_worker(self):
 """
 Start background thread to batch-process termbase matches for all segments.
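
Note: as a standalone illustration of the lookup strategy in _search_termbase_in_memory above (longest-first ordering, a cheap substring pre-check, then pre-compiled regex validation), here is a minimal sketch with made-up sample terms; in Supervertaler the real index is bulk-loaded from SQLite rather than built from a literal list:

import re

def build_index(terms):
    """terms: iterable of (source_term, target_term) pairs."""
    index = []
    for source, target in terms:
        low = source.lower().strip()
        index.append({
            "source_term": source,
            "source_term_lower": low,
            "target_term": target,
            "pattern": re.compile(r"\b" + re.escape(low) + r"\b"),
        })
    # Longest terms first so multi-word phrases win over their sub-words
    index.sort(key=lambda t: len(t["source_term_lower"]), reverse=True)
    return index

def search(index, source_text):
    source_lower = source_text.lower()
    matches = {}
    for term in index:
        low = term["source_term_lower"]
        if low not in source_lower:                   # cheap C-level pre-check
            continue
        if not term["pattern"].search(source_lower):  # word-boundary validation
            continue
        matches[term["source_term"]] = term["target_term"]
    return matches

if __name__ == "__main__":
    idx = build_index([("memory", "geheugen"), ("translation memory", "vertaalgeheugen")])
    print(search(idx, "Reuse the translation memory for this project."))
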
@@ -22260,21 +22471,25 @@
 """
 if not self.current_project or len(self.current_project.segments) == 0:
     return
-
+
+# Build in-memory termbase index FIRST (v1.9.182)
+# This is the key optimization: load all terms once, then do fast in-memory lookups
+self._build_termbase_index()
+
 # 🧪 EXPERIMENTAL: Skip batch worker if cache kill switch is enabled
 if getattr(self, 'disable_all_caches', False):
     self.log("🧪 Termbase batch worker SKIPPED (caches disabled)")
     return
-
+
 # Stop any existing worker thread
 self.termbase_batch_stop_event.set()
 if self.termbase_batch_worker_thread and self.termbase_batch_worker_thread.is_alive():
     self.log("⏹️ Stopping existing termbase batch worker...")
     self.termbase_batch_worker_thread.join(timeout=2)
-
+
 # Reset stop event for new worker
 self.termbase_batch_stop_event.clear()
-
+
 # Start new background worker thread
 segment_count = len(self.current_project.segments)
 self.log(f"🔄 Starting background termbase batch processor for {segment_count} segments...")
@@ -22290,96 +22505,60 @@
 """
 Background worker thread: process all segments and populate termbase cache.
 Runs in separate thread to not block UI.
-
-
+
+v1.9.182: Now uses in-memory termbase index for 1000x faster lookups.
+Old approach: 365 seconds for 349 segments (1 second/segment)
+New approach: <1 second for 349 segments (<3ms/segment)
 """
 if not segments:
     return
-
-# Create a separate database connection for this thread
-# SQLite connections are thread-local and cannot be shared across threads
-import sqlite3
-try:
-    thread_db_connection = sqlite3.connect(self.db_manager.db_path)
-    thread_db_connection.row_factory = sqlite3.Row
-    thread_db_cursor = thread_db_connection.cursor()
-except Exception as e:
-    self.log(f"❌ Failed to create database connection in batch worker: {e}")
-    return
-
+
 try:
     processed = 0
     cached = 0
+    with_matches = 0
     start_time = time.time()
-
+
     for segment in segments:
         # Check if stop event was signaled (user closed project or started new one)
         if self.termbase_batch_stop_event.is_set():
             self.log(f"⏹️ Termbase batch worker stopped by user (processed {processed} segments)")
             break
-
+
         segment_id = segment.id
-
+
         # Skip if already in cache (thread-safe check)
         with self.termbase_cache_lock:
             if segment_id in self.termbase_cache:
                 cached += 1
                 continue
-
-        #
+
+        # v1.9.182: Use in-memory index for instant lookup (no database queries!)
         try:
-
-
-
-
-
-
-
-            target_lang=self.current_project.target_lang if self.current_project else None,
-            project_id=current_project_id
-        )
-
+            matches = self._search_termbase_in_memory(segment.source)
+
+            # Store in cache (thread-safe) - even empty results to avoid re-lookup
+            with self.termbase_cache_lock:
+                self.termbase_cache[segment_id] = matches
+
+            processed += 1
            if matches:
-
-
-                self.termbase_cache[segment_id] = matches
-
-                processed += 1
-
-                # Log progress every 100 segments
-                if processed % 100 == 0:
-                    elapsed = time.time() - start_time
-                    rate = processed / elapsed if elapsed > 0 else 0
-                    remaining = len(segments) - processed
-                    eta_seconds = remaining / rate if rate > 0 else 0
-                    self.log(f"📊 Batch progress: {processed}/{len(segments)} cached " +
-                             f"({rate:.1f} seg/sec, ETA: {int(eta_seconds)}s)")
-
+                with_matches += 1
+
        except Exception as e:
            self.log(f"❌ Error processing segment {segment_id} in batch worker: {e}")
            continue
-
-        # Small delay to prevent CPU saturation (let UI thread work)
-        time.sleep(0.001)  # 1ms delay between segments
-
+
    elapsed = time.time() - start_time
    total_cached = len(self.termbase_cache)
-
-
-
+    rate = processed / elapsed if elapsed > 0 else 0
+    self.log(f"✅ Termbase batch worker complete: {processed} segments in {elapsed:.2f}s " +
+             f"({rate:.0f} seg/sec, {with_matches} with matches)")
+
 except Exception as e:
    self.log(f"❌ Termbase batch worker error: {e}")
    import traceback
    self.log(traceback.format_exc())
-
-finally:
-    # Close thread-local database connection
-    try:
-        thread_db_cursor.close()
-        thread_db_connection.close()
-        self.log("✓ Closed thread-local database connection in batch worker")
-    except:
-        pass
 
 def _search_termbases_thread_safe(self, source_text: str, cursor, source_lang: str = None, target_lang: str = None, project_id: int = None) -> Dict[str, str]:
 """
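
Note: the rewritten worker above no longer opens its own SQLite connection; it only reads the in-memory index and writes to a lock-protected cache, checking a stop event between segments. A minimal sketch of that worker shape (the segment tuples and search_in_memory function are stand-ins, not Supervertaler APIs):

import threading
import time

cache = {}
cache_lock = threading.Lock()
stop_event = threading.Event()

def search_in_memory(text):
    # Stand-in for the real in-memory termbase search
    return {"term": "match"} if "term" in text else {}

def batch_worker(segments):
    processed = with_matches = 0
    start = time.time()
    for seg_id, source in segments:
        if stop_event.is_set():          # user closed the project or started a new one
            break
        with cache_lock:
            if seg_id in cache:          # already processed earlier
                continue
        matches = search_in_memory(source)
        with cache_lock:
            cache[seg_id] = matches      # cache even empty results to avoid re-lookup
        processed += 1
        if matches:
            with_matches += 1
    print(f"{processed} segments in {time.time() - start:.3f}s, {with_matches} with matches")

if __name__ == "__main__":
    segs = [(i, f"segment {i} mentions a term" if i % 2 else f"segment {i}") for i in range(10)]
    worker = threading.Thread(target=batch_worker, args=(segs,), daemon=True)
    worker.start()
    worker.join()
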
@@ -22579,11 +22758,8 @@
 Also triggers PROACTIVE HIGHLIGHTING for upcoming segments with glossary matches.
 """
 import json
-
-print(f"[PROACTIVE DEBUG] _trigger_idle_prefetch called for row {current_row}")
-
+
 if not self.current_project or current_row < 0:
-    print(f"[PROACTIVE DEBUG] Early exit: no project or invalid row")
     return
 
 try:
@@ -22592,9 +22768,7 @@
 already_cached_ids = []
 start_idx = current_row + 1
 end_idx = min(start_idx + 5, len(self.current_project.segments))
-
-print(f"[PROACTIVE DEBUG] Checking segments {start_idx} to {end_idx}")
-
+
 for seg in self.current_project.segments[start_idx:end_idx]:
 # Check if already cached
 with self.translation_matches_cache_lock:
@@ -22602,23 +22776,19 @@
 next_segment_ids.append(seg.id)
 else:
 already_cached_ids.append(seg.id)
-
-print(f"[PROACTIVE DEBUG] Already cached IDs: {already_cached_ids}, Need prefetch: {next_segment_ids}")
-
+
 # For already-cached segments, trigger proactive highlighting immediately
 # This handles the case where segments were cached earlier but not highlighted
 for seg_id in already_cached_ids:
 try:
 with self.termbase_cache_lock:
 termbase_raw = self.termbase_cache.get(seg_id, {})
-print(f"[PROACTIVE DEBUG] Segment {seg_id} termbase cache: {len(termbase_raw) if termbase_raw else 0} matches")
 if termbase_raw:
 termbase_json = json.dumps(termbase_raw)
 # Apply highlighting on main thread (we're already on main thread here)
-print(f"[PROACTIVE DEBUG] Calling _apply_proactive_highlighting for seg {seg_id}")
 self._apply_proactive_highlighting(seg_id, termbase_json)
-except Exception
-
+except Exception:
+    pass  # Silent failure for proactive highlighting
 
 if next_segment_ids:
 # Start prefetch in background (silent, no logging)
@@ -22700,43 +22870,35 @@
 
 # Fetch TM/termbase matches (pass cursor for thread-safe termbase lookups)
 matches = self._fetch_all_matches_for_segment(segment, thread_db_cursor)
-
-#
-# This prevents "empty cache hits" when TM database is still empty
+
+# Count matches for logging and proactive highlighting
 tm_count = len(matches.get("TM", []))
 tb_count = len(matches.get("Termbases", []))
 mt_count = len(matches.get("MT", []))
 llm_count = len(matches.get("LLM", []))
 total_matches = tm_count + tb_count + mt_count + llm_count
 
-
-
+# Only cache results if we found something
+# Don't cache empty results - let main thread do fresh lookup
 if total_matches > 0:
-    # Store in cache only if we have results
 with self.translation_matches_cache_lock:
 self.translation_matches_cache[segment_id] = matches
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    self._proactive_highlight_signal.emit(segment_id, termbase_json)
-else:
-    print(f"[PREFETCH DEBUG] WARNING: tb_count={tb_count} but termbase_raw is empty!")
-except Exception as e:
-    print(f"[PREFETCH DEBUG] ERROR emitting signal: {e}")
-# else: Don't cache empty results - let it fall through to slow lookup next time
+
+# PROACTIVE HIGHLIGHTING: Emit signal to apply highlighting on main thread
+# This makes upcoming segments show their glossary matches immediately
+if tb_count > 0:
+    try:
+        # Extract raw termbase matches from cache for highlighting
+        with self.termbase_cache_lock:
+            termbase_raw = self.termbase_cache.get(segment_id, {})
+
+        if termbase_raw:
+            # Convert to JSON for thread-safe signal transfer
+            termbase_json = json.dumps(termbase_raw)
+            # Emit signal - will be handled on main thread
+            self._proactive_highlight_signal.emit(segment_id, termbase_json)
+    except Exception:
+        pass  # Silent fail for proactive highlighting
 
 except Exception as e:
 self.log(f"Error in prefetch worker: {e}")
@@ -22786,31 +22948,9 @@
 source_lang_code = self._convert_language_to_code(source_lang)
 target_lang_code = self._convert_language_to_code(target_lang)
 
-# 1. TM matches (
-
-
-try:
-    tm_results = self.db_manager.search_translation_memory(
-        segment.source,
-        source_lang,
-        target_lang,
-        limit=5
-    )
-
-    if tm_results:  # Only add if we got results
-        for tm_match in tm_results:
-            match_obj = TranslationMatch(
-                source=tm_match.get('source', ''),
-                target=tm_match.get('target', ''),
-                relevance=tm_match.get('similarity', 0),
-                metadata={'tm_name': tm_match.get('tm_id', 'project')},
-                match_type='TM',
-                compare_source=tm_match.get('source', ''),
-                provider_code='TM'
-            )
-            matches_dict["TM"].append(match_obj)
-except Exception as e:
-    pass  # Silently continue
+# 1. TM matches - SKIP in prefetch worker (TM search not thread-safe)
+# TM will be fetched on-demand when user navigates to segment
+pass
 
 # 2. MT matches (if enabled)
 if self.enable_mt_matching:
@@ -22985,8 +23125,9 @@
 mode_note = " (overwrite)" if overwrite_mode else ""
 msg = f"💾 Saved segment to {saved_count} TM(s){mode_note}"
 self._queue_tm_save_log(msg)
-#
-
+# NOTE: Removed cache invalidation here - it was destroying batch worker's cache
+# on every Ctrl+Enter, making navigation extremely slow. The small chance of
+# seeing stale TM matches is far less important than responsive navigation.
 
 def invalidate_translation_cache(self, smart_invalidation=True):
 """
@@ -30970,8 +31111,8 @@
 self.show_translation_results_pane = settings.get('show_translation_results_pane', False)
 self.show_compare_panel = settings.get('show_compare_panel', True)
 
-#
-self.disable_all_caches = settings.get('disable_all_caches',
+# Load cache kill switch setting (default: False = caches ENABLED for performance)
+self.disable_all_caches = settings.get('disable_all_caches', False)
 
 # Load LLM provider settings for AI Assistant
 llm_settings = self.load_llm_settings()
@@ -31384,7 +31525,7 @@
 """Handle cell selection change"""
 if self.debug_mode_enabled:
 self.log(f"🎯 on_cell_selected called: row {current_row}, col {current_col}")
-
+
 # 🚫 GUARD: Don't re-run lookups if we're staying on the same row
 # This prevents lookups when user edits text (focus changes within same row)
 if hasattr(self, '_last_selected_row') and self._last_selected_row == current_row:
@@ -31392,34 +31533,35 @@
 self.log(f"⏭️ Skipping lookup - already on row {current_row}")
 return
 self._last_selected_row = current_row
-
-# ⚡
-#
-
-
-
-if is_arrow_nav or is_ctrl_enter_nav:
-    self._arrow_key_navigation = False  # Reset flags
-    self._ctrl_enter_navigation = False
-
-    # Schedule deferred lookup with short delay (150ms) for rapid navigation
-    if hasattr(self, '_deferred_lookup_timer') and self._deferred_lookup_timer:
-        self._deferred_lookup_timer.stop()
-    from PyQt6.QtCore import QTimer
-    self._deferred_lookup_timer = QTimer()
-    self._deferred_lookup_timer.setSingleShot(True)
-    self._deferred_lookup_timer.timeout.connect(
-        lambda r=current_row, c=current_col, pr=previous_row, pc=previous_col:
-        self._on_cell_selected_full(r, c, pr, pc)
-    )
-    self._deferred_lookup_timer.start(150)  # 150ms debounce
-
-    # Do minimal UI update immediately (orange highlight, scroll)
+
+# ⚡ FILTER MODE: Skip ALL heavy lookups when text filters are active
+# User is quickly navigating through filtered results - don't slow them down
+is_filtering = getattr(self, 'filtering_active', False)
+if is_filtering:
+    # Only do minimal UI update (orange highlight) - no TM/termbase lookups
 self._on_cell_selected_minimal(current_row, previous_row)
 return
-
-#
-
+
+
+# ⚡ FAST PATH: Defer heavy lookups for ALL navigation (arrow keys, Ctrl+Enter, AND mouse clicks)
+# This makes segment navigation feel INSTANT - cursor moves first, lookups happen after
+# Reset any navigation flags
+self._arrow_key_navigation = False
+self._ctrl_enter_navigation = False
+
+# Schedule deferred lookup with short delay - debounce prevents hammering during rapid navigation
+if hasattr(self, '_deferred_lookup_timer') and self._deferred_lookup_timer:
+    self._deferred_lookup_timer.stop()
+from PyQt6.QtCore import QTimer
+self._deferred_lookup_timer = QTimer()
+self._deferred_lookup_timer.setSingleShot(True)
+self._deferred_lookup_timer.timeout.connect(
+    lambda r=current_row, c=current_col, pr=previous_row, pc=previous_col:
+    self._on_cell_selected_full(r, c, pr, pc)
+)
+self._deferred_lookup_timer.start(10)  # 10ms - just enough to batch rapid arrow key holding
+
+# Do minimal UI update immediately (orange highlight, scroll)
 self._on_cell_selected_minimal(current_row, previous_row)
 
 def _center_row_in_viewport(self, row: int):
 """Center the given row vertically in the visible table viewport.
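
Note: the navigation handler above restarts a 10 ms single-shot QTimer on every selection change, so only the last selection in a burst triggers the heavy lookup. A minimal sketch of that debounce pattern, assuming PyQt6 is installed and an event loop is running; the handler names (on_cell_selected, full_lookup) are illustrative only:

import sys
from PyQt6.QtCore import QCoreApplication, QTimer

app = QCoreApplication(sys.argv)
_deferred_lookup_timer = None

def full_lookup(row: int) -> None:
    print(f"heavy lookup for row {row}")   # stands in for the TM/termbase search
    app.quit()

def on_cell_selected(row: int) -> None:
    """Restart the single-shot timer; only the last call within 10 ms fires."""
    global _deferred_lookup_timer
    if _deferred_lookup_timer is not None:
        _deferred_lookup_timer.stop()
    _deferred_lookup_timer = QTimer()
    _deferred_lookup_timer.setSingleShot(True)
    _deferred_lookup_timer.timeout.connect(lambda r=row: full_lookup(r))
    _deferred_lookup_timer.start(10)

for row in range(6):       # simulate holding the arrow key
    on_cell_selected(row)   # only row 5 reaches full_lookup

sys.exit(app.exec())
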
@@ -31679,9 +31821,25 @@
 if has_fuzzy_match and not has_100_match:
 self._play_sound_effect('tm_fuzzy_match')
 
-# Skip the slow lookup below, we already have
-#
+# Skip the slow TERMBASE lookup below, we already have termbase matches cached
+# But TM lookup was skipped in prefetch (not thread-safe), so schedule it now
 matches_dict = cached_matches  # Set for later use
+
+# v1.9.182: Schedule TM lookup even on cache hit (prefetch skips TM - not thread-safe)
+tm_count = len(cached_matches.get("TM", []))
+if tm_count == 0 and self.enable_tm_matching:
+    find_replace_active = getattr(self, 'find_replace_active', False)
+    if not find_replace_active:
+        # Get termbase matches for the lookup
+        termbase_matches_for_tm = [
+            {
+                'source_term': match.source,
+                'target_term': match.target,
+                'termbase_name': match.metadata.get('termbase_name', '') if match.metadata else '',
+            }
+            for match in cached_matches.get("Termbases", [])
+        ]
+        self._schedule_mt_and_llm_matches(segment, termbase_matches_for_tm)
 
 # Check if TM/Termbase matching is enabled
 if not matches_dict and (not self.enable_tm_matching and not self.enable_termbase_matching):
@@ -31902,15 +32060,19 @@
 
 # Schedule expensive searches (TM, MT, LLM) with debouncing to prevent UI blocking
 # ONLY schedule if:
-# 1. Cache miss
+# 1. Cache miss OR cache hit with no TM matches (prefetch doesn't include TM - not thread-safe)
 # 2. TM matching is enabled
 # 3. Find/Replace is not active (to avoid slowdowns during navigation)
+needs_tm_lookup = True
 with self.translation_matches_cache_lock:
-
-
+if segment_id in self.translation_matches_cache:
+    cached = self.translation_matches_cache[segment_id]
+    # v1.9.182: Check if TM matches exist - prefetch worker skips TM lookups
+    needs_tm_lookup = len(cached.get("TM", [])) == 0
+
 find_replace_active = getattr(self, 'find_replace_active', False)
-
-if
+
+if needs_tm_lookup and self.enable_tm_matching and not find_replace_active:
 # Get termbase matches if they exist (could be None or empty)
 termbase_matches = matches_dict.get('Termbases', []) if matches_dict else []
 self._schedule_mt_and_llm_matches(segment, termbase_matches)
@@ -31922,9 +32084,7 @@
 next_segment_ids = []
 start_idx = current_row + 1
 end_idx = min(start_idx + 20, len(self.current_project.segments))
-
-print(f"[PROACTIVE NAV DEBUG] Navigation to row {current_row}, checking segments {start_idx} to {end_idx}")
-
+
 for seg in self.current_project.segments[start_idx:end_idx]:
 # Check if already cached
 with self.translation_matches_cache_lock:
@@ -31938,15 +32098,12 @@
 try:
 with self.termbase_cache_lock:
 termbase_raw = self.termbase_cache.get(seg.id, {})
-print(f"[PROACTIVE NAV DEBUG] Seg {seg.id}: cached, termbase_raw has {len(termbase_raw) if termbase_raw else 0} matches")
 if termbase_raw:
 termbase_json = json.dumps(termbase_raw)
-print(f"[PROACTIVE NAV DEBUG] Calling _apply_proactive_highlighting for seg {seg.id}")
 self._apply_proactive_highlighting(seg.id, termbase_json)
-except Exception
-
-
-print(f"[PROACTIVE NAV DEBUG] Need to prefetch: {len(next_segment_ids)} segments")
+except Exception:
+    pass  # Silent failure for proactive highlighting
+
 if next_segment_ids:
 self._start_prefetch_worker(next_segment_ids)
 
@@ -34349,18 +34506,32 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
 """
 Find all termbase matches in source text
 Returns dict of {term: translation} for all matches found
+
+v1.9.182: Uses in-memory index for instant lookup when available.
+Falls back to per-word database queries if index not built.
 """
 if not source_text or not hasattr(self, 'db_manager') or not self.db_manager:
 return {}
 
 try:
+# v1.9.182: Use in-memory index for instant lookup (1000x faster)
+# The index is built on project load by _build_termbase_index()
+with self.termbase_index_lock:
+    has_index = bool(self.termbase_index)
+
+if has_index:
+    # Fast path: use pre-built in-memory index
+    return self._search_termbase_in_memory(source_text)
+
+# Fallback: original per-word database query approach
+# (only used if index not yet built, e.g., during startup)
 source_lang = self.current_project.source_lang if self.current_project else None
 target_lang = self.current_project.target_lang if self.current_project else None
-
+
 # Convert language names to codes for termbase search
 source_lang_code = self._convert_language_to_code(source_lang) if source_lang else None
 target_lang_code = self._convert_language_to_code(target_lang) if target_lang else None
-
+
 # Strip HTML/XML/CAT tool tags from source text before word splitting
 # This handles <b>, </b>, <i>, memoQ {1}, [2}, Trados <1>, Déjà Vu {00001}, etc.
 import re
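
Note: find_termbase_matches_in_source now tries the in-memory fast path and only falls back to the old per-word database queries when the index has not been built yet. A minimal sketch of that fast-path/fallback split, with hypothetical stand-in helpers (search_index, search_database) rather than the real Supervertaler methods:

import threading

termbase_index = []                 # populated once on project load
termbase_index_lock = threading.Lock()

def search_index(text: str) -> dict:
    return {}                       # stand-in for the in-memory search

def search_database(text: str) -> dict:
    return {}                       # stand-in for the per-word SQL lookups

def find_termbase_matches(source_text: str) -> dict:
    if not source_text:
        return {}
    with termbase_index_lock:
        has_index = bool(termbase_index)
    if has_index:
        return search_index(source_text)    # fast path: no database queries
    return search_database(source_text)     # fallback: index not built yet

if __name__ == "__main__":
    print(find_termbase_matches("example sentence"))
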
@@ -34370,7 +34541,7 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
 # memoQ content tags: [uicontrol id="..."} or {uicontrol] or [tagname ...} or {tagname]
 clean_source_text = re.sub(r'\[[^\[\]]*\}', '', clean_source_text)  # Opening: [anything}
 clean_source_text = re.sub(r'\{[^\{\}]*\]', '', clean_source_text)  # Closing: {anything]
-
+
 # Search termbases for all terms that appear in the source text
 # Split source text into words and search for each one
 words = clean_source_text.split()
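
Note: the cleaning step above removes memoQ-style inline tags ([name} opening, {name] closing) before splitting the source into words. A short, self-contained sketch of those two substitutions on a made-up segment:

import re

def strip_cat_tags(text: str) -> str:
    text = re.sub(r"\[[^\[\]]*\}", "", text)   # opening tags like [uicontrol id="1"}
    text = re.sub(r"\{[^\{\}]*\]", "", text)   # closing tags like {uicontrol]
    return text

if __name__ == "__main__":
    segment = 'Press [uicontrol id="1"}Save{uicontrol] to store the file.'
    print(strip_cat_tags(segment))  # -> Press Save to store the file.
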
@@ -34592,23 +34763,17 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
|
|
|
34592
34763
|
termbase_matches_json: JSON-encoded termbase matches dict (thread-safe transfer)
|
|
34593
34764
|
"""
|
|
34594
34765
|
import json
|
|
34595
|
-
|
|
34596
|
-
print(f"[PROACTIVE DEBUG] _apply_proactive_highlighting called for segment {segment_id}")
|
|
34597
|
-
|
|
34766
|
+
|
|
34598
34767
|
if not self.current_project or not self.table:
|
|
34599
|
-
print(f"[PROACTIVE DEBUG] Early exit: no project or table")
|
|
34600
34768
|
return
|
|
34601
|
-
|
|
34769
|
+
|
|
34602
34770
|
try:
|
|
34603
34771
|
# Decode the matches from JSON
|
|
34604
34772
|
termbase_matches = json.loads(termbase_matches_json) if termbase_matches_json else {}
|
|
34605
|
-
|
|
34606
|
-
print(f"[PROACTIVE DEBUG] Decoded {len(termbase_matches)} termbase matches")
|
|
34607
|
-
|
|
34773
|
+
|
|
34608
34774
|
if not termbase_matches:
|
|
34609
|
-
print(f"[PROACTIVE DEBUG] No matches to highlight, returning")
|
|
34610
34775
|
return # Nothing to highlight
|
|
34611
|
-
|
|
34776
|
+
|
|
34612
34777
|
# Find the row for this segment ID
|
|
34613
34778
|
row = -1
|
|
34614
34779
|
for r in range(self.table.rowCount()):
|
|
@@ -34621,44 +34786,25 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
|
|
|
34621
34786
|
break
|
|
34622
34787
|
except ValueError:
|
|
34623
34788
|
continue
|
|
34624
|
-
|
|
34625
|
-
print(f"[PROACTIVE DEBUG] Found row {row} for segment {segment_id}")
|
|
34626
|
-
|
|
34789
|
+
|
|
34627
34790
|
if row < 0:
|
|
34628
|
-
print(f"[PROACTIVE DEBUG] Segment not visible in current page")
|
|
34629
34791
|
return # Segment not visible in current page
|
|
34630
|
-
|
|
34792
|
+
|
|
34631
34793
|
# Get segment source text
|
|
34632
34794
|
segment = None
|
|
34633
34795
|
for seg in self.current_project.segments:
|
|
34634
34796
|
if seg.id == segment_id:
|
|
34635
34797
|
segment = seg
|
|
34636
34798
|
break
|
|
34637
|
-
|
|
34799
|
+
|
|
34638
34800
|
if not segment:
|
|
34639
|
-
print(f"[PROACTIVE DEBUG] Segment object not found")
|
|
34640
34801
|
return
|
|
34641
|
-
|
|
34642
|
-
print(f"[PROACTIVE DEBUG] Applying highlight_source_with_termbase to row {row}")
|
|
34643
|
-
print(f"[PROACTIVE DEBUG] Source text: {segment.source[:80]}...")
|
|
34644
|
-
print(f"[PROACTIVE DEBUG] Matches keys: {list(termbase_matches.keys())[:5]}")
|
|
34645
|
-
if termbase_matches:
|
|
34646
|
-
first_key = list(termbase_matches.keys())[0]
|
|
34647
|
-
print(f"[PROACTIVE DEBUG] Sample match: {first_key} => {termbase_matches[first_key]}")
|
|
34648
|
-
|
|
34649
|
-
# Check if the source widget exists and is the right type
|
|
34650
|
-
source_widget = self.table.cellWidget(row, 2)
|
|
34651
|
-
print(f"[PROACTIVE DEBUG] Source widget type: {type(source_widget).__name__ if source_widget else 'None'}")
|
|
34652
|
-
print(f"[PROACTIVE DEBUG] Has highlight method: {hasattr(source_widget, 'highlight_termbase_matches') if source_widget else 'N/A'}")
|
|
34653
|
-
|
|
34802
|
+
|
|
34654
34803
|
# Apply highlighting (this updates the source cell widget)
|
|
34655
34804
|
self.highlight_source_with_termbase(row, segment.source, termbase_matches)
|
|
34656
|
-
|
|
34657
|
-
|
|
34658
|
-
|
|
34659
|
-
print(f"[PROACTIVE DEBUG] ERROR: {e}")
|
|
34660
|
-
import traceback
|
|
34661
|
-
print(f"[PROACTIVE DEBUG] Traceback: {traceback.format_exc()}")
|
|
34805
|
+
|
|
34806
|
+
except Exception:
|
|
34807
|
+
pass # Silent failure for proactive highlighting
|
|
34662
34808
|
|
|
34663
34809
|
def insert_term_translation(self, row: int, translation: str):
|
|
34664
34810
|
"""
|
|
@@ -38828,95 +38974,32 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
|
|
|
38828
38974
|
|
|
38829
38975
|
self.table.clearSelection()
|
|
38830
38976
|
self.table.setCurrentCell(row, 3) # Column 3 = Target (widget column)
|
|
38977
|
+
self.table.selectRow(row) # v1.9.182: Ensure row is visually selected
|
|
38978
|
+
# Ensure the row is visible by scrolling to it
|
|
38979
|
+
self.table.scrollToItem(self.table.item(row, 0), QTableWidget.ScrollHint.PositionAtCenter)
|
|
38831
38980
|
self.log(f"⏭️ Moved to next unconfirmed segment {seg.id}")
|
|
38832
|
-
|
|
38833
|
-
#
|
|
38834
|
-
|
|
38835
|
-
|
|
38836
|
-
|
|
38837
|
-
|
|
38838
|
-
|
|
38839
|
-
|
|
38840
|
-
|
|
38841
|
-
|
|
38842
|
-
|
|
38843
|
-
|
|
38844
|
-
# Use get_exact_match for 100% matches instead of fuzzy search
|
|
38845
|
-
source_lang = self.current_project.source_lang if hasattr(self.current_project, 'source_lang') else None
|
|
38846
|
-
target_lang = self.current_project.target_lang if hasattr(self.current_project, 'target_lang') else None
|
|
38847
|
-
exact_match = self.db_manager.get_exact_match(
|
|
38848
|
-
seg.source,
|
|
38849
|
-
tm_ids=activated_tm_ids,
|
|
38850
|
-
source_lang=source_lang,
|
|
38851
|
-
target_lang=target_lang
|
|
38852
|
-
)
|
|
38853
|
-
|
|
38854
|
-
# Check if there's a 100% match and (target is empty OR overwrite is enabled)
|
|
38855
|
-
target_is_empty = not seg.target.strip()
|
|
38856
|
-
can_auto_confirm = target_is_empty or self.auto_confirm_overwrite_existing
|
|
38857
|
-
|
|
38858
|
-
if exact_match and can_auto_confirm:
|
|
38859
|
-
match_target = exact_match.get('target_text', '')
|
|
38860
|
-
overwrite_note = " (overwriting existing)" if not target_is_empty else " (empty target)"
|
|
38861
|
-
self.log(f"🎯 Auto-confirm: Found 100% TM match for segment {seg.id}{overwrite_note}")
|
|
38862
|
-
|
|
38863
|
-
# Insert the match into the target cell
|
|
38864
|
-
target_widget = self.table.cellWidget(row, 3)
|
|
38865
|
-
if target_widget and match_target:
|
|
38866
|
-
target_widget.setPlainText(match_target)
|
|
38867
|
-
seg.target = match_target
|
|
38868
|
-
seg.status = 'confirmed'
|
|
38869
|
-
self.update_status_icon(row, 'confirmed')
|
|
38870
|
-
self.project_modified = True
|
|
38871
|
-
|
|
38872
|
-
# Save to TM
|
|
38873
|
-
try:
|
|
38874
|
-
self.save_segment_to_activated_tms(seg.source, seg.target)
|
|
38875
|
-
self.log(f"💾 Auto-confirmed and saved segment {seg.id} to TM")
|
|
38876
|
-
except Exception as e:
|
|
38877
|
-
self.log(f"⚠️ Error saving auto-confirmed segment to TM: {e}")
|
|
38878
|
-
|
|
38879
|
-
# Continue to the NEXT unconfirmed segment (skip this one)
|
|
38880
|
-
for next_row in range(row + 1, self.table.rowCount()):
|
|
38881
|
-
if next_row < len(self.current_project.segments):
|
|
38882
|
-
next_seg = self.current_project.segments[next_row]
|
|
38883
|
-
if next_seg.status not in ['confirmed', 'approved']:
|
|
38884
|
-
# Check pagination
|
|
38885
|
-
if self.table.isRowHidden(next_row):
|
|
38886
|
-
if hasattr(self, 'grid_page_size') and hasattr(self, 'grid_current_page'):
|
|
38887
|
-
target_page = next_row // self.grid_page_size
|
|
38888
|
-
if target_page != self.grid_current_page:
|
|
38889
|
-
self.grid_current_page = target_page
|
|
38890
|
-
self._update_pagination_ui()
|
|
38891
|
-
self._apply_pagination_to_grid()
|
|
38892
|
-
|
|
38893
|
-
# ⚡ INSTANT NAVIGATION
|
|
38894
|
-
self._ctrl_enter_navigation = True
|
|
38895
|
-
|
|
38896
|
-
self.table.clearSelection()
|
|
38897
|
-
self.table.setCurrentCell(next_row, 3)
|
|
38898
|
-
self.log(f"⏭️ Auto-skipped to next unconfirmed segment {next_seg.id}")
|
|
38899
|
-
next_target_widget = self.table.cellWidget(next_row, 3)
|
|
38900
|
-
if next_target_widget:
|
|
38901
|
-
next_target_widget.setFocus()
|
|
38902
|
-
next_target_widget.moveCursor(QTextCursor.MoveOperation.End)
|
|
38903
|
-
|
|
38904
|
-
# Recursively check if this next segment also has a 100% match
|
|
38905
|
-
self.confirm_and_next_unconfirmed()
|
|
38906
|
-
return
|
|
38907
|
-
|
|
38908
|
-
# No more unconfirmed segments after this one
|
|
38909
|
-
self.log("✅ No more unconfirmed segments after auto-confirm")
|
|
38910
|
-
# Update status bar after auto-confirming
|
|
38911
|
-
self.update_progress_stats()
|
|
38912
|
-
return
|
|
38913
|
-
|
|
38914
|
-
# Get the target cell widget and set focus to it (normal behavior without auto-confirm)
|
|
38981
|
+
|
|
38982
|
+
# v1.9.182: Explicitly update termview (don't rely on deferred signal)
|
|
38983
|
+
self._update_termview_for_segment(seg)
|
|
38984
|
+
|
|
38985
|
+
# v1.9.182: Explicitly schedule TM lookup (don't rely on deferred signal)
|
|
38986
|
+
if self.enable_tm_matching:
|
|
38987
|
+
find_replace_active = getattr(self, 'find_replace_active', False)
|
|
38988
|
+
if not find_replace_active:
|
|
38989
|
+
self._schedule_mt_and_llm_matches(seg, [])
|
|
38990
|
+
|
|
38991
|
+
# Get the target cell widget and set focus to it IMMEDIATELY
|
|
38992
|
+
# (moved BEFORE auto-confirm check for instant responsiveness)
|
|
38915
38993
|
target_widget = self.table.cellWidget(row, 3)
|
|
38916
38994
|
if target_widget:
|
|
38917
38995
|
target_widget.setFocus()
|
|
38918
38996
|
# Move cursor to end of text
|
|
38919
38997
|
target_widget.moveCursor(QTextCursor.MoveOperation.End)
|
|
38998
|
+
|
|
38999
|
+
# v1.9.182: Defer auto-confirm check to not block navigation
|
|
39000
|
+
# The TM lookup is slow - do it asynchronously after navigation completes
|
|
39001
|
+
if self.auto_confirm_100_percent_matches:
|
|
39002
|
+
QTimer.singleShot(50, lambda r=row, s=seg: self._check_auto_confirm_100_percent(r, s))
|
|
38920
39003
|
return
|
|
38921
39004
|
|
|
38922
39005
|
# No more unconfirmed segments, just go to next
|
|
@@ -38938,14 +39021,106 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""

             self.table.clearSelection()
             self.table.setCurrentCell(next_row, 3) # Column 3 = Target (widget column)
+            self.table.selectRow(next_row) # v1.9.182: Ensure row is visually selected
+            # Ensure the row is visible by scrolling to it
+            self.table.scrollToItem(self.table.item(next_row, 0), QTableWidget.ScrollHint.PositionAtCenter)
             self.log(f"⏭️ Moved to next segment (all remaining confirmed)")
+
+            # v1.9.182: Explicitly update termview (don't rely on deferred signal)
+            if next_row < len(self.current_project.segments):
+                next_seg = self.current_project.segments[next_row]
+                self._update_termview_for_segment(next_seg)
+
+                # v1.9.182: Explicitly schedule TM lookup (don't rely on deferred signal)
+                if self.enable_tm_matching:
+                    find_replace_active = getattr(self, 'find_replace_active', False)
+                    if not find_replace_active:
+                        self._schedule_mt_and_llm_matches(next_seg, [])
+
             # Get the target cell widget and set focus to it
             target_widget = self.table.cellWidget(next_row, 3)
             if target_widget:
                 target_widget.setFocus()
                 # Move cursor to end of text
                 target_widget.moveCursor(QTextCursor.MoveOperation.End)
-
+
+    def _check_auto_confirm_100_percent(self, row: int, seg):
+        """
+        v1.9.182: Deferred auto-confirm check for 100% TM matches.
+
+        This is called asynchronously after Ctrl+Enter navigation to avoid blocking
+        the UI thread with slow TM database queries.
+        """
+        try:
+            # Verify we're still on the same segment (user may have navigated away)
+            current_row = self.table.currentRow() if hasattr(self, 'table') and self.table else -1
+            if current_row != row:
+                return # User has moved - don't auto-confirm wrong segment
+
+            if not self.enable_tm_matching or not hasattr(self, 'db_manager') or not self.db_manager:
+                return
+
+            # Get activated TM IDs from project settings
+            activated_tm_ids = []
+            if hasattr(self.current_project, 'tm_settings') and self.current_project.tm_settings:
+                activated_tm_ids = self.current_project.tm_settings.get('activated_tm_ids', [])
+
+            if not activated_tm_ids:
+                return
+
+            # Use get_exact_match for 100% matches
+            source_lang = self.current_project.source_lang if hasattr(self.current_project, 'source_lang') else None
+            target_lang = self.current_project.target_lang if hasattr(self.current_project, 'target_lang') else None
+            exact_match = self.db_manager.get_exact_match(
+                seg.source,
+                tm_ids=activated_tm_ids,
+                source_lang=source_lang,
+                target_lang=target_lang
+            )
+
+            if not exact_match:
+                return
+
+            # Check if there's a 100% match and (target is empty OR overwrite is enabled)
+            target_is_empty = not seg.target.strip()
+            can_auto_confirm = target_is_empty or self.auto_confirm_overwrite_existing
+
+            if not can_auto_confirm:
+                return
+
+            # Verify AGAIN that we're still on the same segment (TM query may have taken time)
+            current_row = self.table.currentRow() if hasattr(self, 'table') and self.table else -1
+            if current_row != row:
+                return # User has moved during TM lookup
+
+            match_target = exact_match.get('target_text', '')
+            if not match_target:
+                return
+
+            overwrite_note = " (overwriting existing)" if not target_is_empty else " (empty target)"
+            self.log(f"🎯 Auto-confirm: Found 100% TM match for segment {seg.id}{overwrite_note}")
+
+            # Insert the match into the target cell
+            target_widget = self.table.cellWidget(row, 3)
+            if target_widget:
+                target_widget.setPlainText(match_target)
+                seg.target = match_target
+                seg.status = 'confirmed'
+                self.update_status_icon(row, 'confirmed')
+                self.project_modified = True
+
+                # Save to TM
+                try:
+                    self.save_segment_to_activated_tms(seg.source, seg.target)
+                    self.log(f"💾 Auto-confirmed and saved segment {seg.id} to TM")
+                except Exception as e:
+                    self.log(f"⚠️ Error saving auto-confirmed segment to TM: {e}")
+
+                # Continue to the NEXT unconfirmed segment (skip this one)
+                self.confirm_and_next_unconfirmed()
+        except Exception as e:
+            self.log(f"⚠️ Error in auto-confirm check: {e}")
+
     def confirm_selected_or_next(self):
         """Smart confirm: if multiple segments selected, confirm all; otherwise confirm and go to next.

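
The new _check_auto_confirm_100_percent helper guards against a race: it re-reads the table's current row before the TM query and again after it, and bails out if the user has navigated away in the meantime. A condensed sketch of that double check follows, with the get_exact_match arguments trimmed for brevity and apply_match used as a hypothetical stand-in for the insert/confirm/save steps in the real method.

    def deferred_check(self, row, seg):
        if self.table.currentRow() != row:
            return                          # user already moved on
        match = self.db_manager.get_exact_match(seg.source)   # slow DB query (arguments trimmed)
        if not match:
            return
        if self.table.currentRow() != row:
            return                          # user moved while the query was running
        self.apply_match(row, seg, match)   # hypothetical helper, not part of the diff
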
@@ -44096,12 +44271,12 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
             self._pending_mt_llm_segment = segment
             self._pending_termbase_matches = termbase_matches or []

-            # Start debounced timer - only call APIs after user stops
+            # Start debounced timer - only call APIs after user stops navigating
             from PyQt6.QtCore import QTimer
             self._mt_llm_timer = QTimer()
             self._mt_llm_timer.setSingleShot(True)
             self._mt_llm_timer.timeout.connect(lambda: self._execute_mt_llm_lookup())
-            self._mt_llm_timer.start(
+            self._mt_llm_timer.start(150) # Wait 150ms of inactivity before external API calls

         except Exception as e:
             self.log(f"Error scheduling MT/LLM search: {e}")
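
The debounce above now uses an explicit 150 ms window: every navigation event re-arms a single-shot QTimer, and the external MT/LLM calls fire only once no new navigation has happened for that long. A small self-contained sketch of the same idea, assuming the previous timer is kept on the object so it can be cancelled; the names below are illustrative rather than Supervertaler's.

    from PyQt6.QtCore import QTimer

    def schedule_lookup(self, segment):
        self._pending_segment = segment
        if getattr(self, '_debounce_timer', None) is not None:
            self._debounce_timer.stop()          # cancel the previously scheduled lookup
        self._debounce_timer = QTimer()
        self._debounce_timer.setSingleShot(True)
        self._debounce_timer.timeout.connect(self._run_lookup)
        self._debounce_timer.start(150)          # fire only after 150 ms of inactivity
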
@@ -44126,7 +44301,21 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
         """Search for TM, MT and LLM matches - called only after debounce delay"""
         try:
             from modules.translation_results_panel import TranslationMatch
-
+
+            # v1.9.182: Validate we're still on the same segment before displaying results
+            # This prevents stale results from showing when user navigates quickly
+            current_row = self.table.currentRow() if hasattr(self, 'table') and self.table else -1
+            if current_row >= 0:
+                id_item = self.table.item(current_row, 0)
+                if id_item:
+                    try:
+                        current_segment_id = int(id_item.text())
+                        if current_segment_id != segment.id:
+                            # User has moved to a different segment - abort this lookup
+                            return
+                    except (ValueError, AttributeError):
+                        pass
+
             # Get current project languages for all translation services
             source_lang = getattr(self.current_project, 'source_lang', None) if self.current_project else None
             target_lang = getattr(self.current_project, 'target_lang', None) if self.current_project else None
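
The validation block reads the segment id straight from column 0 of the currently selected row and compares it with the segment the lookup was scheduled for, aborting if they differ. Factored into a hypothetical helper (not part of the diff) it would look roughly like this; note that it deliberately fails open when the id cell is missing or malformed, just as the inline code does.

    def _is_still_current(self, segment) -> bool:
        row = self.table.currentRow() if getattr(self, 'table', None) else -1
        if row < 0:
            return True                      # no selection info - do not block the lookup
        item = self.table.item(row, 0)
        if item is None:
            return True
        try:
            return int(item.text()) == segment.id
        except (ValueError, AttributeError):
            return True                      # unreadable id cell - fail open
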
@@ -44199,6 +44388,22 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""

             # Show TM matches immediately (progressive loading)
             if matches_dict["TM"]:
+                # v1.9.182: Re-validate we're still on same segment before displaying
+                current_row = self.table.currentRow() if hasattr(self, 'table') and self.table else -1
+                if current_row >= 0:
+                    id_item = self.table.item(current_row, 0)
+                    if id_item:
+                        try:
+                            current_segment_id = int(id_item.text())
+                            if current_segment_id != segment.id:
+                                # User moved - still cache results but don't display
+                                with self.translation_matches_cache_lock:
+                                    if segment.id in self.translation_matches_cache:
+                                        self.translation_matches_cache[segment.id]["TM"] = matches_dict["TM"]
+                                return # Don't display stale results
+                        except (ValueError, AttributeError):
+                            pass
+
                 tm_only = {"TM": matches_dict["TM"]}
                 if hasattr(self, 'results_panels') and self.results_panels:
                     for panel in self.results_panels:
@@ -44252,6 +44457,16 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
                 has_fuzzy_match = any(float(tm.relevance) < 99.5 and float(tm.relevance) >= 50 for tm in matches_dict["TM"])
                 if has_fuzzy_match and not has_100_match:
                     self._play_sound_effect('tm_fuzzy_match')
+
+            # v1.9.182: Update cache with TM results so subsequent visits are instant
+            if matches_dict["TM"]:
+                with self.translation_matches_cache_lock:
+                    if segment.id in self.translation_matches_cache:
+                        # Merge TM results into existing cache entry
+                        self.translation_matches_cache[segment.id]["TM"] = matches_dict["TM"]
+                    else:
+                        # Create new cache entry with TM results
+                        self.translation_matches_cache[segment.id] = matches_dict
         except Exception as e:
             self.log(f"Error in delayed TM search: {e}")

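
TM results are also written back into translation_matches_cache under translation_matches_cache_lock, merging into an existing entry when one is present so that revisiting a segment can reuse the results instantly. A stripped-down sketch of that merge-or-create step; the module-level dictionary and lock here are for illustration only, whereas Supervertaler keeps both on the main window.

    import threading

    translation_matches_cache = {}                     # segment id -> {"TM": [...], "MT": [...], ...}
    translation_matches_cache_lock = threading.Lock()

    def cache_tm_results(segment_id, matches_dict):
        with translation_matches_cache_lock:
            entry = translation_matches_cache.get(segment_id)
            if entry is not None:
                entry["TM"] = matches_dict["TM"]       # merge into the existing entry
            else:
                translation_matches_cache[segment_id] = matches_dict   # create a new entry
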
@@ -46830,10 +47045,8 @@ class SuperlookupTab(QWidget):
                 for row in db_manager.cursor.fetchall():
                     if row[0]:
                         all_languages.add(row[0])
-            except Exception
-
-        else:
-            print(f"[DEBUG] No db_manager available for language population")
+            except Exception:
+                pass # Silent failure for language population

         # Get languages from termbases
         if termbase_mgr:
@@ -46844,8 +47057,8 @@ class SuperlookupTab(QWidget):
                         all_languages.add(tb['source_lang'])
                     if tb.get('target_lang'):
                         all_languages.add(tb['target_lang'])
-            except Exception
-
+            except Exception:
+                pass # Silent failure for language population

         # Group languages by their base language name
         # E.g., "en", "en-US", "en-GB", "English" all map to "English"
@@ -46875,8 +47088,6 @@ class SuperlookupTab(QWidget):
             # Store variants list as the data for this item
             self.lang_from_combo.addItem(base_name, variants)
             self.lang_to_combo.addItem(base_name, variants)
-
-        print(f"[DEBUG] Populated language dropdowns with {len(sorted_base_langs)} base languages (from {len(all_languages)} variants)")

     def _get_base_language_name(self, lang_code):
         """Extract the base language name from any language code or name.
@@ -47063,37 +47274,20 @@ class SuperlookupTab(QWidget):
         selected_tm_ids = self.get_selected_tm_ids()
         search_direction = self.get_search_direction()
         from_lang, to_lang = self.get_language_filters()
-
-        # Write language info to debug file
-        with open('superlookup_debug.txt', 'a') as f:
-            f.write(f"Language filters: from_lang='{from_lang}', to_lang='{to_lang}'\n")
-            f.write(f"Search direction: {search_direction}\n")
-
-        print(f"[DEBUG] Superlookup: Selected TM IDs: {selected_tm_ids}, direction: {search_direction}", flush=True)
-        print(f"[DEBUG] Superlookup: Language filters: from={from_lang}, to={to_lang}", flush=True)
-        print(f"[DEBUG] Superlookup: tm_database = {self.tm_database}", flush=True)
+
         if self.engine:
             self.engine.set_enabled_tm_ids(selected_tm_ids if selected_tm_ids else None)

         # Perform TM lookup with direction and language filters
         tm_results = []
         if self.tm_database:
-
-            tm_results = self.engine.search_tm(text, direction=search_direction,
+            tm_results = self.engine.search_tm(text, direction=search_direction,
                                                source_lang=from_lang, target_lang=to_lang)
-
-        else:
-            print(f"[DEBUG] Superlookup: tm_database is None, skipping TM search!", flush=True)
-
+
         # Perform termbase lookup (search Supervertaler termbases directly)
-        print(f"[DEBUG] About to call search_termbases with from_lang='{from_lang}', to_lang='{to_lang}'", flush=True)
         try:
             termbase_results = self.search_termbases(text, source_lang=from_lang, target_lang=to_lang)
-
-        except Exception as e:
-            print(f"[DEBUG] ERROR in search_termbases: {e}", flush=True)
-            import traceback
-            traceback.print_exc()
+        except Exception:
             termbase_results = []

         # Perform Supermemory semantic search