supervertaler-1.9.131-py3-none-any.whl → supervertaler-1.9.173-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Supervertaler.py +3799 -873
- modules/ai_attachment_manager.py +3 -3
- modules/config_manager.py +10 -10
- modules/database_manager.py +243 -65
- modules/keyboard_shortcuts_widget.py +7 -0
- modules/non_translatables_manager.py +1 -1
- modules/prompt_library_migration.py +1 -1
- modules/setup_wizard.py +8 -8
- modules/superbrowser.py +16 -12
- modules/superlookup.py +18 -10
- modules/tag_manager.py +20 -2
- modules/termview_widget.py +20 -12
- modules/tm_metadata_manager.py +41 -0
- modules/tmx_editor_qt.py +1 -1
- modules/translation_memory.py +53 -8
- modules/unified_prompt_library.py +1 -1
- modules/unified_prompt_manager_qt.py +10 -29
- {supervertaler-1.9.131.dist-info → supervertaler-1.9.173.dist-info}/METADATA +105 -7
- {supervertaler-1.9.131.dist-info → supervertaler-1.9.173.dist-info}/RECORD +23 -23
- {supervertaler-1.9.131.dist-info → supervertaler-1.9.173.dist-info}/WHEEL +1 -1
- {supervertaler-1.9.131.dist-info → supervertaler-1.9.173.dist-info}/entry_points.txt +0 -0
- {supervertaler-1.9.131.dist-info → supervertaler-1.9.173.dist-info}/licenses/LICENSE +0 -0
- {supervertaler-1.9.131.dist-info → supervertaler-1.9.173.dist-info}/top_level.txt +0 -0
modules/ai_attachment_manager.py
CHANGED
@@ -29,15 +29,15 @@ class AttachmentManager:
         Initialize the AttachmentManager.
 
         Args:
-            base_dir: Base directory for attachments (default: user_data_private/
+            base_dir: Base directory for attachments (default: user_data_private/ai_assistant)
             log_callback: Function to call for logging messages
         """
         self.log = log_callback if log_callback else print
 
         # Set base directory
         if base_dir is None:
-            # Default to user_data_private/
-            base_dir = Path("user_data_private") / "
+            # Default to user_data_private/ai_assistant
+            base_dir = Path("user_data_private") / "ai_assistant"
 
         self.base_dir = Path(base_dir)
         self.attachments_dir = self.base_dir / "attachments"
modules/config_manager.py
CHANGED
@@ -35,14 +35,14 @@ class ConfigManager:
     REQUIRED_FOLDERS = [
         # Note: Old numbered folders (1_System_Prompts, 2_Domain_Prompts, etc.) are deprecated
         # Migration moves them to unified Library structure
-        "
-        "
-        "
-        "
-        "
-        "
-        "
-        "
+        "prompt_library/domain_expertise",
+        "prompt_library/project_prompts",
+        "prompt_library/style_guides",
+        "resources/termbases",
+        "resources/tms",
+        "resources/non_translatables",
+        "resources/segmentation_rules",
+        "projects",
     ]
 
     def __init__(self):
@@ -268,8 +268,8 @@ class ConfigManager:
         Get the full path to a subfolder in user_data.
 
         Example:
-            config.get_subfolder_path('
-            -> '/home/user/
+            config.get_subfolder_path('resources/tms')
+            -> '/home/user/Supervertaler/resources/tms'
         """
         user_data_path = self.get_user_data_path()
         full_path = os.path.join(user_data_path, subfolder)
modules/database_manager.py
CHANGED
@@ -17,12 +17,38 @@ import sqlite3
 import os
 import json
 import hashlib
+import unicodedata
+import re
 from datetime import datetime
 from typing import List, Dict, Optional, Tuple
 from pathlib import Path
 from difflib import SequenceMatcher
 
 
+def _normalize_for_matching(text: str) -> str:
+    """Normalize text for exact matching.
+
+    Handles invisible differences that would cause exact match to fail:
+    - Unicode normalization (NFC)
+    - Multiple whitespace -> single space
+    - Leading/trailing whitespace
+    - Non-breaking spaces -> regular spaces
+    """
+    if not text:
+        return ""
+    # Unicode normalize (NFC form)
+    text = unicodedata.normalize('NFC', text)
+    # Convert non-breaking spaces and other whitespace to regular space
+    text = text.replace('\u00a0', ' ')  # NBSP
+    text = text.replace('\u2007', ' ')  # Figure space
+    text = text.replace('\u202f', ' ')  # Narrow NBSP
+    # Collapse multiple whitespace to single space
+    text = re.sub(r'\s+', ' ', text)
+    # Strip leading/trailing whitespace
+    text = text.strip()
+    return text
+
+
 class DatabaseManager:
     """Manages SQLite database for translation resources"""
 
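For reference, a condensed, self-contained copy of the new helper run on two strings that look identical on screen (the sample strings are invented for illustration):

    import re
    import unicodedata

    def normalize_for_matching(text: str) -> str:
        # Mirrors _normalize_for_matching() above: NFC, NBSP variants to plain
        # spaces, collapse whitespace runs, strip the ends.
        if not text:
            return ""
        text = unicodedata.normalize('NFC', text)
        for space in ('\u00a0', '\u2007', '\u202f'):
            text = text.replace(space, ' ')
        return re.sub(r'\s+', ' ', text).strip()

    a = "Zie  artikel\u00a05"   # double space + non-breaking space
    b = "Zie artikel 5"
    print(a == b)                                                  # False
    print(normalize_for_matching(a) == normalize_for_matching(b))  # True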
@@ -655,17 +681,19 @@ class DatabaseManager:
     # TRANSLATION MEMORY METHODS
     # ============================================
 
-    def add_translation_unit(self, source: str, target: str, source_lang: str,
+    def add_translation_unit(self, source: str, target: str, source_lang: str,
                              target_lang: str, tm_id: str = 'project',
                              project_id: str = None, context_before: str = None,
                              context_after: str = None, notes: str = None) -> int:
         """
         Add translation unit to database
-
+
         Returns: ID of inserted/updated entry
         """
-        # Generate hash for
-
+        # Generate hash from NORMALIZED source for consistent exact matching
+        # This handles invisible differences like Unicode normalization, whitespace variations
+        normalized_source = _normalize_for_matching(source)
+        source_hash = hashlib.md5(normalized_source.encode('utf-8')).hexdigest()
 
         try:
             self.cursor.execute("""
@@ -687,33 +715,38 @@ class DatabaseManager:
             return None
 
     def get_exact_match(self, source: str, tm_ids: List[str] = None,
-                        source_lang: str = None, target_lang: str = None,
+                        source_lang: str = None, target_lang: str = None,
                         bidirectional: bool = True) -> Optional[Dict]:
         """
         Get exact match from TM
-
+
         Args:
             source: Source text to match
            tm_ids: List of TM IDs to search (None = all)
            source_lang: Filter by source language (base code matching: 'en' matches 'en-US', 'en-GB', etc.)
            target_lang: Filter by target language (base code matching)
            bidirectional: If True, search both directions (nl→en AND en→nl)
-
+
         Returns: Dictionary with match data or None
         """
         from modules.tmx_generator import get_base_lang_code
-
+
+        # Try both normalized and non-normalized hashes for backward compatibility
+        # This handles invisible differences like Unicode normalization, whitespace variations
         source_hash = hashlib.md5(source.encode('utf-8')).hexdigest()
-
+        normalized_source = _normalize_for_matching(source)
+        normalized_hash = hashlib.md5(normalized_source.encode('utf-8')).hexdigest()
+
         # Get base language codes for comparison
         src_base = get_base_lang_code(source_lang) if source_lang else None
         tgt_base = get_base_lang_code(target_lang) if target_lang else None
-
+
+        # Search using both original hash and normalized hash
         query = """
-            SELECT * FROM translation_units
-            WHERE source_hash = ?
+            SELECT * FROM translation_units
+            WHERE (source_hash = ? OR source_hash = ?)
         """
-        params = [source_hash,
+        params = [source_hash, normalized_hash]
 
         if tm_ids:
             placeholders = ','.join('?' * len(tm_ids))
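A short sketch (invented strings) of why get_exact_match() now checks two digests: rows written by older builds were hashed on the raw source text, rows written after this change on the normalized text, and the two digests differ whenever normalization changes anything:

    import hashlib

    def md5_hex(s: str) -> str:
        return hashlib.md5(s.encode('utf-8')).hexdigest()

    raw = "Goedemorgen\u00a0allemaal"      # as an older build would have hashed it
    normalized = "Goedemorgen allemaal"    # as _normalize_for_matching() returns it

    print(md5_hex(raw) == md5_hex(normalized))  # False
    # Hence the lookup: WHERE (source_hash = ? OR source_hash = ?) with both digests.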
@@ -840,11 +873,15 @@ class DatabaseManager:
             bidirectional: If True, search both directions (nl→en AND en→nl)
 
         Returns: List of matches with similarity scores
+
+        Note: When multiple TMs are provided, searches each TM separately to ensure
+              good matches from smaller TMs aren't pushed out by BM25 keyword ranking
+              from larger TMs. Results are merged and sorted by actual similarity.
         """
         # For better FTS5 matching, tokenize the query and escape special chars
         # FTS5 special characters: " ( ) - : , . ! ?
         import re
-        from modules.tmx_generator import get_base_lang_code
+        from modules.tmx_generator import get_base_lang_code, get_lang_match_variants
 
         # Strip HTML/XML tags from source for clean text search
         text_without_tags = re.sub(r'<[^>]+>', '', source)
@@ -868,22 +905,57 @@ class DatabaseManager:
         # This helps find similar long segments more reliably
         search_terms_for_query = all_search_terms[:20]
 
-        print(f"[DEBUG] search_fuzzy_matches: source='{source[:50]}...', {len(all_search_terms)} terms")
-
         if not search_terms_for_query:
             # If no valid terms, return empty results
-            print(f"[DEBUG] search_fuzzy_matches: No valid search terms, returning empty")
             return []
 
         # Quote each term to prevent FTS5 syntax errors
         fts_query = ' OR '.join(f'"{term}"' for term in search_terms_for_query)
-        print(f"[DEBUG] search_fuzzy_matches: FTS query terms = {search_terms_for_query[:10]}...")
 
         # Get base language codes for comparison
         src_base = get_base_lang_code(source_lang) if source_lang else None
         tgt_base = get_base_lang_code(target_lang) if target_lang else None
 
-        #
+        # MULTI-TM FIX: Search each TM separately to avoid BM25 ranking issues
+        # When a large TM is combined with a small TM, the large TM's many keyword matches
+        # push down genuinely similar sentences from the small TM
+        tms_to_search = tm_ids if tm_ids else [None]  # None means search all TMs together
+
+        all_results = []
+
+        for tm_id in tms_to_search:
+            # Search this specific TM (or all if tm_id is None)
+            tm_results = self._search_single_tm_fuzzy(
+                source, fts_query, [tm_id] if tm_id else None,
+                threshold, max_results, src_base, tgt_base,
+                source_lang, target_lang, bidirectional
+            )
+            all_results.extend(tm_results)
+
+        # Deduplicate by source_text (keep highest similarity for each unique source)
+        seen = {}
+        for result in all_results:
+            key = result['source_text']
+            if key not in seen or result['similarity'] > seen[key]['similarity']:
+                seen[key] = result
+
+        deduped_results = list(seen.values())
+
+        # Sort ALL results by similarity (highest first) - this ensures the 76% match
+        # appears before 40% matches regardless of which TM they came from
+        deduped_results.sort(key=lambda x: x['similarity'], reverse=True)
+
+        return deduped_results[:max_results]
+
+    def _search_single_tm_fuzzy(self, source: str, fts_query: str, tm_ids: List[str],
+                                threshold: float, max_results: int,
+                                src_base: str, tgt_base: str,
+                                source_lang: str, target_lang: str,
+                                bidirectional: bool) -> List[Dict]:
+        """Search a single TM (or all TMs if tm_ids is None) for fuzzy matches"""
+        from modules.tmx_generator import get_lang_match_variants
+
+        # Build query for this TM
         query = """
             SELECT tu.*,
                    bm25(translation_units_fts) as relevance
@@ -893,13 +965,12 @@ class DatabaseManager:
         """
         params = [fts_query]
 
-        if tm_ids:
+        if tm_ids and tm_ids[0] is not None:
             placeholders = ','.join('?' * len(tm_ids))
             query += f" AND tu.tm_id IN ({placeholders})"
             params.extend(tm_ids)
 
         # Use flexible language matching (matches 'nl', 'nl-NL', 'Dutch', etc.)
-        from modules.tmx_generator import get_lang_match_variants
         if src_base:
             src_variants = get_lang_match_variants(source_lang)
             src_conditions = []
@@ -920,19 +991,16 @@ class DatabaseManager:
                 params.append(f"{variant}-%")
             query += f" AND ({' OR '.join(tgt_conditions)})"
 
-        #
-        #
-        #
+        # Per-TM candidate limit - INCREASED to catch more potential fuzzy matches
+        # When multiple TMs are searched, BM25 ranking can push genuinely similar
+        # entries far down the list due to common word matches in other entries
         candidate_limit = max(500, max_results * 50)
         query += f" ORDER BY relevance DESC LIMIT {candidate_limit}"
 
-        print(f"[DEBUG] search_fuzzy_matches: Executing query (limit={candidate_limit})...")
-
         try:
             self.cursor.execute(query, params)
             all_rows = self.cursor.fetchall()
         except Exception as e:
-            print(f"[DEBUG] search_fuzzy_matches: SQL ERROR: {e}")
             return []
 
         results = []
@@ -948,8 +1016,6 @@ class DatabaseManager:
                 match_dict['match_pct'] = int(similarity * 100)
                 results.append(match_dict)
 
-        print(f"[DEBUG] search_fuzzy_matches: After threshold filter ({threshold}): {len(results)} matches")
-
         # If bidirectional, also search reverse direction
         if bidirectional and src_base and tgt_base:
             query = """
@@ -961,13 +1027,12 @@ class DatabaseManager:
             """
             params = [fts_query]
 
-            if tm_ids:
+            if tm_ids and tm_ids[0] is not None:
                 placeholders = ','.join('?' * len(tm_ids))
                 query += f" AND tu.tm_id IN ({placeholders})"
                 params.extend(tm_ids)
 
             # Reversed language filters with flexible matching
-            # For reverse: TM target_lang should match our source_lang, TM source_lang should match our target_lang
             src_variants = get_lang_match_variants(source_lang)
             tgt_variants = get_lang_match_variants(target_lang)
 
@@ -991,26 +1056,27 @@ class DatabaseManager:
 
             query += f" ORDER BY relevance DESC LIMIT {max_results * 5}"
 
-
-
-            for row in self.cursor.fetchall():
-                match_dict = dict(row)
-                # Calculate similarity against target_text (since we're reversing)
-                similarity = self.calculate_similarity(source, match_dict['target_text'])
+            try:
+                self.cursor.execute(query, params)
 
-
-
-                #
-
-
-
-
-
-
-
-
-
+                for row in self.cursor.fetchall():
+                    match_dict = dict(row)
+                    # Calculate similarity against target_text (since we're reversing)
+                    similarity = self.calculate_similarity(source, match_dict['target_text'])
+
+                    # Only include matches above threshold
+                    if similarity >= threshold:
+                        # Swap source/target for reverse match
+                        match_dict['source_text'], match_dict['target_text'] = match_dict['target_text'], match_dict['source_text']
+                        match_dict['source_lang'], match_dict['target_lang'] = match_dict['target_lang'], match_dict['source_lang']
+                        match_dict['similarity'] = similarity
+                        match_dict['match_pct'] = int(similarity * 100)
+                        match_dict['reverse_match'] = True
+                        results.append(match_dict)
+            except Exception as e:
+                print(f"[DEBUG] _search_single_tm_fuzzy (reverse): SQL ERROR: {e}")
+
+        return results
 
     def search_all(self, source: str, tm_ids: List[str] = None, enabled_only: bool = True,
                    threshold: float = 0.75, max_results: int = 10) -> List[Dict]:
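The merge step that search_fuzzy_matches() performs after querying each TM on its own can be read in isolation; a minimal sketch with made-up result dictionaries:

    # Hypothetical per-TM results, shaped like the dicts _search_single_tm_fuzzy returns
    big_tm = [
        {'source_text': 'The cat sat.', 'similarity': 0.40},
        {'source_text': 'The dog sat.', 'similarity': 0.38},
    ]
    small_tm = [
        {'source_text': 'The cat sat on the mat.', 'similarity': 0.76},
        {'source_text': 'The cat sat.', 'similarity': 0.40},  # duplicate of a big-TM hit
    ]

    all_results = big_tm + small_tm

    # Deduplicate by source_text, keeping the highest-similarity entry per source
    seen = {}
    for r in all_results:
        key = r['source_text']
        if key not in seen or r['similarity'] > seen[key]['similarity']:
            seen[key] = r

    merged = sorted(seen.values(), key=lambda x: x['similarity'], reverse=True)
    print([(m['source_text'], m['similarity']) for m in merged])
    # The 76% hit from the small TM ranks first, regardless of BM25 order within either TM.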
@@ -1124,6 +1190,12 @@ class DatabaseManager:
         Uses FTS5 full-text search for fast matching on millions of segments.
         Falls back to LIKE queries if FTS5 fails.
 
+        Language filters define what you're searching FOR and what translation you want:
+        - "From: Dutch, To: English" = Search for Dutch text, show English translations
+        - Searches ALL TMs (regardless of their stored language pair direction)
+        - Automatically swaps columns when needed (e.g., finds Dutch in target column of EN→NL TM)
+        - This is MORE intuitive than traditional CAT tools that only search specific TM directions
+
         Args:
             query: Text to search for
             tm_ids: List of TM IDs to search (None = all)
@@ -1141,6 +1213,12 @@ class DatabaseManager:
         # Wrap in quotes for phrase search
         fts_query = f'"{fts_query}"'
 
+        # When language filters specified, we need to search intelligently:
+        # - Don't filter by TM language pair (search ALL TMs)
+        # - Search in BOTH columns to find text
+        # - Swap columns if needed to show correct language order
+        use_smart_search = (source_langs or target_langs)
+
         try:
             # Use FTS5 for fast full-text search
             if direction == 'source':
@@ -1171,20 +1249,105 @@ class DatabaseManager:
                 fts_sql += f" AND tu.tm_id IN ({placeholders})"
                 params.extend(tm_ids)
 
-            #
-
-
-
-
-
-
-
+            # DON'T filter by language when smart search active
+            # (we need to search all TMs and figure out which column has our language)
+            if not use_smart_search:
+                # Traditional filtering when no language filters
+                if source_langs:
+                    placeholders = ','.join('?' * len(source_langs))
+                    fts_sql += f" AND tu.source_lang IN ({placeholders})"
+                    params.extend(source_langs)
+                if target_langs:
+                    placeholders = ','.join('?' * len(target_langs))
+                    fts_sql += f" AND tu.target_lang IN ({placeholders})"
+                    params.extend(target_langs)
 
             fts_sql += " ORDER BY tu.modified_date DESC LIMIT 100"
 
             self.cursor.execute(fts_sql, params)
-
+            raw_results = [dict(row) for row in self.cursor.fetchall()]
+
+            # Smart search: Filter and swap based on language metadata
+            if use_smart_search:
+                processed_results = []
+                for row in raw_results:
+                    row_src_lang = row.get('source_lang', '')
+                    row_tgt_lang = row.get('target_lang', '')
+
+                    # Check if this row matches our language requirements
+                    # If "From: Dutch, To: English":
+                    #   - Accept if source=nl and target=en (normal)
+                    #   - Accept if source=en and target=nl (swap needed)
+
+                    matches = False
+                    needs_swap = False
+
+                    if source_langs and target_langs:
+                        # Both filters specified
+                        if row_src_lang in source_langs and row_tgt_lang in target_langs:
+                            # Perfect match - no swap
+                            matches = True
+                            needs_swap = False
+                        elif row_src_lang in target_langs and row_tgt_lang in source_langs:
+                            # Reversed - needs swap
+                            matches = True
+                            needs_swap = True
+                    elif source_langs:
+                        # Only "From" specified - just check if Dutch is in EITHER column
+                        if row_src_lang in source_langs:
+                            matches = True
+                            needs_swap = False
+                        elif row_tgt_lang in source_langs:
+                            matches = True
+                            needs_swap = True
+                    elif target_langs:
+                        # Only "To" specified - just check if English is in EITHER column
+                        if row_tgt_lang in target_langs:
+                            matches = True
+                            needs_swap = False
+                        elif row_src_lang in target_langs:
+                            matches = True
+                            needs_swap = True
+
+                    if matches:
+                        # CRITICAL CHECK: Verify the search text is actually in the correct column
+                        # If user searches for Dutch with "From: Dutch", the text must be in the source column (after any swap)
+                        # This prevents finding Dutch text when user asks to search FOR English
+
+                        if needs_swap:
+                            # After swap, check if query is in the NEW source column (was target)
+                            text_to_check = row['target_text'].lower()
+                        else:
+                            # No swap, check if query is in source column
+                            text_to_check = row['source_text'].lower()
+
+                        # Only include if query text is actually in the source column
+                        if query.lower() in text_to_check:
+                            if needs_swap:
+                                # Swap columns to show correct language order
+                                swapped_row = row.copy()
+                                swapped_row['source'] = row['target_text']
+                                swapped_row['target'] = row['source_text']
+                                swapped_row['source_lang'] = row['target_lang']
+                                swapped_row['target_lang'] = row['source_lang']
+                                processed_results.append(swapped_row)
+                            else:
+                                # No swap needed - just rename columns
+                                processed_row = row.copy()
+                                processed_row['source'] = row['source_text']
+                                processed_row['target'] = row['target_text']
+                                processed_results.append(processed_row)
+
+                return processed_results
+            else:
+                # No language filters - just rename columns
+                processed_results = []
+                for row in raw_results:
+                    processed_row = row.copy()
+                    processed_row['source'] = row['source_text']
+                    processed_row['target'] = row['target_text']
+                    processed_results.append(processed_row)
+                return processed_results
 
         except Exception as e:
             # Fallback to LIKE query if FTS5 fails (e.g., index not built)
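The accept/swap decision in the smart concordance search above can be condensed into a small standalone function; a sketch (not code from the package) assuming each hit carries source_lang/target_lang codes and the two text columns:

    def orient_row(row: dict, source_langs, target_langs):
        # Return (matches, needs_swap), mirroring the rules above.
        src, tgt = row['source_lang'], row['target_lang']
        if source_langs and target_langs:
            if src in source_langs and tgt in target_langs:
                return True, False      # stored in the requested direction
            if src in target_langs and tgt in source_langs:
                return True, True       # stored reversed: display swapped
        elif source_langs:
            if src in source_langs:
                return True, False
            if tgt in source_langs:
                return True, True
        elif target_langs:
            if tgt in target_langs:
                return True, False
            if src in target_langs:
                return True, True
        return False, False

    row = {'source_lang': 'en', 'target_lang': 'nl',
           'source_text': 'Good morning', 'target_text': 'Goedemorgen'}
    print(orient_row(row, source_langs=['nl'], target_langs=['en']))  # (True, True)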
@@ -1312,6 +1475,10 @@ class DatabaseManager:
         # Note: termbase_id is stored as TEXT in termbase_terms but INTEGER in termbases
         # Use CAST to ensure proper comparison
         # IMPORTANT: Join with termbase_activation to get the ACTUAL priority for this project
+        # CRITICAL FIX: Also match when search_term starts with the glossary term
+        # This handles cases like searching for "ca." when glossary has "ca."
+        # AND searching for "ca" when glossary has "ca."
+        # We also strip trailing punctuation from glossary terms for comparison
         query = """
             SELECT
                 t.id, t.source_term, t.target_term, t.termbase_id, t.priority,
@@ -1329,19 +1496,30 @@ class DatabaseManager:
                 LOWER(t.source_term) = LOWER(?) OR
                 LOWER(t.source_term) LIKE LOWER(?) OR
                 LOWER(t.source_term) LIKE LOWER(?) OR
-                LOWER(t.source_term) LIKE LOWER(?)
+                LOWER(t.source_term) LIKE LOWER(?) OR
+                LOWER(RTRIM(t.source_term, '.!?,;:')) = LOWER(?) OR
+                LOWER(?) LIKE LOWER(t.source_term) || '%' OR
+                LOWER(?) = LOWER(RTRIM(t.source_term, '.!?,;:'))
             )
             AND (ta.is_active = 1 OR tb.is_project_termbase = 1)
         """
-        #
-        #
-        #
+        # Matching patterns:
+        # 1. Exact match: source_term = search_term
+        # 2. Glossary term starts with search: source_term LIKE "search_term %"
+        # 3. Glossary term ends with search: source_term LIKE "% search_term"
+        # 4. Glossary term contains search: source_term LIKE "% search_term %"
+        # 5. Glossary term (stripped) = search_term: RTRIM(source_term) = search_term (handles "ca." = "ca")
+        # 6. Search starts with glossary term: search_term LIKE source_term || '%'
+        # 7. Search = glossary term stripped: search_term = RTRIM(source_term)
        params = [
            project_id if project_id else 0,  # Use 0 if no project (won't match any activation records)
            search_term,
            f"{search_term} %",
            f"% {search_term}",
-            f"% {search_term} %"
+            f"% {search_term} %",
+            search_term,  # For RTRIM comparison
+            search_term,  # For reverse LIKE
+            search_term   # For reverse RTRIM comparison
        ]
 
        # Language filters - if term has no language, use termbase language for filtering
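The new RTRIM clauses are easy to sanity-check in an in-memory SQLite session, since SQLite's rtrim(X, Y) strips any characters listed in Y from the right of X (illustrative session, not code from the package):

    import sqlite3

    conn = sqlite3.connect(':memory:')
    # Pattern 5: glossary "ca." (stripped) equals the search term "ca"
    print(conn.execute("SELECT LOWER(RTRIM('ca.', '.!?,;:')) = LOWER('ca')").fetchone()[0])  # 1
    # Pattern 6: the search term "ca." starts with the glossary term "ca"
    print(conn.execute("SELECT LOWER('ca.') LIKE LOWER('ca') || '%'").fetchone()[0])         # 1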
modules/keyboard_shortcuts_widget.py
CHANGED
@@ -301,6 +301,10 @@ class KeyboardShortcutsWidget(QWidget):
 
     def load_shortcuts(self):
         """Load shortcuts into the table"""
+        # CRITICAL: Disable sorting during table modifications to prevent
+        # items from becoming disassociated from their rows (causes vanishing text bug)
+        self.table.setSortingEnabled(False)
+
         self.table.setRowCount(0)
 
         all_shortcuts = self.manager.get_all_shortcuts()
@@ -362,6 +366,9 @@ class KeyboardShortcutsWidget(QWidget):
             self.table.setItem(row, 4, status_item)
 
             row += 1
+
+        # Re-enable sorting after all modifications are complete
+        self.table.setSortingEnabled(True)
 
     def _on_enabled_changed(self, state):
         """Handle checkbox state change for enabling/disabling shortcuts"""
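The pattern behind this fix is generic to Qt item views: a sorted table may re-sort after every setItem() call while it is being rebuilt, so rows move under the loop and cells appear to vanish. A minimal sketch of populating with sorting frozen (assuming PyQt6 here; PyQt5/PySide6 use the same calls):

    from PyQt6.QtWidgets import QApplication, QTableWidget, QTableWidgetItem

    app = QApplication([])
    table = QTableWidget(0, 2)
    table.setSortingEnabled(True)   # user-sortable table

    def populate(rows):
        # Freeze sorting while inserting, otherwise each setItem() can trigger
        # a re-sort and later items land in (or disappear from) the wrong row.
        table.setSortingEnabled(False)
        table.setRowCount(0)
        for r, (name, keys) in enumerate(rows):
            table.insertRow(r)
            table.setItem(r, 0, QTableWidgetItem(name))
            table.setItem(r, 1, QTableWidgetItem(keys))
        table.setSortingEnabled(True)   # re-enable once the table is complete

    populate([("Copy source", "Ctrl+Shift+C"), ("Next segment", "Ctrl+Down")])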
modules/non_translatables_manager.py
CHANGED
@@ -172,7 +172,7 @@ class NonTranslatablesManager:
         Initialize manager.
 
         Args:
-            base_path: Base path for NT files (typically user_data/
+            base_path: Base path for NT files (typically user_data/resources/non_translatables)
             log_callback: Optional logging function
         """
         self.base_path = Path(base_path)
modules/prompt_library_migration.py
CHANGED
@@ -29,7 +29,7 @@ class PromptLibraryMigration:
     def __init__(self, prompt_library_dir: str, log_callback=None):
         """
         Args:
-            prompt_library_dir: Path to user_data/
+            prompt_library_dir: Path to user_data/prompt_library
             log_callback: Function for logging
         """
         self.prompt_library_dir = Path(prompt_library_dir)
modules/setup_wizard.py
CHANGED
@@ -80,17 +80,17 @@ class SetupWizard:
             "Supervertaler will create the following structure:\n\n"
             f"{self.selected_path}\n"
             f" ├── api_keys.txt\n"
-            f" ├── 
+            f" ├── prompt_library/\n"
             f" │ ├── 1_System_Prompts/\n"
             f" │ ├── 2_Domain_Prompts/\n"
             f" │ ├── 3_Project_Prompts/\n"
             f" │ └── 4_Style_Guides/\n"
-            f" ├── 
+            f" ├── resources/\n"
             f" │ ├── TMs/\n"
             f" │ ├── Glossaries/\n"
-            f" │ ├── 
-            f" │ └── 
-            f" └── 
+            f" │ ├── non_translatables/\n"
+            f" │ └── segmentation_rules/\n"
+            f" └── projects/\n\n"
             "Is this correct?"
         )
 
@@ -140,9 +140,9 @@ class SetupWizard:
             f"Your data folder: {self.selected_path}\n\n"
             f"Created:\n"
             f" • api_keys.txt (add your API keys here)\n"
-            f" • 
-            f" • 
-            f" • 
+            f" • prompt_library/ (your prompts)\n"
+            f" • resources/ (TMs, glossaries)\n"
+            f" • projects/ (your work)\n\n"
             f"All your translation memories, prompts, and projects\n"
             f"will be stored in this location."
         )