supervertaler 1.9.131__py3-none-any.whl → 1.9.173__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,15 +29,15 @@ class AttachmentManager:
29
29
  Initialize the AttachmentManager.
30
30
 
31
31
  Args:
32
- base_dir: Base directory for attachments (default: user_data_private/AI_Assistant)
32
+ base_dir: Base directory for attachments (default: user_data_private/ai_assistant)
33
33
  log_callback: Function to call for logging messages
34
34
  """
35
35
  self.log = log_callback if log_callback else print
36
36
 
37
37
  # Set base directory
38
38
  if base_dir is None:
39
- # Default to user_data_private/AI_Assistant
40
- base_dir = Path("user_data_private") / "AI_Assistant"
39
+ # Default to user_data_private/ai_assistant
40
+ base_dir = Path("user_data_private") / "ai_assistant"
41
41
 
42
42
  self.base_dir = Path(base_dir)
43
43
  self.attachments_dir = self.base_dir / "attachments"
modules/config_manager.py CHANGED
@@ -35,14 +35,14 @@ class ConfigManager:
35
35
  REQUIRED_FOLDERS = [
36
36
  # Note: Old numbered folders (1_System_Prompts, 2_Domain_Prompts, etc.) are deprecated
37
37
  # Migration moves them to unified Library structure
38
- "Prompt_Library/Domain Expertise",
39
- "Prompt_Library/Project Prompts",
40
- "Prompt_Library/Style Guides",
41
- "Translation_Resources/Termbases",
42
- "Translation_Resources/TMs",
43
- "Translation_Resources/Non-translatables",
44
- "Translation_Resources/Segmentation_rules",
45
- "Projects",
38
+ "prompt_library/domain_expertise",
39
+ "prompt_library/project_prompts",
40
+ "prompt_library/style_guides",
41
+ "resources/termbases",
42
+ "resources/tms",
43
+ "resources/non_translatables",
44
+ "resources/segmentation_rules",
45
+ "projects",
46
46
  ]
47
47
 
48
48
  def __init__(self):
@@ -268,8 +268,8 @@ class ConfigManager:
268
268
  Get the full path to a subfolder in user_data.
269
269
 
270
270
  Example:
271
- config.get_subfolder_path('Translation_Resources/TMs')
272
- -> '/home/user/Supervertaler_Data/Translation_Resources/TMs'
271
+ config.get_subfolder_path('resources/tms')
272
+ -> '/home/user/Supervertaler/resources/tms'
273
273
  """
274
274
  user_data_path = self.get_user_data_path()
275
275
  full_path = os.path.join(user_data_path, subfolder)
@@ -17,12 +17,38 @@ import sqlite3
17
17
  import os
18
18
  import json
19
19
  import hashlib
20
+ import unicodedata
21
+ import re
20
22
  from datetime import datetime
21
23
  from typing import List, Dict, Optional, Tuple
22
24
  from pathlib import Path
23
25
  from difflib import SequenceMatcher
24
26
 
25
27
 
28
+ def _normalize_for_matching(text: str) -> str:
29
+ """Normalize text for exact matching.
30
+
31
+ Handles invisible differences that would cause exact match to fail:
32
+ - Unicode normalization (NFC)
33
+ - Multiple whitespace -> single space
34
+ - Leading/trailing whitespace
35
+ - Non-breaking spaces -> regular spaces
36
+ """
37
+ if not text:
38
+ return ""
39
+ # Unicode normalize (NFC form)
40
+ text = unicodedata.normalize('NFC', text)
41
+ # Convert non-breaking spaces and other whitespace to regular space
42
+ text = text.replace('\u00a0', ' ') # NBSP
43
+ text = text.replace('\u2007', ' ') # Figure space
44
+ text = text.replace('\u202f', ' ') # Narrow NBSP
45
+ # Collapse multiple whitespace to single space
46
+ text = re.sub(r'\s+', ' ', text)
47
+ # Strip leading/trailing whitespace
48
+ text = text.strip()
49
+ return text
50
+
51
+
26
52
  class DatabaseManager:
27
53
  """Manages SQLite database for translation resources"""
28
54
 
@@ -655,17 +681,19 @@ class DatabaseManager:
655
681
  # TRANSLATION MEMORY METHODS
656
682
  # ============================================
657
683
 
658
- def add_translation_unit(self, source: str, target: str, source_lang: str,
684
+ def add_translation_unit(self, source: str, target: str, source_lang: str,
659
685
  target_lang: str, tm_id: str = 'project',
660
686
  project_id: str = None, context_before: str = None,
661
687
  context_after: str = None, notes: str = None) -> int:
662
688
  """
663
689
  Add translation unit to database
664
-
690
+
665
691
  Returns: ID of inserted/updated entry
666
692
  """
667
- # Generate hash for fast exact matching
668
- source_hash = hashlib.md5(source.encode('utf-8')).hexdigest()
693
+ # Generate hash from NORMALIZED source for consistent exact matching
694
+ # This handles invisible differences like Unicode normalization, whitespace variations
695
+ normalized_source = _normalize_for_matching(source)
696
+ source_hash = hashlib.md5(normalized_source.encode('utf-8')).hexdigest()
669
697
 
670
698
  try:
671
699
  self.cursor.execute("""
@@ -687,33 +715,38 @@ class DatabaseManager:
687
715
  return None
688
716
 
689
717
  def get_exact_match(self, source: str, tm_ids: List[str] = None,
690
- source_lang: str = None, target_lang: str = None,
718
+ source_lang: str = None, target_lang: str = None,
691
719
  bidirectional: bool = True) -> Optional[Dict]:
692
720
  """
693
721
  Get exact match from TM
694
-
722
+
695
723
  Args:
696
724
  source: Source text to match
697
725
  tm_ids: List of TM IDs to search (None = all)
698
726
  source_lang: Filter by source language (base code matching: 'en' matches 'en-US', 'en-GB', etc.)
699
727
  target_lang: Filter by target language (base code matching)
700
728
  bidirectional: If True, search both directions (nl→en AND en→nl)
701
-
729
+
702
730
  Returns: Dictionary with match data or None
703
731
  """
704
732
  from modules.tmx_generator import get_base_lang_code
705
-
733
+
734
+ # Try both normalized and non-normalized hashes for backward compatibility
735
+ # This handles invisible differences like Unicode normalization, whitespace variations
706
736
  source_hash = hashlib.md5(source.encode('utf-8')).hexdigest()
707
-
737
+ normalized_source = _normalize_for_matching(source)
738
+ normalized_hash = hashlib.md5(normalized_source.encode('utf-8')).hexdigest()
739
+
708
740
  # Get base language codes for comparison
709
741
  src_base = get_base_lang_code(source_lang) if source_lang else None
710
742
  tgt_base = get_base_lang_code(target_lang) if target_lang else None
711
-
743
+
744
+ # Search using both original hash and normalized hash
712
745
  query = """
713
- SELECT * FROM translation_units
714
- WHERE source_hash = ? AND source_text = ?
746
+ SELECT * FROM translation_units
747
+ WHERE (source_hash = ? OR source_hash = ?)
715
748
  """
716
- params = [source_hash, source]
749
+ params = [source_hash, normalized_hash]
717
750
 
718
751
  if tm_ids:
719
752
  placeholders = ','.join('?' * len(tm_ids))
@@ -840,11 +873,15 @@ class DatabaseManager:
840
873
  bidirectional: If True, search both directions (nl→en AND en→nl)
841
874
 
842
875
  Returns: List of matches with similarity scores
876
+
877
+ Note: When multiple TMs are provided, searches each TM separately to ensure
878
+ good matches from smaller TMs aren't pushed out by BM25 keyword ranking
879
+ from larger TMs. Results are merged and sorted by actual similarity.
843
880
  """
844
881
  # For better FTS5 matching, tokenize the query and escape special chars
845
882
  # FTS5 special characters: " ( ) - : , . ! ?
846
883
  import re
847
- from modules.tmx_generator import get_base_lang_code
884
+ from modules.tmx_generator import get_base_lang_code, get_lang_match_variants
848
885
 
849
886
  # Strip HTML/XML tags from source for clean text search
850
887
  text_without_tags = re.sub(r'<[^>]+>', '', source)
@@ -868,22 +905,57 @@ class DatabaseManager:
868
905
  # This helps find similar long segments more reliably
869
906
  search_terms_for_query = all_search_terms[:20]
870
907
 
871
- print(f"[DEBUG] search_fuzzy_matches: source='{source[:50]}...', {len(all_search_terms)} terms")
872
-
873
908
  if not search_terms_for_query:
874
909
  # If no valid terms, return empty results
875
- print(f"[DEBUG] search_fuzzy_matches: No valid search terms, returning empty")
876
910
  return []
877
911
 
878
912
  # Quote each term to prevent FTS5 syntax errors
879
913
  fts_query = ' OR '.join(f'"{term}"' for term in search_terms_for_query)
880
- print(f"[DEBUG] search_fuzzy_matches: FTS query terms = {search_terms_for_query[:10]}...")
881
914
 
882
915
  # Get base language codes for comparison
883
916
  src_base = get_base_lang_code(source_lang) if source_lang else None
884
917
  tgt_base = get_base_lang_code(target_lang) if target_lang else None
885
918
 
886
- # Use FTS5 for initial candidate retrieval (fast)
919
+ # MULTI-TM FIX: Search each TM separately to avoid BM25 ranking issues
920
+ # When a large TM is combined with a small TM, the large TM's many keyword matches
921
+ # push down genuinely similar sentences from the small TM
922
+ tms_to_search = tm_ids if tm_ids else [None] # None means search all TMs together
923
+
924
+ all_results = []
925
+
926
+ for tm_id in tms_to_search:
927
+ # Search this specific TM (or all if tm_id is None)
928
+ tm_results = self._search_single_tm_fuzzy(
929
+ source, fts_query, [tm_id] if tm_id else None,
930
+ threshold, max_results, src_base, tgt_base,
931
+ source_lang, target_lang, bidirectional
932
+ )
933
+ all_results.extend(tm_results)
934
+
935
+ # Deduplicate by source_text (keep highest similarity for each unique source)
936
+ seen = {}
937
+ for result in all_results:
938
+ key = result['source_text']
939
+ if key not in seen or result['similarity'] > seen[key]['similarity']:
940
+ seen[key] = result
941
+
942
+ deduped_results = list(seen.values())
943
+
944
+ # Sort ALL results by similarity (highest first) - this ensures the 76% match
945
+ # appears before 40% matches regardless of which TM they came from
946
+ deduped_results.sort(key=lambda x: x['similarity'], reverse=True)
947
+
948
+ return deduped_results[:max_results]
949
+
950
+ def _search_single_tm_fuzzy(self, source: str, fts_query: str, tm_ids: List[str],
951
+ threshold: float, max_results: int,
952
+ src_base: str, tgt_base: str,
953
+ source_lang: str, target_lang: str,
954
+ bidirectional: bool) -> List[Dict]:
955
+ """Search a single TM (or all TMs if tm_ids is None) for fuzzy matches"""
956
+ from modules.tmx_generator import get_lang_match_variants
957
+
958
+ # Build query for this TM
887
959
  query = """
888
960
  SELECT tu.*,
889
961
  bm25(translation_units_fts) as relevance
@@ -893,13 +965,12 @@ class DatabaseManager:
893
965
  """
894
966
  params = [fts_query]
895
967
 
896
- if tm_ids:
968
+ if tm_ids and tm_ids[0] is not None:
897
969
  placeholders = ','.join('?' * len(tm_ids))
898
970
  query += f" AND tu.tm_id IN ({placeholders})"
899
971
  params.extend(tm_ids)
900
972
 
901
973
  # Use flexible language matching (matches 'nl', 'nl-NL', 'Dutch', etc.)
902
- from modules.tmx_generator import get_lang_match_variants
903
974
  if src_base:
904
975
  src_variants = get_lang_match_variants(source_lang)
905
976
  src_conditions = []
@@ -920,19 +991,16 @@ class DatabaseManager:
920
991
  params.append(f"{variant}-%")
921
992
  query += f" AND ({' OR '.join(tgt_conditions)})"
922
993
 
923
- # Get more candidates than needed for proper scoring (increase limit for long segments)
924
- # Long segments need MANY more candidates because BM25 ranking may push down
925
- # the truly similar entries in favor of entries matching more search terms
994
+ # Per-TM candidate limit - INCREASED to catch more potential fuzzy matches
995
+ # When multiple TMs are searched, BM25 ranking can push genuinely similar
996
+ # entries far down the list due to common word matches in other entries
926
997
  candidate_limit = max(500, max_results * 50)
927
998
  query += f" ORDER BY relevance DESC LIMIT {candidate_limit}"
928
999
 
929
- print(f"[DEBUG] search_fuzzy_matches: Executing query (limit={candidate_limit})...")
930
-
931
1000
  try:
932
1001
  self.cursor.execute(query, params)
933
1002
  all_rows = self.cursor.fetchall()
934
1003
  except Exception as e:
935
- print(f"[DEBUG] search_fuzzy_matches: SQL ERROR: {e}")
936
1004
  return []
937
1005
 
938
1006
  results = []
@@ -948,8 +1016,6 @@ class DatabaseManager:
948
1016
  match_dict['match_pct'] = int(similarity * 100)
949
1017
  results.append(match_dict)
950
1018
 
951
- print(f"[DEBUG] search_fuzzy_matches: After threshold filter ({threshold}): {len(results)} matches")
952
-
953
1019
  # If bidirectional, also search reverse direction
954
1020
  if bidirectional and src_base and tgt_base:
955
1021
  query = """
@@ -961,13 +1027,12 @@ class DatabaseManager:
961
1027
  """
962
1028
  params = [fts_query]
963
1029
 
964
- if tm_ids:
1030
+ if tm_ids and tm_ids[0] is not None:
965
1031
  placeholders = ','.join('?' * len(tm_ids))
966
1032
  query += f" AND tu.tm_id IN ({placeholders})"
967
1033
  params.extend(tm_ids)
968
1034
 
969
1035
  # Reversed language filters with flexible matching
970
- # For reverse: TM target_lang should match our source_lang, TM source_lang should match our target_lang
971
1036
  src_variants = get_lang_match_variants(source_lang)
972
1037
  tgt_variants = get_lang_match_variants(target_lang)
973
1038
 
@@ -991,26 +1056,27 @@ class DatabaseManager:
991
1056
 
992
1057
  query += f" ORDER BY relevance DESC LIMIT {max_results * 5}"
993
1058
 
994
- self.cursor.execute(query, params)
995
-
996
- for row in self.cursor.fetchall():
997
- match_dict = dict(row)
998
- # Calculate similarity against target_text (since we're reversing)
999
- similarity = self.calculate_similarity(source, match_dict['target_text'])
1059
+ try:
1060
+ self.cursor.execute(query, params)
1000
1061
 
1001
- # Only include matches above threshold
1002
- if similarity >= threshold:
1003
- # Swap source/target for reverse match
1004
- match_dict['source_text'], match_dict['target_text'] = match_dict['target_text'], match_dict['source_text']
1005
- match_dict['source_lang'], match_dict['target_lang'] = match_dict['target_lang'], match_dict['source_lang']
1006
- match_dict['similarity'] = similarity
1007
- match_dict['match_pct'] = int(similarity * 100)
1008
- match_dict['reverse_match'] = True
1009
- results.append(match_dict)
1010
-
1011
- # Sort by similarity (highest first) and limit results
1012
- results.sort(key=lambda x: x['similarity'], reverse=True)
1013
- return results[:max_results]
1062
+ for row in self.cursor.fetchall():
1063
+ match_dict = dict(row)
1064
+ # Calculate similarity against target_text (since we're reversing)
1065
+ similarity = self.calculate_similarity(source, match_dict['target_text'])
1066
+
1067
+ # Only include matches above threshold
1068
+ if similarity >= threshold:
1069
+ # Swap source/target for reverse match
1070
+ match_dict['source_text'], match_dict['target_text'] = match_dict['target_text'], match_dict['source_text']
1071
+ match_dict['source_lang'], match_dict['target_lang'] = match_dict['target_lang'], match_dict['source_lang']
1072
+ match_dict['similarity'] = similarity
1073
+ match_dict['match_pct'] = int(similarity * 100)
1074
+ match_dict['reverse_match'] = True
1075
+ results.append(match_dict)
1076
+ except Exception as e:
1077
+ print(f"[DEBUG] _search_single_tm_fuzzy (reverse): SQL ERROR: {e}")
1078
+
1079
+ return results
1014
1080
 
1015
1081
  def search_all(self, source: str, tm_ids: List[str] = None, enabled_only: bool = True,
1016
1082
  threshold: float = 0.75, max_results: int = 10) -> List[Dict]:
@@ -1124,6 +1190,12 @@ class DatabaseManager:
1124
1190
  Uses FTS5 full-text search for fast matching on millions of segments.
1125
1191
  Falls back to LIKE queries if FTS5 fails.
1126
1192
 
1193
+ Language filters define what you're searching FOR and what translation you want:
1194
+ - "From: Dutch, To: English" = Search for Dutch text, show English translations
1195
+ - Searches ALL TMs (regardless of their stored language pair direction)
1196
+ - Automatically swaps columns when needed (e.g., finds Dutch in target column of EN→NL TM)
1197
+ - This is MORE intuitive than traditional CAT tools that only search specific TM directions
1198
+
1127
1199
  Args:
1128
1200
  query: Text to search for
1129
1201
  tm_ids: List of TM IDs to search (None = all)
@@ -1141,6 +1213,12 @@ class DatabaseManager:
1141
1213
  # Wrap in quotes for phrase search
1142
1214
  fts_query = f'"{fts_query}"'
1143
1215
 
1216
+ # When language filters specified, we need to search intelligently:
1217
+ # - Don't filter by TM language pair (search ALL TMs)
1218
+ # - Search in BOTH columns to find text
1219
+ # - Swap columns if needed to show correct language order
1220
+ use_smart_search = (source_langs or target_langs)
1221
+
1144
1222
  try:
1145
1223
  # Use FTS5 for fast full-text search
1146
1224
  if direction == 'source':
@@ -1171,20 +1249,105 @@ class DatabaseManager:
1171
1249
  fts_sql += f" AND tu.tm_id IN ({placeholders})"
1172
1250
  params.extend(tm_ids)
1173
1251
 
1174
- # Add language filters (support for list of variants)
1175
- if source_langs:
1176
- placeholders = ','.join('?' * len(source_langs))
1177
- fts_sql += f" AND tu.source_lang IN ({placeholders})"
1178
- params.extend(source_langs)
1179
- if target_langs:
1180
- placeholders = ','.join('?' * len(target_langs))
1181
- fts_sql += f" AND tu.target_lang IN ({placeholders})"
1182
- params.extend(target_langs)
1252
+ # DON'T filter by language when smart search active
1253
+ # (we need to search all TMs and figure out which column has our language)
1254
+ if not use_smart_search:
1255
+ # Traditional filtering; note that use_smart_search is derived from these same filters, so both checks below are always falsy in this branch
1256
+ if source_langs:
1257
+ placeholders = ','.join('?' * len(source_langs))
1258
+ fts_sql += f" AND tu.source_lang IN ({placeholders})"
1259
+ params.extend(source_langs)
1260
+ if target_langs:
1261
+ placeholders = ','.join('?' * len(target_langs))
1262
+ fts_sql += f" AND tu.target_lang IN ({placeholders})"
1263
+ params.extend(target_langs)
1183
1264
 
1184
1265
  fts_sql += " ORDER BY tu.modified_date DESC LIMIT 100"
1185
1266
 
1186
1267
  self.cursor.execute(fts_sql, params)
1187
- return [dict(row) for row in self.cursor.fetchall()]
1268
+ raw_results = [dict(row) for row in self.cursor.fetchall()]
1269
+
1270
+ # Smart search: Filter and swap based on language metadata
1271
+ if use_smart_search:
1272
+ processed_results = []
1273
+ for row in raw_results:
1274
+ row_src_lang = row.get('source_lang', '')
1275
+ row_tgt_lang = row.get('target_lang', '')
1276
+
1277
+ # Check if this row matches our language requirements
1278
+ # If "From: Dutch, To: English":
1279
+ # - Accept if source=nl and target=en (normal)
1280
+ # - Accept if source=en and target=nl (swap needed)
1281
+
1282
+ matches = False
1283
+ needs_swap = False
1284
+
1285
+ if source_langs and target_langs:
1286
+ # Both filters specified
1287
+ if row_src_lang in source_langs and row_tgt_lang in target_langs:
1288
+ # Perfect match - no swap
1289
+ matches = True
1290
+ needs_swap = False
1291
+ elif row_src_lang in target_langs and row_tgt_lang in source_langs:
1292
+ # Reversed - needs swap
1293
+ matches = True
1294
+ needs_swap = True
1295
+ elif source_langs:
1296
+ # Only "From" specified - just check if Dutch is in EITHER column
1297
+ if row_src_lang in source_langs:
1298
+ matches = True
1299
+ needs_swap = False
1300
+ elif row_tgt_lang in source_langs:
1301
+ matches = True
1302
+ needs_swap = True
1303
+ elif target_langs:
1304
+ # Only "To" specified - just check if English is in EITHER column
1305
+ if row_tgt_lang in target_langs:
1306
+ matches = True
1307
+ needs_swap = False
1308
+ elif row_src_lang in target_langs:
1309
+ matches = True
1310
+ needs_swap = True
1311
+
1312
+ if matches:
1313
+ # CRITICAL CHECK: Verify the search text is actually in the correct column
1314
+ # If user searches for Dutch with "From: Dutch", the text must be in the source column (after any swap)
1315
+ # This prevents finding Dutch text when user asks to search FOR English
1316
+
1317
+ if needs_swap:
1318
+ # After swap, check if query is in the NEW source column (was target)
1319
+ text_to_check = row['target_text'].lower()
1320
+ else:
1321
+ # No swap, check if query is in source column
1322
+ text_to_check = row['source_text'].lower()
1323
+
1324
+ # Only include if query text is actually in the source column
1325
+ if query.lower() in text_to_check:
1326
+ if needs_swap:
1327
+ # Swap columns to show correct language order
1328
+ swapped_row = row.copy()
1329
+ swapped_row['source'] = row['target_text']
1330
+ swapped_row['target'] = row['source_text']
1331
+ swapped_row['source_lang'] = row['target_lang']
1332
+ swapped_row['target_lang'] = row['source_lang']
1333
+ processed_results.append(swapped_row)
1334
+ else:
1335
+ # No swap needed - just rename columns
1336
+ processed_row = row.copy()
1337
+ processed_row['source'] = row['source_text']
1338
+ processed_row['target'] = row['target_text']
1339
+ processed_results.append(processed_row)
1340
+
1341
+ return processed_results
1342
+ else:
1343
+ # No language filters - just rename columns
1344
+ processed_results = []
1345
+ for row in raw_results:
1346
+ processed_row = row.copy()
1347
+ processed_row['source'] = row['source_text']
1348
+ processed_row['target'] = row['target_text']
1349
+ processed_results.append(processed_row)
1350
+ return processed_results
1188
1351
 
1189
1352
  except Exception as e:
1190
1353
  # Fallback to LIKE query if FTS5 fails (e.g., index not built)
@@ -1312,6 +1475,10 @@ class DatabaseManager:
1312
1475
  # Note: termbase_id is stored as TEXT in termbase_terms but INTEGER in termbases
1313
1476
  # Use CAST to ensure proper comparison
1314
1477
  # IMPORTANT: Join with termbase_activation to get the ACTUAL priority for this project
1478
+ # CRITICAL FIX: Also match when search_term starts with the glossary term
1479
+ # This handles cases like searching for "ca." when glossary has "ca"
1480
+ # AND searching for "ca" when glossary has "ca."
1481
+ # We also strip trailing punctuation from glossary terms for comparison
1315
1482
  query = """
1316
1483
  SELECT
1317
1484
  t.id, t.source_term, t.target_term, t.termbase_id, t.priority,
@@ -1329,19 +1496,30 @@ class DatabaseManager:
1329
1496
  LOWER(t.source_term) = LOWER(?) OR
1330
1497
  LOWER(t.source_term) LIKE LOWER(?) OR
1331
1498
  LOWER(t.source_term) LIKE LOWER(?) OR
1332
- LOWER(t.source_term) LIKE LOWER(?)
1499
+ LOWER(t.source_term) LIKE LOWER(?) OR
1500
+ LOWER(RTRIM(t.source_term, '.!?,;:')) = LOWER(?) OR
1501
+ LOWER(?) LIKE LOWER(t.source_term) || '%' OR
1502
+ LOWER(?) = LOWER(RTRIM(t.source_term, '.!?,;:'))
1333
1503
  )
1334
1504
  AND (ta.is_active = 1 OR tb.is_project_termbase = 1)
1335
1505
  """
1336
- # Exact match, word at start, word at end, word in middle
1337
- # Use LOWER() for case-insensitive matching (handles "Edelmetalen" = "edelmetalen")
1338
- # IMPORTANT: project_id must be first param for the LEFT JOIN ta.project_id = ? above
1506
+ # Matching patterns:
1507
+ # 1. Exact match: source_term = search_term
1508
+ # 2. Glossary term starts with search: source_term LIKE "search_term %"
1509
+ # 3. Glossary term ends with search: source_term LIKE "% search_term"
1510
+ # 4. Glossary term contains search: source_term LIKE "% search_term %"
1511
+ # 5. Glossary term (stripped) = search_term: RTRIM(source_term) = search_term (handles "ca." = "ca")
1512
+ # 6. Search starts with glossary term: search_term LIKE source_term || '%'
1513
+ # 7. Search = glossary term stripped: search_term = RTRIM(source_term)
1339
1514
  params = [
1340
1515
  project_id if project_id else 0, # Use 0 if no project (won't match any activation records)
1341
1516
  search_term,
1342
1517
  f"{search_term} %",
1343
1518
  f"% {search_term}",
1344
- f"% {search_term} %"
1519
+ f"% {search_term} %",
1520
+ search_term, # For RTRIM comparison
1521
+ search_term, # For reverse LIKE
1522
+ search_term # For reverse RTRIM comparison
1345
1523
  ]
1346
1524
 
1347
1525
  # Language filters - if term has no language, use termbase language for filtering
@@ -301,6 +301,10 @@ class KeyboardShortcutsWidget(QWidget):
301
301
 
302
302
  def load_shortcuts(self):
303
303
  """Load shortcuts into the table"""
304
+ # CRITICAL: Disable sorting during table modifications to prevent
305
+ # items from becoming disassociated from their rows (causes vanishing text bug)
306
+ self.table.setSortingEnabled(False)
307
+
304
308
  self.table.setRowCount(0)
305
309
 
306
310
  all_shortcuts = self.manager.get_all_shortcuts()
@@ -362,6 +366,9 @@ class KeyboardShortcutsWidget(QWidget):
362
366
  self.table.setItem(row, 4, status_item)
363
367
 
364
368
  row += 1
369
+
370
+ # Re-enable sorting after all modifications are complete
371
+ self.table.setSortingEnabled(True)
365
372
 
366
373
  def _on_enabled_changed(self, state):
367
374
  """Handle checkbox state change for enabling/disabling shortcuts"""
@@ -172,7 +172,7 @@ class NonTranslatablesManager:
172
172
  Initialize manager.
173
173
 
174
174
  Args:
175
- base_path: Base path for NT files (typically user_data/Translation_Resources/Non-translatables)
175
+ base_path: Base path for NT files (typically user_data/resources/non_translatables)
176
176
  log_callback: Optional logging function
177
177
  """
178
178
  self.base_path = Path(base_path)
@@ -29,7 +29,7 @@ class PromptLibraryMigration:
29
29
  def __init__(self, prompt_library_dir: str, log_callback=None):
30
30
  """
31
31
  Args:
32
- prompt_library_dir: Path to user_data/Prompt_Library
32
+ prompt_library_dir: Path to user_data/prompt_library
33
33
  log_callback: Function for logging
34
34
  """
35
35
  self.prompt_library_dir = Path(prompt_library_dir)
modules/setup_wizard.py CHANGED
@@ -80,17 +80,17 @@ class SetupWizard:
80
80
  "Supervertaler will create the following structure:\n\n"
81
81
  f"{self.selected_path}\n"
82
82
  f" ├── api_keys.txt\n"
83
- f" ├── Prompt_Library/\n"
83
+ f" ├── prompt_library/\n"
84
84
  f" │ ├── 1_System_Prompts/\n"
85
85
  f" │ ├── 2_Domain_Prompts/\n"
86
86
  f" │ ├── 3_Project_Prompts/\n"
87
87
  f" │ └── 4_Style_Guides/\n"
88
- f" ├── Translation_Resources/\n"
88
+ f" ├── resources/\n"
89
89
  f" │ ├── TMs/\n"
90
90
  f" │ ├── Glossaries/\n"
91
- f" │ ├── Non-translatables/\n"
92
- f" │ └── Segmentation_rules/\n"
93
- f" └── Projects/\n\n"
91
+ f" │ ├── non_translatables/\n"
92
+ f" │ └── segmentation_rules/\n"
93
+ f" └── projects/\n\n"
94
94
  "Is this correct?"
95
95
  )
96
96
 
@@ -140,9 +140,9 @@ class SetupWizard:
140
140
  f"Your data folder: {self.selected_path}\n\n"
141
141
  f"Created:\n"
142
142
  f" • api_keys.txt (add your API keys here)\n"
143
- f" • Prompt_Library/ (your prompts)\n"
144
- f" • Translation_Resources/ (TMs, glossaries)\n"
145
- f" • Projects/ (your work)\n\n"
143
+ f" • prompt_library/ (your prompts)\n"
144
+ f" • resources/ (TMs, glossaries)\n"
145
+ f" • projects/ (your work)\n\n"
146
146
  f"All your translation memories, prompts, and projects\n"
147
147
  f"will be stored in this location."
148
148
  )