supervertaler 1.9.172__py3-none-any.whl → 1.9.173__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of supervertaler might be problematic. Click here for more details.

Supervertaler.py CHANGED
@@ -5598,7 +5598,7 @@ class PreTranslationWorker(QThread):
5598
5598
  match = matches[0]
5599
5599
  match_pct = match.get('match_pct', 0)
5600
5600
  print(f"🔍 TM PRE-TRANSLATE: Best match pct: {match_pct}")
5601
- if match_pct >= 70: # Accept matches 70% and above
5601
+ if match_pct >= 75: # Accept matches 75% and above
5602
5602
  return match.get('target', '')
5603
5603
  return None
5604
5604
  except Exception as e:
@@ -41148,25 +41148,41 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
41148
41148
  self.log(f"📖 Pre-translate from TM: Using activated TMs: {tm_ids}")
41149
41149
 
41150
41150
  # Create progress dialog for TM pre-translation
41151
+ import time
41152
+ start_time = time.time()
41153
+ total_segments = len(segments_needing_translation)
41154
+
41151
41155
  progress = QProgressDialog(
41152
- f"Pre-translating {len(segments_needing_translation)} segments from TM...",
41153
- "Cancel", 0, len(segments_needing_translation), self
41156
+ f"Pre-translating {total_segments} segments from TM...",
41157
+ "Cancel", 0, total_segments, self
41154
41158
  )
41155
- progress.setWindowTitle("TM Pre-Translation")
41159
+ progress.setWindowTitle("🔍 TM Pre-Translation")
41156
41160
  progress.setWindowModality(Qt.WindowModality.WindowModal)
41157
41161
  progress.setMinimumDuration(0) # Show immediately
41162
+ progress.setMinimumWidth(450) # Wider dialog for more info
41158
41163
  progress.show()
41159
41164
  QApplication.processEvents()
41160
-
41165
+
41161
41166
  success_count = 0
41162
41167
  no_match_count = 0
41163
-
41168
+
41164
41169
  for idx, (row_index, segment) in enumerate(segments_needing_translation):
41165
41170
  if progress.wasCanceled():
41166
41171
  break
41167
-
41172
+
41168
41173
  progress.setValue(idx)
41169
- progress.setLabelText(f"Searching TM for segment {idx + 1}/{len(segments_needing_translation)}...")
41174
+
41175
+ # Build informative progress label
41176
+ elapsed = time.time() - start_time
41177
+ elapsed_str = f"{int(elapsed // 60)}:{int(elapsed % 60):02d}"
41178
+ source_preview = segment.source[:50] + "..." if len(segment.source) > 50 else segment.source
41179
+ label_text = (
41180
+ f"Searching TM for segment {idx + 1} of {total_segments}...\n\n"
41181
+ f"Current: \"{source_preview}\"\n"
41182
+ f"Matches found: {success_count} | Elapsed: {elapsed_str}\n\n"
41183
+ f"ℹ️ This may take a while for large documents."
41184
+ )
41185
+ progress.setLabelText(label_text)
41170
41186
  QApplication.processEvents()
41171
41187
 
41172
41188
  try:
@@ -41184,7 +41200,7 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
41184
41200
  else:
41185
41201
  no_match_count += 1
41186
41202
  else:
41187
- # Fuzzy matching enabled - get best match ≥70%
41203
+ # Fuzzy matching enabled - get best match ≥75%
41188
41204
  matches = self.tm_database.search_all(
41189
41205
  segment.source,
41190
41206
  tm_ids=tm_ids,
@@ -41194,7 +41210,7 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
41194
41210
  if matches and len(matches) > 0:
41195
41211
  best_match = matches[0]
41196
41212
  match_pct = best_match.get('match_pct', 0)
41197
- if match_pct >= 70:
41213
+ if match_pct >= 75:
41198
41214
  segment.target = best_match.get('target', '')
41199
41215
  segment.status = "Translated"
41200
41216
  success_count += 1
@@ -41562,7 +41578,7 @@ OUTPUT ONLY THE SEGMENT MARKERS. DO NOT ADD EXPLANATIONS BEFORE OR AFTER."""
41562
41578
  match_pct = match.get('match_pct', 0)
41563
41579
  tm_match = match.get('target', '')
41564
41580
 
41565
- if match_pct >= 70: # Accept matches 70% and above
41581
+ if match_pct >= 75: # Accept matches 75% and above
41566
41582
  segment.target = tm_match
41567
41583
  segment.status = "translated" if match_pct == 100 else "pre-translated"
41568
41584
  translated_count += 1
@@ -17,12 +17,38 @@ import sqlite3
17
17
  import os
18
18
  import json
19
19
  import hashlib
20
+ import unicodedata
21
+ import re
20
22
  from datetime import datetime
21
23
  from typing import List, Dict, Optional, Tuple
22
24
  from pathlib import Path
23
25
  from difflib import SequenceMatcher
24
26
 
25
27
 
28
+ def _normalize_for_matching(text: str) -> str:
29
+ """Normalize text for exact matching.
30
+
31
+ Handles invisible differences that would cause exact match to fail:
32
+ - Unicode normalization (NFC)
33
+ - Multiple whitespace -> single space
34
+ - Leading/trailing whitespace
35
+ - Non-breaking spaces -> regular spaces
36
+ """
37
+ if not text:
38
+ return ""
39
+ # Unicode normalize (NFC form)
40
+ text = unicodedata.normalize('NFC', text)
41
+ # Convert non-breaking spaces and other whitespace to regular space
42
+ text = text.replace('\u00a0', ' ') # NBSP
43
+ text = text.replace('\u2007', ' ') # Figure space
44
+ text = text.replace('\u202f', ' ') # Narrow NBSP
45
+ # Collapse multiple whitespace to single space
46
+ text = re.sub(r'\s+', ' ', text)
47
+ # Strip leading/trailing whitespace
48
+ text = text.strip()
49
+ return text
50
+
51
+
26
52
  class DatabaseManager:
27
53
  """Manages SQLite database for translation resources"""
28
54
 
@@ -655,17 +681,19 @@ class DatabaseManager:
655
681
  # TRANSLATION MEMORY METHODS
656
682
  # ============================================
657
683
 
658
- def add_translation_unit(self, source: str, target: str, source_lang: str,
684
+ def add_translation_unit(self, source: str, target: str, source_lang: str,
659
685
  target_lang: str, tm_id: str = 'project',
660
686
  project_id: str = None, context_before: str = None,
661
687
  context_after: str = None, notes: str = None) -> int:
662
688
  """
663
689
  Add translation unit to database
664
-
690
+
665
691
  Returns: ID of inserted/updated entry
666
692
  """
667
- # Generate hash for fast exact matching
668
- source_hash = hashlib.md5(source.encode('utf-8')).hexdigest()
693
+ # Generate hash from NORMALIZED source for consistent exact matching
694
+ # This handles invisible differences like Unicode normalization, whitespace variations
695
+ normalized_source = _normalize_for_matching(source)
696
+ source_hash = hashlib.md5(normalized_source.encode('utf-8')).hexdigest()
669
697
 
670
698
  try:
671
699
  self.cursor.execute("""
@@ -687,33 +715,38 @@ class DatabaseManager:
687
715
  return None
688
716
 
689
717
  def get_exact_match(self, source: str, tm_ids: List[str] = None,
690
- source_lang: str = None, target_lang: str = None,
718
+ source_lang: str = None, target_lang: str = None,
691
719
  bidirectional: bool = True) -> Optional[Dict]:
692
720
  """
693
721
  Get exact match from TM
694
-
722
+
695
723
  Args:
696
724
  source: Source text to match
697
725
  tm_ids: List of TM IDs to search (None = all)
698
726
  source_lang: Filter by source language (base code matching: 'en' matches 'en-US', 'en-GB', etc.)
699
727
  target_lang: Filter by target language (base code matching)
700
728
  bidirectional: If True, search both directions (nl→en AND en→nl)
701
-
729
+
702
730
  Returns: Dictionary with match data or None
703
731
  """
704
732
  from modules.tmx_generator import get_base_lang_code
705
-
733
+
734
+ # Try both normalized and non-normalized hashes for backward compatibility
735
+ # This handles invisible differences like Unicode normalization, whitespace variations
706
736
  source_hash = hashlib.md5(source.encode('utf-8')).hexdigest()
707
-
737
+ normalized_source = _normalize_for_matching(source)
738
+ normalized_hash = hashlib.md5(normalized_source.encode('utf-8')).hexdigest()
739
+
708
740
  # Get base language codes for comparison
709
741
  src_base = get_base_lang_code(source_lang) if source_lang else None
710
742
  tgt_base = get_base_lang_code(target_lang) if target_lang else None
711
-
743
+
744
+ # Search using both original hash and normalized hash
712
745
  query = """
713
- SELECT * FROM translation_units
714
- WHERE source_hash = ? AND source_text = ?
746
+ SELECT * FROM translation_units
747
+ WHERE (source_hash = ? OR source_hash = ?)
715
748
  """
716
- params = [source_hash, source]
749
+ params = [source_hash, normalized_hash]
717
750
 
718
751
  if tm_ids:
719
752
  placeholders = ','.join('?' * len(tm_ids))
@@ -123,8 +123,8 @@ class TMDatabase:
123
123
  if source_lang and target_lang:
124
124
  self.set_tm_languages(source_lang, target_lang)
125
125
 
126
- # Global fuzzy threshold (70% minimum similarity for fuzzy matches)
127
- self.fuzzy_threshold = 0.7
126
+ # Global fuzzy threshold (75% minimum similarity for fuzzy matches)
127
+ self.fuzzy_threshold = 0.75
128
128
 
129
129
  # TM metadata cache (populated from database as needed)
130
130
  # Note: Legacy 'project' and 'big_mama' TMs are no longer used.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: supervertaler
3
- Version: 1.9.172
3
+ Version: 1.9.173
4
4
  Summary: Professional AI-enhanced translation workbench with multi-LLM support, glossary system, TM, spellcheck, voice commands, and PyQt6 interface. Batteries included (core).
5
5
  Home-page: https://supervertaler.com
6
6
  Author: Michael Beijer
@@ -71,7 +71,7 @@ Dynamic: home-page
71
71
  Dynamic: license-file
72
72
  Dynamic: requires-python
73
73
 
74
- # 🚀 Supervertaler v1.9.172
74
+ # 🚀 Supervertaler v1.9.173
75
75
 
76
76
  [![PyPI version](https://badge.fury.io/py/supervertaler.svg)](https://pypi.org/project/Supervertaler/)
77
77
  [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
@@ -80,7 +80,13 @@ Dynamic: requires-python
80
80
  AI-enhanced CAT tool with multi-LLM support (GPT-4, Claude, Gemini, Ollama), innovative Superlookup concordance system offering access to multiple terminology sources (TMs, glossaries, web resources, etc.), and seamless CAT tool integration (memoQ, Trados, CafeTran, Phrase).
81
81
 
82
82
 
83
- **Current Version:** v1.9.172 (January 28, 2026)
83
+ **Current Version:** v1.9.173 (January 28, 2026)
84
+
85
+ ### IMPROVED in v1.9.173 - 🎯 Smarter TM Pre-Translation
86
+
87
+ - **Smarter TM Exact Matching**: Exact matches now use text normalization, so a match is still found when the source text differs only in whitespace (including non-breaking spaces) or in Unicode normalization form.
88
+ - **Improved Pre-Translation Dialog**: Shows the current segment, the number of matches found so far, the elapsed time, and a note that large documents may take a while.
89
+ - **Higher Fuzzy Threshold**: Raised minimum fuzzy match threshold from 70% to 75% for better quality.
84
90
 
85
91
  ### FIXED in v1.9.172 - 🐛 Fresh Projects Start Clean
86
92
 
@@ -1,4 +1,4 @@
1
- Supervertaler.py,sha256=QN11SrXGKNdKhXFRwgkumm6IRI6Q9vg_S2yj2Qbd3K4,2286283
1
+ Supervertaler.py,sha256=Hzzf8r4zCdvk8adLVgitzsdVxrOr406AppDlUc-HMJ4,2286978
2
2
  modules/__init__.py,sha256=G58XleS-EJ2sX4Kehm-3N2m618_W2Es0Kg8CW_eBG7g,327
3
3
  modules/ai_actions.py,sha256=i5MJcM-7Y6CAvKUwxmxrVHeoZAVtAP7aRDdWM5KLkO0,33877
4
4
  modules/ai_attachment_manager.py,sha256=juZlrW3UPkIkcnj0SREgOQkQROLf0fcu3ShZcKXMxsI,11361
@@ -6,7 +6,7 @@ modules/ai_file_viewer_dialog.py,sha256=lKKqUUlOEVgHmmu6aRxqH7P6ds-7dRLk4ltDyjCw
6
6
  modules/autofingers_engine.py,sha256=eJ7tBi7YJvTToe5hYTfnyGXB-qme_cHrOPZibaoR2Xw,17061
7
7
  modules/cafetran_docx_handler.py,sha256=_F7Jh0WPVaDnMhdxEsVSXuD1fN9r-S_V6i0gr86Pdfc,14076
8
8
  modules/config_manager.py,sha256=MkPY3xVFgFDkcwewLREg4BfyKueO0OJkT1cTLxehcjM,17894
9
- modules/database_manager.py,sha256=ZdsiuwF67lh-FPKPdalWsW9t6IieX_FM0fA2Bca1xSQ,80221
9
+ modules/database_manager.py,sha256=XbOHaUWq8kdHnT6ZgqPEfA6I3fL5cYiaVXkhYRohi1E,81627
10
10
  modules/database_migrations.py,sha256=Y1onFsLDV_6vzJLOpNy3WCZDohBZ2jc4prM-g2_RwLE,14085
11
11
  modules/dejavurtf_handler.py,sha256=8NZPPYtHga40SZCypHjPoJPmZTvm9rD-eEUUab7mjtg,28156
12
12
  modules/document_analyzer.py,sha256=t1rVvqLaTcpQTEja228C7zZnh8dXshK4wA9t1E9aGVk,19524
@@ -69,7 +69,7 @@ modules/tmx_editor_qt.py,sha256=PxBIUw_06PHYTBHsd8hZzVJXW8T0A0ljfz1Wjjsa4yU,1170
69
69
  modules/tmx_generator.py,sha256=pNkxwdMLvSRMMru0lkB1gvViIpg9BQy1EVhRbwoef3k,9426
70
70
  modules/tracked_changes.py,sha256=S_BIEC6r7wVAwjG42aSy_RgH4KaMAC8GS5thEvqrYdE,39480
71
71
  modules/trados_docx_handler.py,sha256=VPRAQ73cUHs_SEj6x81z1PmSxfjnwPBp9P4fXeK3KpQ,16363
72
- modules/translation_memory.py,sha256=k0GtO6ANTqxI1XMcv3D5mdAoTgcWlDT5iVsYHizKNUM,28738
72
+ modules/translation_memory.py,sha256=13PDK4_kgYrWTACWBIBypOh2DvoxY9cRT8U6ulilbh4,28739
73
73
  modules/translation_results_panel.py,sha256=DmEe0pZRSfcZFg2cWeEREK7H9vrTcPkgeuMW54Pgrys,92505
74
74
  modules/translation_services.py,sha256=lyVpWuZK1wtVtYZMDMdLoq1DHBoSaeAnp-Yejb0TlVQ,10530
75
75
  modules/unified_prompt_library.py,sha256=lzbevgjUz_qCiYSf141BB0mmuaDhSsevWju_a7welu0,26008
@@ -77,9 +77,9 @@ modules/unified_prompt_manager_qt.py,sha256=fyF3_r0N8hnImT-CcWo1AuBOQ1Dn_ExeeUCk
77
77
  modules/voice_commands.py,sha256=iBb-gjWxRMLhFH7-InSRjYJz1EIDBNA2Pog8V7TtJaY,38516
78
78
  modules/voice_dictation.py,sha256=QmitXfkG-vRt5hIQATjphHdhXfqmwhzcQcbXB6aRzIg,16386
79
79
  modules/voice_dictation_lite.py,sha256=jorY0BmWE-8VczbtGrWwt1zbnOctMoSlWOsQrcufBcc,9423
80
- supervertaler-1.9.172.dist-info/licenses/LICENSE,sha256=m28u-4qL5nXIWnJ6xlQVw__H30rWFtRK3pCOais2OuY,1092
81
- supervertaler-1.9.172.dist-info/METADATA,sha256=cDxLhwPym42J-CQfuc07BvtDUoj231w1Lyfz2_iIN6Y,48267
82
- supervertaler-1.9.172.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
83
- supervertaler-1.9.172.dist-info/entry_points.txt,sha256=NP4hiCvx-_30YYKqgr-jfJYQvHr1qTYBMfoVmKIXSM8,53
84
- supervertaler-1.9.172.dist-info/top_level.txt,sha256=9tUHBYUSfaE4S2E4W3eavJsDyYymkwLfeWAHHAPT6Dk,22
85
- supervertaler-1.9.172.dist-info/RECORD,,
80
+ supervertaler-1.9.173.dist-info/licenses/LICENSE,sha256=m28u-4qL5nXIWnJ6xlQVw__H30rWFtRK3pCOais2OuY,1092
81
+ supervertaler-1.9.173.dist-info/METADATA,sha256=4y6XCqM8xqGT4T9dLHXLn4-KUDkYRAqJ_zsoZUHGW8k,48701
82
+ supervertaler-1.9.173.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
83
+ supervertaler-1.9.173.dist-info/entry_points.txt,sha256=NP4hiCvx-_30YYKqgr-jfJYQvHr1qTYBMfoVmKIXSM8,53
84
+ supervertaler-1.9.173.dist-info/top_level.txt,sha256=9tUHBYUSfaE4S2E4W3eavJsDyYymkwLfeWAHHAPT6Dk,22
85
+ supervertaler-1.9.173.dist-info/RECORD,,