supervertaler 1.9.116__py3-none-any.whl → 1.9.172__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of supervertaler might be problematic; consult the package registry's advisory for more details.

modules/superlookup.py CHANGED
@@ -88,14 +88,18 @@ class SuperlookupEngine:
88
88
  Captured text or None if failed
89
89
  """
90
90
  try:
91
- import keyboard
92
-
93
- # Wait for hotkey to release before sending Ctrl+C
94
- time.sleep(0.2)
95
-
96
- # Use keyboard library to send Ctrl+C
97
- keyboard.press_and_release('ctrl+c')
98
- time.sleep(0.2)
91
+ # keyboard module is Windows-only
92
+ try:
93
+ import keyboard
94
+ # Wait for hotkey to release before sending Ctrl+C
95
+ time.sleep(0.2)
96
+ # Use keyboard library to send Ctrl+C
97
+ keyboard.press_and_release('ctrl+c')
98
+ time.sleep(0.2)
99
+ except ImportError:
100
+ # On non-Windows, just try to get clipboard content directly
101
+ # (user needs to have copied text manually)
102
+ pass
99
103
 
100
104
  # Get clipboard
101
105
  text = pyperclip.paste()
@@ -157,9 +161,13 @@ class SuperlookupEngine:
157
161
 
158
162
  # Convert to LookupResult format (limit results)
159
163
  for match in matches[:max_results]:
164
+ # Use 'source' and 'target' keys (matches database column names)
165
+ source_text = match.get('source', '')
166
+ target_text = match.get('target', '')
167
+ print(f"[Superlookup] Extracted: source='{source_text[:50]}...', target='{target_text[:50]}...'")
160
168
  results.append(LookupResult(
161
- source=match.get('source', ''),
162
- target=match.get('target', ''),
169
+ source=source_text,
170
+ target=target_text,
163
171
  match_percent=100, # Concordance = contains the text
164
172
  source_type='tm',
165
173
  metadata={
modules/tag_manager.py CHANGED
@@ -77,15 +77,33 @@ class TagManager:
77
77
  runs = []
78
78
  current_pos = 0
79
79
 
80
+ # Check if paragraph style has bold/italic formatting
81
+ # This handles cases like "Subtitle" or "Title" styles that are bold
82
+ style_bold = False
83
+ style_italic = False
84
+ try:
85
+ if paragraph.style and paragraph.style.font:
86
+ if paragraph.style.font.bold:
87
+ style_bold = True
88
+ if paragraph.style.font.italic:
89
+ style_italic = True
90
+ except Exception:
91
+ pass # If we can't read style, just use run-level formatting
92
+
80
93
  for run in paragraph.runs:
81
94
  text = run.text
82
95
  if not text:
83
96
  continue
84
97
 
98
+ # Combine run-level formatting with style-level formatting
99
+ # run.bold can be True, False, or None (None means inherit from style)
100
+ is_bold = run.bold if run.bold is not None else style_bold
101
+ is_italic = run.italic if run.italic is not None else style_italic
102
+
85
103
  run_info = FormattingRun(
86
104
  text=text,
87
- bold=run.bold or False,
88
- italic=run.italic or False,
105
+ bold=is_bold or False,
106
+ italic=is_italic or False,
89
107
  underline=run.underline or False,
90
108
  subscript=run.font.subscript or False if run.font else False,
91
109
  superscript=run.font.superscript or False if run.font else False,
@@ -515,6 +515,9 @@ class TermviewWidget(QWidget):
515
515
  self.current_target_lang = None
516
516
  self.current_project_id = None # Store project ID for termbase priority lookup
517
517
 
518
+ # Debug mode - disable verbose tokenization logging by default (performance)
519
+ self.debug_tokenize = False
520
+
518
521
  # Default font settings (will be updated from main app settings)
519
522
  self.current_font_family = "Segoe UI"
520
523
  self.current_font_size = 10
@@ -750,7 +753,10 @@ class TermviewWidget(QWidget):
750
753
  if not source_term or not target_term:
751
754
  continue
752
755
 
753
- key = source_term.lower()
756
+ # Strip punctuation from key to match lookup normalization
757
+ # This ensures "ca." in glossary matches "ca." token stripped to "ca"
758
+ PUNCT_CHARS_FOR_KEY = '.,;:!?\"\'\u201C\u201D\u201E\u00AB\u00BB\u2018\u2019\u201A\u2039\u203A()[]'
759
+ key = source_term.lower().strip(PUNCT_CHARS_FOR_KEY)
754
760
  if key not in matches_dict:
755
761
  matches_dict[key] = []
756
762
 
@@ -803,7 +809,8 @@ class TermviewWidget(QWidget):
803
809
 
804
810
  # Comprehensive set of quote and punctuation characters to strip
805
811
  # Using Unicode escapes to avoid encoding issues
806
- PUNCT_CHARS = '.,;:!?\"\'\u201C\u201D\u201E\u00AB\u00BB\u2018\u2019\u201A\u2039\u203A'
812
+ # Include brackets for terms like "(typisch)" to match "typisch"
813
+ PUNCT_CHARS = '.,;:!?\"\'\u201C\u201D\u201E\u00AB\u00BB\u2018\u2019\u201A\u2039\u203A()[]'
807
814
 
808
815
  # Track which terms have already been assigned shortcuts (avoid duplicates)
809
816
  assigned_shortcuts = set()
@@ -816,7 +823,6 @@ class TermviewWidget(QWidget):
816
823
 
817
824
  # Check if this is a non-translatable
818
825
  if lookup_key in nt_dict:
819
- # Create NT block
820
826
  nt_block = NTBlock(token, nt_dict[lookup_key], self, theme_manager=self.theme_manager,
821
827
  font_size=self.current_font_size, font_family=self.current_font_family,
822
828
  font_bold=self.current_font_bold)
@@ -941,13 +947,20 @@ class TermviewWidget(QWidget):
941
947
  for quote_char in '\"\'\u201C\u201D\u201E\u00AB\u00BB\u2018\u2019\u201A\u2039\u203A':
942
948
  normalized_text = normalized_text.replace(quote_char, ' ')
943
949
 
950
+ # CRITICAL FIX v1.9.118: Strip punctuation from glossary term before matching
951
+ # This allows entries like "...problemen." (with period) to match source text
952
+ # where tokenization strips the period during word splitting
953
+ # Comprehensive set of quote and punctuation characters to strip
954
+ PUNCT_CHARS = '.,;:!?\"\'\u201C\u201D\u201E\u00AB\u00BB\u2018\u2019\u201A\u2039\u203A'
955
+ normalized_term = source_lower.rstrip(PUNCT_CHARS).lstrip(PUNCT_CHARS)
956
+
944
957
  # Use word boundaries to match complete words/phrases only
945
958
  if ' ' in source_term:
946
959
  # Multi-word term - must exist as exact phrase
947
- pattern = r'\b' + re.escape(source_lower) + r'\b'
960
+ pattern = r'\b' + re.escape(normalized_term) + r'\b'
948
961
  else:
949
962
  # Single word
950
- pattern = r'\b' + re.escape(source_lower) + r'\b'
963
+ pattern = r'\b' + re.escape(normalized_term) + r'\b'
951
964
 
952
965
  # Try matching on normalized text first, then original
953
966
  if not re.search(pattern, normalized_text) and not re.search(pattern, text_lower):
@@ -985,9 +998,9 @@ class TermviewWidget(QWidget):
985
998
  Returns:
986
999
  List of tokens (words/phrases/numbers), with multi-word terms kept together
987
1000
  """
988
- # DEBUG: Log multi-word terms we're looking for
1001
+ # DEBUG: Log multi-word terms we're looking for (only if debug_tokenize enabled)
989
1002
  multi_word_terms = [k for k in matches.keys() if ' ' in k]
990
- if multi_word_terms:
1003
+ if multi_word_terms and self.debug_tokenize:
991
1004
  self.log(f"🔍 Tokenize: Looking for {len(multi_word_terms)} multi-word terms:")
992
1005
  for term in sorted(multi_word_terms, key=len, reverse=True)[:3]:
993
1006
  self.log(f" - '{term}'")
@@ -1012,11 +1025,12 @@ class TermviewWidget(QWidget):
1012
1025
  else:
1013
1026
  pattern = r'\b' + term_escaped + r'\b'
1014
1027
 
1015
- # DEBUG: Check if multi-word term is found
1028
+ # DEBUG: Check if multi-word term is found (only if debug_tokenize enabled)
1016
1029
  found = re.search(pattern, text_lower)
1017
- self.log(f"🔍 Tokenize: Pattern '{pattern}' for '{term}' → {'FOUND' if found else 'NOT FOUND'}")
1018
- if found:
1019
- self.log(f" Match at position {found.span()}: '{text[found.start():found.end()]}'")
1030
+ if self.debug_tokenize:
1031
+ self.log(f"🔍 Tokenize: Pattern '{pattern}' for '{term}' → {'FOUND' if found else 'NOT FOUND'}")
1032
+ if found:
1033
+ self.log(f" Match at position {found.span()}: '{text[found.start():found.end()]}'")
1020
1034
 
1021
1035
  # Find all matches using regex
1022
1036
  for match in re.finditer(pattern, text_lower):
@@ -1029,10 +1043,11 @@ class TermviewWidget(QWidget):
1029
1043
  original_term = text[pos:pos + len(term)]
1030
1044
  tokens_with_positions.append((pos, len(term), original_term))
1031
1045
  used_positions.update(term_positions)
1032
- self.log(f" ✅ Added multi-word token: '{original_term}' covering positions {pos}-{pos+len(term)}")
1046
+ if self.debug_tokenize:
1047
+ self.log(f" ✅ Added multi-word token: '{original_term}' covering positions {pos}-{pos+len(term)}")
1033
1048
 
1034
- # DEBUG: Log used_positions after first pass
1035
- if ' ' in sorted(matches.keys(), key=len, reverse=True)[0]:
1049
+ # DEBUG: Log used_positions after first pass (only if debug_tokenize enabled)
1050
+ if matches and ' ' in sorted(matches.keys(), key=len, reverse=True)[0] and self.debug_tokenize:
1036
1051
  self.log(f"🔍 After first pass: {len(used_positions)} positions marked as used")
1037
1052
  self.log(f" Used positions: {sorted(list(used_positions))[:20]}...")
1038
1053
 
@@ -396,6 +396,47 @@ class TMMetadataManager:
396
396
  self.log(f"✗ Error fetching active tm_ids: {e}")
397
397
  return []
398
398
 
399
+ def get_writable_tm_ids(self, project_id: Optional[int]) -> List[str]:
400
+ """
401
+ Get list of writable tm_id strings for a project.
402
+
403
+ Returns TMs where:
404
+ - The TM has an activation record for this project AND
405
+ - read_only = 0 (Write checkbox is enabled)
406
+
407
+ This is used for SAVING segments to TM, separate from get_active_tm_ids()
408
+ which is used for READING/matching from TM.
409
+
410
+ Returns:
411
+ List of tm_id strings that are writable for the project
412
+ """
413
+ if project_id is None:
414
+ # No project - return all writable TMs
415
+ try:
416
+ cursor = self.db_manager.cursor
417
+ cursor.execute("SELECT tm_id FROM translation_memories WHERE read_only = 0")
418
+ return [row[0] for row in cursor.fetchall()]
419
+ except Exception as e:
420
+ self.log(f"✗ Error fetching all writable tm_ids: {e}")
421
+ return []
422
+
423
+ try:
424
+ cursor = self.db_manager.cursor
425
+
426
+ # Return TMs where Write checkbox is enabled (read_only = 0)
427
+ # AND the TM has an activation record for this project
428
+ cursor.execute("""
429
+ SELECT tm.tm_id
430
+ FROM translation_memories tm
431
+ INNER JOIN tm_activation ta ON tm.id = ta.tm_id
432
+ WHERE ta.project_id = ? AND tm.read_only = 0
433
+ """, (project_id,))
434
+
435
+ return [row[0] for row in cursor.fetchall()]
436
+ except Exception as e:
437
+ self.log(f"✗ Error fetching writable tm_ids: {e}")
438
+ return []
439
+
399
440
  # ========================================================================
400
441
  # PROJECT TM MANAGEMENT (similar to termbases)
401
442
  # ========================================================================
modules/tmx_editor_qt.py CHANGED
@@ -2655,7 +2655,7 @@ if __name__ == "__main__":
2655
2655
  os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".supervertaler.local")
2656
2656
  )
2657
2657
  user_data_path = Path("user_data_private" if ENABLE_PRIVATE_FEATURES else "user_data")
2658
- db_path = user_data_path / "Translation_Resources" / "supervertaler.db"
2658
+ db_path = user_data_path / "resources" / "supervertaler.db"
2659
2659
 
2660
2660
  # Ensure database directory exists
2661
2661
  db_path.parent.mkdir(parents=True, exist_ok=True)
@@ -123,8 +123,8 @@ class TMDatabase:
123
123
  if source_lang and target_lang:
124
124
  self.set_tm_languages(source_lang, target_lang)
125
125
 
126
- # Global fuzzy threshold
127
- self.fuzzy_threshold = 0.75
126
+ # Global fuzzy threshold (70% minimum similarity for fuzzy matches)
127
+ self.fuzzy_threshold = 0.7
128
128
 
129
129
  # TM metadata cache (populated from database as needed)
130
130
  # Note: Legacy 'project' and 'big_mama' TMs are no longer used.
@@ -401,7 +401,7 @@ class TMDatabase:
401
401
 
402
402
  def load_tmx_file(self, filepath: str, src_lang: str, tgt_lang: str,
403
403
  tm_name: str = None, read_only: bool = False,
404
- strip_variants: bool = True) -> tuple[str, int]:
404
+ strip_variants: bool = True, progress_callback=None) -> tuple[str, int]:
405
405
  """
406
406
  Load TMX file into a new custom TM
407
407
 
@@ -412,6 +412,7 @@ class TMDatabase:
412
412
  tm_name: Custom name for TM (default: filename)
413
413
  read_only: Make TM read-only
414
414
  strip_variants: Match base languages ignoring regional variants (default: True)
415
+ progress_callback: Optional callback function(current, total, message) for progress updates
415
416
 
416
417
  Returns: (tm_id, entry_count)
417
418
  """
@@ -423,16 +424,18 @@ class TMDatabase:
423
424
  self.add_custom_tm(tm_name, tm_id, read_only=read_only)
424
425
 
425
426
  # Load TMX content
426
- loaded_count = self._load_tmx_into_db(filepath, src_lang, tgt_lang, tm_id, strip_variants=strip_variants)
427
+ loaded_count = self._load_tmx_into_db(filepath, src_lang, tgt_lang, tm_id,
428
+ strip_variants=strip_variants,
429
+ progress_callback=progress_callback)
427
430
 
428
431
  self.log(f"✓ Loaded {loaded_count} entries from {os.path.basename(filepath)}")
429
432
 
430
433
  return tm_id, loaded_count
431
434
 
432
435
  def _load_tmx_into_db(self, filepath: str, src_lang: str, tgt_lang: str, tm_id: str,
433
- strip_variants: bool = False) -> int:
436
+ strip_variants: bool = False, progress_callback=None) -> int:
434
437
  """
435
- Internal: Load TMX content into database
438
+ Internal: Load TMX content into database with chunked processing
436
439
 
437
440
  Args:
438
441
  filepath: Path to TMX file
@@ -440,12 +443,24 @@ class TMDatabase:
440
443
  tgt_lang: Target language code
441
444
  tm_id: TM identifier
442
445
  strip_variants: If True, match base languages ignoring regional variants
446
+ progress_callback: Optional callback function(current, total, message) for progress updates
443
447
  """
444
448
  loaded_count = 0
449
+ chunk_size = 1000 # Process in chunks for responsiveness
450
+ chunk_buffer = []
445
451
 
446
452
  try:
453
+ # First pass: count total TUs for progress bar
454
+ if progress_callback:
455
+ progress_callback(0, 0, "Counting translation units...")
456
+
447
457
  tree = ET.parse(filepath)
448
458
  root = tree.getroot()
459
+ total_tus = len(root.findall('.//tu'))
460
+
461
+ if progress_callback:
462
+ progress_callback(0, total_tus, f"Processing 0 / {total_tus:,} entries...")
463
+
449
464
  xml_ns = "http://www.w3.org/XML/1998/namespace"
450
465
 
451
466
  # Normalize language codes
@@ -458,6 +473,7 @@ class TMDatabase:
458
473
  src_base = get_base_lang_code(src_lang_normalized)
459
474
  tgt_base = get_base_lang_code(tgt_lang_normalized)
460
475
 
476
+ processed = 0
461
477
  for tu in root.findall('.//tu'):
462
478
  src_text, tgt_text = None, None
463
479
 
@@ -488,14 +504,43 @@ class TMDatabase:
488
504
  tgt_text = text
489
505
 
490
506
  if src_text and tgt_text:
507
+ chunk_buffer.append((src_text, tgt_text))
508
+ loaded_count += 1
509
+
510
+ # Process chunk when buffer is full
511
+ if len(chunk_buffer) >= chunk_size:
512
+ for src, tgt in chunk_buffer:
513
+ self.db.add_translation_unit(
514
+ source=src,
515
+ target=tgt,
516
+ source_lang=src_lang_normalized,
517
+ target_lang=tgt_lang_normalized,
518
+ tm_id=tm_id
519
+ )
520
+ chunk_buffer.clear()
521
+
522
+ # Update progress
523
+ if progress_callback:
524
+ progress_callback(processed + 1, total_tus,
525
+ f"Processing {loaded_count:,} / {total_tus:,} entries...")
526
+
527
+ processed += 1
528
+
529
+ # Process remaining entries in buffer
530
+ if chunk_buffer:
531
+ for src, tgt in chunk_buffer:
491
532
  self.db.add_translation_unit(
492
- source=src_text,
493
- target=tgt_text,
533
+ source=src,
534
+ target=tgt,
494
535
  source_lang=src_lang_normalized,
495
536
  target_lang=tgt_lang_normalized,
496
537
  tm_id=tm_id
497
538
  )
498
- loaded_count += 1
539
+ chunk_buffer.clear()
540
+
541
+ # Final progress update
542
+ if progress_callback:
543
+ progress_callback(total_tus, total_tus, f"Completed: {loaded_count:,} entries imported")
499
544
 
500
545
  return loaded_count
501
546
  except Exception as e:
@@ -30,7 +30,7 @@ class UnifiedPromptLibrary:
30
30
  Initialize the Unified Prompt Library.
31
31
 
32
32
  Args:
33
- library_dir: Path to unified library directory (user_data/Prompt_Library/Library)
33
+ library_dir: Path to unified library directory (user_data/prompt_library)
34
34
  log_callback: Function to call for logging messages
35
35
  """
36
36
  self.library_dir = Path(library_dir) if library_dir else None
@@ -171,11 +171,14 @@ class UnifiedPromptLibrary:
171
171
  # Backward compatibility: quick_run is the legacy field; internally we
172
172
  # treat it as the "QuickMenu (future app menu)" flag.
173
173
  prompt_data.setdefault('quick_run', False)
174
- prompt_data['quickmenu_quickmenu'] = bool(
175
- prompt_data.get('quickmenu_quickmenu', prompt_data.get('quick_run', False))
174
+ # Support legacy quickmenu_quickmenu field (rename to sv_quickmenu)
175
+ if 'quickmenu_quickmenu' in prompt_data:
176
+ prompt_data['sv_quickmenu'] = prompt_data['quickmenu_quickmenu']
177
+ prompt_data['sv_quickmenu'] = bool(
178
+ prompt_data.get('sv_quickmenu', prompt_data.get('quick_run', False))
176
179
  )
177
180
  # Keep legacy field in sync so older code/versions still behave.
178
- prompt_data['quick_run'] = bool(prompt_data['quickmenu_quickmenu'])
181
+ prompt_data['quick_run'] = bool(prompt_data['sv_quickmenu'])
179
182
 
180
183
  # New QuickMenu fields
181
184
  prompt_data.setdefault('quickmenu_grid', False)
@@ -270,7 +273,7 @@ class UnifiedPromptLibrary:
270
273
  'name', 'description', 'domain', 'version', 'task_type',
271
274
  'favorite',
272
275
  # QuickMenu
273
- 'quickmenu_label', 'quickmenu_grid', 'quickmenu_quickmenu',
276
+ 'quickmenu_label', 'quickmenu_grid', 'sv_quickmenu',
274
277
  # Legacy (kept for backward compatibility)
275
278
  'quick_run',
276
279
  'folder', 'tags',
@@ -309,8 +312,8 @@ class UnifiedPromptLibrary:
309
312
  prompt_data['_relative_path'] = relative_path
310
313
 
311
314
  # Keep legacy field in sync
312
- if 'quickmenu_quickmenu' in prompt_data:
313
- prompt_data['quick_run'] = bool(prompt_data.get('quickmenu_quickmenu', False))
315
+ if 'sv_quickmenu' in prompt_data:
316
+ prompt_data['quick_run'] = bool(prompt_data.get('sv_quickmenu', False))
314
317
  self.prompts[relative_path] = prompt_data
315
318
 
316
319
  self.log(f"✓ Saved prompt: {prompt_data.get('name', relative_path)}")
@@ -456,8 +459,8 @@ class UnifiedPromptLibrary:
456
459
  return False
457
460
 
458
461
  prompt_data = self.prompts[relative_path]
459
- new_value = not bool(prompt_data.get('quickmenu_quickmenu', prompt_data.get('quick_run', False)))
460
- prompt_data['quickmenu_quickmenu'] = new_value
462
+ new_value = not bool(prompt_data.get('sv_quickmenu', prompt_data.get('quick_run', False)))
463
+ prompt_data['sv_quickmenu'] = new_value
461
464
  prompt_data['quick_run'] = new_value # keep legacy in sync
462
465
  prompt_data['modified'] = datetime.now().strftime("%Y-%m-%d")
463
466
 
@@ -493,7 +496,7 @@ class UnifiedPromptLibrary:
493
496
  """Update cached QuickMenu (future app menu) list (legacy name: quick_run)."""
494
497
  self._quick_run = []
495
498
  for path, data in self.prompts.items():
496
- is_enabled = bool(data.get('quickmenu_quickmenu', data.get('quick_run', False)))
499
+ is_enabled = bool(data.get('sv_quickmenu', data.get('quick_run', False)))
497
500
  if not is_enabled:
498
501
  continue
499
502
  label = (data.get('quickmenu_label') or data.get('name') or Path(path).stem).strip()