supervertaler 1.9.163__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. Supervertaler.py +48473 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1911 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +351 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1176 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.163.dist-info/METADATA +906 -0
  81. supervertaler-1.9.163.dist-info/RECORD +85 -0
  82. supervertaler-1.9.163.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.163.dist-info/top_level.txt +2 -0
modules/tag_cleaner.py ADDED
@@ -0,0 +1,260 @@
1
+ """
2
+ TagCleaner Module for Supervertaler
3
+ Removes CAT tool tags from translation text
4
+
5
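+ Tag categories (only memoQ index tags ship with a built-in pattern; the other categories are placeholders that accept custom patterns):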
6
+ - memoQ
7
+ - Trados Studio
8
+ - CafeTran
9
+ - Wordfast
10
+
11
+ Can be used standalone or integrated with other modules like AutoFingers.
12
+ """
13
+
14
+ import re
15
+ from typing import Dict, List, Optional
16
+ from dataclasses import dataclass
17
+
18
+
19
@dataclass
class TagPattern:
    """One named regular-expression rule describing a kind of tag to remove.

    Attributes:
        name: Human-readable label for the rule.
        description: Short explanation of what the rule matches.
        pattern: Regular-expression source (a string, not a compiled pattern).
        enabled: Whether the rule is active; defaults to ``True``.
    """

    name: str
    description: str
    pattern: str  # Regex pattern
    enabled: bool = True
26
+
27
+
28
class TagCleaner:
    """
    Removes CAT tool tags from translation text.

    Patterns are grouped into four categories ("memoq", "trados",
    "cafetran", "wordfast"). Cleaning only happens while the master switch
    is on, and the switch is OFF by default, so call ``enable()`` before
    ``clean()``.

    Usage:
        cleaner = TagCleaner()
        cleaner.enable()                   # master switch (off by default)
        cleaner.enable_memoq_index_tags()  # on by default; shown for clarity
        cleaned = cleaner.clean("Text with [1}tags{2] here")
    """

    # (category, value from_dict() uses when an entry lacks an "enabled" key).
    # memoQ entries default to True and all other categories to False.
    _IMPORT_DEFAULTS = (
        ("memoq", True),
        ("trados", False),
        ("cafetran", False),
        ("wordfast", False),
    )

    def __init__(self):
        """Initialize TagCleaner with all available tag patterns."""
        self.enabled = False  # Master switch

        # memoQ tag patterns
        self.memoq_patterns: Dict[str, "TagPattern"] = {
            "index_tags": TagPattern(
                name="memoQ Index Tags",
                description="Index tags like [1} {2] [3} {4] etc.",
                pattern=r'(?:\[\d+\}|\{\d+\])',
                enabled=True
            ),
            # Add more memoQ tag types here as needed
        }

        # Trados Studio tag patterns (placeholders for future)
        self.trados_patterns: Dict[str, "TagPattern"] = {
            # TODO: Add Trados tag patterns
        }

        # CafeTran tag patterns (placeholders for future)
        self.cafetran_patterns: Dict[str, "TagPattern"] = {
            # TODO: Add CafeTran tag patterns
        }

        # Wordfast tag patterns (placeholders for future)
        self.wordfast_patterns: Dict[str, "TagPattern"] = {
            # TODO: Add Wordfast tag patterns
        }

    def _category_map(self) -> Dict[str, Dict[str, "TagPattern"]]:
        """Map each lower-case category name to its live pattern dictionary.

        The insertion order (memoq, trados, cafetran, wordfast) is the order
        in which patterns are listed, applied and exported.
        """
        return {
            "memoq": self.memoq_patterns,
            "trados": self.trados_patterns,
            "cafetran": self.cafetran_patterns,
            "wordfast": self.wordfast_patterns,
        }

    def enable(self) -> None:
        """Enable tag cleaning (master switch)."""
        self.enabled = True

    def disable(self) -> None:
        """Disable tag cleaning (master switch)."""
        self.enabled = False

    def is_enabled(self) -> bool:
        """Check if tag cleaning is enabled."""
        return self.enabled

    # memoQ tag control methods
    def enable_memoq_index_tags(self) -> None:
        """Enable cleaning of memoQ index tags ([1} {2] etc.).

        Does nothing if the "index_tags" pattern has been removed.
        """
        if "index_tags" in self.memoq_patterns:
            self.memoq_patterns["index_tags"].enabled = True

    def disable_memoq_index_tags(self) -> None:
        """Disable cleaning of memoQ index tags.

        Does nothing if the "index_tags" pattern has been removed.
        """
        if "index_tags" in self.memoq_patterns:
            self.memoq_patterns["index_tags"].enabled = False

    def is_memoq_index_tags_enabled(self) -> bool:
        """Check if memoQ index tag cleaning is enabled.

        Returns:
            The pattern's ``enabled`` flag, or False when the "index_tags"
            pattern has been removed.
        """
        index_tags = self.memoq_patterns.get("index_tags")
        return index_tags.enabled if index_tags is not None else False

    # Generic pattern management
    def add_custom_pattern(self, category: str, key: str, pattern: "TagPattern") -> None:
        """
        Add a custom tag pattern.

        An existing pattern stored under the same key is replaced. An
        unknown category is ignored silently.

        Args:
            category: Category name ("memoq", "trados", "cafetran", "wordfast"),
                matched case-insensitively
            key: Unique identifier for this pattern
            pattern: TagPattern to add
        """
        patterns = self._category_map().get(category.lower())
        if patterns is not None:
            patterns[key] = pattern

    def remove_pattern(self, category: str, key: str) -> None:
        """Remove a tag pattern.

        Unknown categories and unknown keys are ignored silently.

        Args:
            category: Category name, matched case-insensitively
            key: Identifier of the pattern to remove
        """
        patterns = self._category_map().get(category.lower())
        if patterns is not None:
            patterns.pop(key, None)

    def get_all_patterns(self) -> List["TagPattern"]:
        """Get all tag patterns from all categories, in category order."""
        return [
            pattern
            for patterns in self._category_map().values()
            for pattern in patterns.values()
        ]

    def get_enabled_patterns(self) -> List["TagPattern"]:
        """Get only enabled tag patterns."""
        return [p for p in self.get_all_patterns() if p.enabled]

    def clean(self, text: str) -> str:
        """
        Remove tags from text based on enabled patterns.

        Args:
            text: Text potentially containing CAT tool tags

        Returns:
            Text with enabled tags removed. The input is returned unchanged
            when the master switch is off or the text is empty.
        """
        if not self.enabled or not text:
            return text

        cleaned_text = text

        # Apply all enabled patterns one after another; each pattern sees
        # the output of the previous one.
        for pattern in self.get_enabled_patterns():
            cleaned_text = re.sub(pattern.pattern, '', cleaned_text)

        return cleaned_text

    def preview_cleaning(self, text: str) -> Dict[str, str]:
        """
        Preview what text would look like with each pattern applied.

        Each enabled pattern is applied on its own to the original text
        (not chained), and the master switch is not consulted. Patterns
        that share a name overwrite each other in the result.

        Args:
            text: Text to preview cleaning on

        Returns:
            Dictionary mapping pattern names to cleaned text
        """
        return {
            pattern.name: re.sub(pattern.pattern, '', text)
            for pattern in self.get_enabled_patterns()
        }

    def to_dict(self) -> Dict:
        """
        Export settings to dictionary (for JSON serialization).

        Only the master switch and each pattern's enabled flag are exported;
        the regular expressions themselves are not.

        Returns:
            Dictionary with all tag cleaner settings
        """
        exported: Dict = {"enabled": self.enabled}
        for category, patterns in self._category_map().items():
            exported[category] = {
                key: {"enabled": pattern.enabled}
                for key, pattern in patterns.items()
            }
        return exported

    def from_dict(self, settings: Dict) -> None:
        """
        Import settings from dictionary.

        Only patterns that already exist in this instance are updated;
        unknown keys are ignored. Category values or pattern entries that
        are not dictionaries (for example ``null`` in a hand-edited file)
        are skipped instead of raising.

        Args:
            settings: Dictionary with tag cleaner settings
        """
        self.enabled = settings.get("enabled", False)

        category_map = self._category_map()
        for category, default_enabled in self._IMPORT_DEFAULTS:
            category_settings = settings.get(category)
            if not isinstance(category_settings, dict):
                continue

            patterns = category_map[category]
            for key, pattern_settings in category_settings.items():
                if key not in patterns or not isinstance(pattern_settings, dict):
                    continue
                patterns[key].enabled = pattern_settings.get("enabled", default_enabled)
239
+
240
+
241
# Standalone usage example
if __name__ == "__main__":
    # Sample segment: the memoQ index tags [7} and {8] wrap the last word.
    sample_text = "Laat de tractor nooit draaien in een afgesloten ruimte, tenzij de uitlaat naar buiten wordt afgevoerd [7}lucht.{8]"

    # Build a cleaner, turn on the master switch, then the specific tag type.
    demo_cleaner = TagCleaner()
    demo_cleaner.enable()
    demo_cleaner.enable_memoq_index_tags()

    # Strip the tags and show before/after next to the expected result.
    cleaned_text = demo_cleaner.clean(sample_text)

    report_lines = (
        f"Original: {sample_text}",
        f"Cleaned: {cleaned_text}",
        "\nExpected: Laat de tractor nooit draaien in een afgesloten ruimte, tenzij de uitlaat naar buiten wordt afgevoerd lucht.",
    )
    for report_line in report_lines:
        print(report_line)
modules/tag_manager.py ADDED
@@ -0,0 +1,351 @@
1
+ """
2
+ Tag Manager
3
+ Handle inline formatting tags (bold, italic, underline, subscript, superscript, list items)
4
+
5
+ This module converts formatting runs into XML-like tags for editing,
6
+ validates tag integrity, and reconstructs formatting on export.
7
+
8
+ Example:
9
+ "This is **bold** text" → "This is <b>bold</b> text"
10
+ """
11
+
12
+ from typing import List, Tuple, Dict, Any
13
+ from dataclasses import dataclass
14
+ import re
15
+
16
+
17
@dataclass
class FormattingRun:
    """Represents a formatting run in text

    Attributes:
        text: The run's text content.
        bold, italic, underline, subscript, superscript: Formatting flags,
            all False by default.
        start_pos: Offset of the run's first character (default 0).
        end_pos: Offset just past the run's last character (default 0).
    """
    text: str
    bold: bool = False
    italic: bool = False
    underline: bool = False
    subscript: bool = False
    superscript: bool = False
    start_pos: int = 0
    end_pos: int = 0

    def has_formatting(self) -> bool:
        """Check if this run has any formatting

        Always returns a real bool, even if a flag holds another truthy value.
        """
        return any((self.bold, self.italic, self.underline,
                    self.subscript, self.superscript))

    def get_tag_name(self) -> "str | None":
        """Get the tag name for this formatting

        Only one tag is reported. Bold+italic yields "bi"; otherwise the
        first set flag wins in the order bold, italic, underline,
        subscript, superscript.

        Returns:
            "bi", "b", "i", "u", "sub" or "sup", or None when the run has
            no formatting.
        """
        if self.bold and self.italic:
            return "bi"

        # Remaining flags in priority order; the first truthy one decides.
        by_priority = (
            (self.bold, "b"),
            (self.italic, "i"),
            (self.underline, "u"),
            (self.subscript, "sub"),
            (self.superscript, "sup"),
        )
        for flag, tag_name in by_priority:
            if flag:
                return tag_name
        return None
48
+
49
+
50
class TagManager:
    """Manage inline formatting tags

    Converts formatting runs to text with inline tags (<b>, <i>, <u>, <bi>,
    <sub>, <sup>) and back, and offers validation, counting and stripping
    helpers. <li> tags are recognised by the tag pattern but carry no
    formatting flag.
    """

    # Tag patterns - includes list item tags and sub/sup
    TAG_PATTERN = re.compile(r'<(/?)([biu]|bi|li|sub|sup)>')

    # Formatting flag(s) switched by each tag name. 'li' is deliberately
    # absent: it is consumed by the parser without changing any flag.
    _TAG_FLAGS = {
        'bi': ('bold', 'italic'),
        'b': ('bold',),
        'i': ('italic',),
        'u': ('underline',),
        'sub': ('subscript',),
        'sup': ('superscript',),
    }

    def __init__(self):
        self.tag_colors = {
            'b': '#CC0000',    # Red for bold
            'i': '#0066CC',    # Blue for italic
            'u': '#009900',    # Green for underline
            'bi': '#CC00CC',   # Purple for bold+italic
            'li': '#FF6600',   # Orange for list items
            'sub': '#666600',  # Olive for subscript
            'sup': '#006666'   # Teal for superscript
        }

    def extract_runs(self, paragraph) -> List["FormattingRun"]:
        """
        Extract formatting runs from a python-docx paragraph

        Runs with empty text are skipped. Positions are character offsets
        into the concatenation of the non-empty run texts.

        Args:
            paragraph: python-docx paragraph object

        Returns:
            List of FormattingRun objects with position information
        """
        runs = []
        current_pos = 0

        # Check if paragraph style has bold/italic formatting
        # This handles cases like "Subtitle" or "Title" styles that are bold
        style_bold = False
        style_italic = False
        try:
            if paragraph.style and paragraph.style.font:
                if paragraph.style.font.bold:
                    style_bold = True
                if paragraph.style.font.italic:
                    style_italic = True
        except Exception:
            pass  # If we can't read style, just use run-level formatting

        for run in paragraph.runs:
            text = run.text
            if not text:
                continue

            # Combine run-level formatting with style-level formatting
            # run.bold can be True, False, or None (None means inherit from style)
            is_bold = run.bold if run.bold is not None else style_bold
            is_italic = run.italic if run.italic is not None else style_italic

            run_info = FormattingRun(
                text=text,
                bold=is_bold or False,
                italic=is_italic or False,
                underline=run.underline or False,
                # Parenthesised to make the conditional's scope explicit.
                subscript=(run.font.subscript or False) if run.font else False,
                superscript=(run.font.superscript or False) if run.font else False,
                start_pos=current_pos,
                end_pos=current_pos + len(text)
            )
            runs.append(run_info)
            current_pos += len(text)

        return runs

    @staticmethod
    def _tags_for(bold, italic, underline, subscript, superscript) -> List[str]:
        """Return the tag names for one formatting state, outermost first."""
        tags = []
        if bold and italic:
            tags.append('bi')
        elif bold:
            tags.append('b')
        elif italic:
            tags.append('i')
        # Underline is independent of bold/italic, so it gets its own tag
        # instead of being dropped when the run is also bold or italic.
        if underline:
            tags.append('u')
        if subscript:
            tags.append('sub')
        if superscript:
            tags.append('sup')
        return tags

    def runs_to_tagged_text(self, runs: List["FormattingRun"]) -> str:
        """
        Convert formatting runs to tagged text

        Whenever the formatting differs from the previous run, all open
        tags are closed (innermost first) and the new run's tags are
        opened, so the output is always properly nested.

        Example:
            [Run("Hello ", bold=False), Run("world", bold=True), Run("!", bold=False)]
            → "Hello <b>world</b>!"

            [Run("x", bold=True, underline=True)] → "<b><u>x</u></b>"

        Args:
            runs: List of FormattingRun objects

        Returns:
            Text with inline tags
        """
        if not runs:
            return ""

        result = []
        active_state = (False, False, False, False, False)
        open_tags: List[str] = []

        for run in runs:
            state = (run.bold, run.italic, run.underline,
                     run.subscript, run.superscript)

            if state != active_state:
                # Close previous tags in reverse order of nesting
                result.extend(f'</{tag}>' for tag in reversed(open_tags))

                # Open new tags
                open_tags = self._tags_for(*state)
                result.extend(f'<{tag}>' for tag in open_tags)

                # Update current state
                active_state = state

            result.append(run.text)

        # Close any remaining tags
        result.extend(f'</{tag}>' for tag in reversed(open_tags))

        return ''.join(result)

    def tagged_text_to_runs(self, text: str) -> List[Dict[str, Any]]:
        """
        Convert tagged text back to run specifications

        Every recognised tag ends the current run. Flags are plain
        booleans, so a closing tag clears its flag(s) even if an outer tag
        of the same kind is still open. <li> tags are consumed without
        changing any flag.

        Example:
            "Hello <b>world</b>!" →
            [{'text': 'Hello ', 'bold': False, ...},
             {'text': 'world', 'bold': True, ...},
             {'text': '!', 'bold': False, ...}]

        Args:
            text: Text with inline tags

        Returns:
            List of run specifications (dicts with 'text', 'bold', 'italic',
            'underline', 'subscript' and 'superscript' keys)
        """
        runs = []
        current_formatting = {'bold': False, 'italic': False, 'underline': False,
                              'subscript': False, 'superscript': False}
        cursor = 0  # start of the text not yet emitted as a run

        for match in self.TAG_PATTERN.finditer(text):
            # Save the text before this tag as a run
            if match.start() > cursor:
                runs.append({'text': text[cursor:match.start()], **current_formatting})

            # Process tag
            is_closing = match.group(1) == '/'
            for flag in self._TAG_FLAGS.get(match.group(2), ()):
                current_formatting[flag] = not is_closing

            cursor = match.end()

        # Save final text
        if cursor < len(text):
            runs.append({'text': text[cursor:], **current_formatting})

        return runs

    def validate_tags(self, text: str) -> Tuple[bool, str]:
        """
        Validate that all tags are properly paired and nested

        Args:
            text: Text with inline tags

        Returns:
            (is_valid, error_message); the message is empty when valid and
            describes the first problem found otherwise
        """
        stack: List[str] = []

        for match in self.TAG_PATTERN.finditer(text):
            is_closing = match.group(1) == '/'
            tag_name = match.group(2)

            if not is_closing:
                stack.append(tag_name)
                continue

            if not stack:
                return False, f"Closing tag </{tag_name}> without opening tag"
            if stack[-1] != tag_name:
                return False, f"Mismatched tags: expected </{stack[-1]}>, found </{tag_name}>"
            stack.pop()

        if stack:
            return False, f"Unclosed tags: {', '.join(stack)}"

        return True, ""

    def count_tags(self, text: str) -> Dict[str, int]:
        """
        Count tags in text

        Only opening tags are counted.

        Returns:
            Dictionary with tag counts (e.g., {'b': 2, 'i': 1})
        """
        counts: Dict[str, int] = {}

        for match in self.TAG_PATTERN.finditer(text):
            if match.group(1) == '/':
                continue  # Only count opening tags
            tag_name = match.group(2)
            counts[tag_name] = counts.get(tag_name, 0) + 1

        return counts

    def strip_tags(self, text: str) -> str:
        """Remove all recognised tags from text"""
        return self.TAG_PATTERN.sub('', text)

    def get_tag_color(self, tag_name: str) -> str:
        """Get color for tag name ('#000000' for unknown tags)"""
        return self.tag_colors.get(tag_name, '#000000')

    def format_for_display(self, text: str) -> str:
        """
        Format tagged text for display (simplified version)
        This could be enhanced with colored markers in a rich text widget

        For now, just show tags as-is
        """
        return text