supervertaler 1.9.153__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of supervertaler might be problematic. Click here for more details.
- Supervertaler.py +47886 -0
- modules/__init__.py +10 -0
- modules/ai_actions.py +964 -0
- modules/ai_attachment_manager.py +343 -0
- modules/ai_file_viewer_dialog.py +210 -0
- modules/autofingers_engine.py +466 -0
- modules/cafetran_docx_handler.py +379 -0
- modules/config_manager.py +469 -0
- modules/database_manager.py +1878 -0
- modules/database_migrations.py +417 -0
- modules/dejavurtf_handler.py +779 -0
- modules/document_analyzer.py +427 -0
- modules/docx_handler.py +689 -0
- modules/encoding_repair.py +319 -0
- modules/encoding_repair_Qt.py +393 -0
- modules/encoding_repair_ui.py +481 -0
- modules/feature_manager.py +350 -0
- modules/figure_context_manager.py +340 -0
- modules/file_dialog_helper.py +148 -0
- modules/find_replace.py +164 -0
- modules/find_replace_qt.py +457 -0
- modules/glossary_manager.py +433 -0
- modules/image_extractor.py +188 -0
- modules/keyboard_shortcuts_widget.py +571 -0
- modules/llm_clients.py +1211 -0
- modules/llm_leaderboard.py +737 -0
- modules/llm_superbench_ui.py +1401 -0
- modules/local_llm_setup.py +1104 -0
- modules/model_update_dialog.py +381 -0
- modules/model_version_checker.py +373 -0
- modules/mqxliff_handler.py +638 -0
- modules/non_translatables_manager.py +743 -0
- modules/pdf_rescue_Qt.py +1822 -0
- modules/pdf_rescue_tkinter.py +909 -0
- modules/phrase_docx_handler.py +516 -0
- modules/project_home_panel.py +209 -0
- modules/prompt_assistant.py +357 -0
- modules/prompt_library.py +689 -0
- modules/prompt_library_migration.py +447 -0
- modules/quick_access_sidebar.py +282 -0
- modules/ribbon_widget.py +597 -0
- modules/sdlppx_handler.py +874 -0
- modules/setup_wizard.py +353 -0
- modules/shortcut_manager.py +932 -0
- modules/simple_segmenter.py +128 -0
- modules/spellcheck_manager.py +727 -0
- modules/statuses.py +207 -0
- modules/style_guide_manager.py +315 -0
- modules/superbench_ui.py +1319 -0
- modules/superbrowser.py +329 -0
- modules/supercleaner.py +600 -0
- modules/supercleaner_ui.py +444 -0
- modules/superdocs.py +19 -0
- modules/superdocs_viewer_qt.py +382 -0
- modules/superlookup.py +252 -0
- modules/tag_cleaner.py +260 -0
- modules/tag_manager.py +333 -0
- modules/term_extractor.py +270 -0
- modules/termbase_entry_editor.py +842 -0
- modules/termbase_import_export.py +488 -0
- modules/termbase_manager.py +1060 -0
- modules/termview_widget.py +1172 -0
- modules/theme_manager.py +499 -0
- modules/tm_editor_dialog.py +99 -0
- modules/tm_manager_qt.py +1280 -0
- modules/tm_metadata_manager.py +545 -0
- modules/tmx_editor.py +1461 -0
- modules/tmx_editor_qt.py +2784 -0
- modules/tmx_generator.py +284 -0
- modules/tracked_changes.py +900 -0
- modules/trados_docx_handler.py +430 -0
- modules/translation_memory.py +715 -0
- modules/translation_results_panel.py +2134 -0
- modules/translation_services.py +282 -0
- modules/unified_prompt_library.py +659 -0
- modules/unified_prompt_manager_qt.py +3951 -0
- modules/voice_commands.py +920 -0
- modules/voice_dictation.py +477 -0
- modules/voice_dictation_lite.py +249 -0
- supervertaler-1.9.153.dist-info/METADATA +896 -0
- supervertaler-1.9.153.dist-info/RECORD +85 -0
- supervertaler-1.9.153.dist-info/WHEEL +5 -0
- supervertaler-1.9.153.dist-info/entry_points.txt +2 -0
- supervertaler-1.9.153.dist-info/licenses/LICENSE +21 -0
- supervertaler-1.9.153.dist-info/top_level.txt +2 -0
modules/tag_cleaner.py
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TagCleaner Module for Supervertaler
|
|
3
|
+
Removes CAT tool tags from translation text
|
|
4
|
+
|
|
5
|
+
Supports tags from:
|
|
6
|
+
- memoQ
|
|
7
|
+
- Trados Studio
|
|
8
|
+
- CafeTran
|
|
9
|
+
- Wordfast
|
|
10
|
+
|
|
11
|
+
Can be used standalone or integrated with other modules like AutoFingers.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
from typing import Dict, List, Optional
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class TagPattern:
    """Definition of one removable tag type, expressed as a regular expression.

    Instances are mutable so that the ``enabled`` flag can be toggled in place.
    """

    name: str  # Human-readable label; also the key used by preview_cleaning()
    description: str  # Short explanation of what the pattern matches
    pattern: str  # Regular expression handed to ``re.sub``
    enabled: bool = True  # Whether TagCleaner applies this pattern


class TagCleaner:
    """
    Removes CAT tool tags from translation text.

    A tag is removed only when the master switch is on AND the individual
    pattern is enabled.  The master switch is off after construction.

    Usage:
        cleaner = TagCleaner()
        cleaner.enable()                    # master switch, off by default
        cleaner.enable_memoq_index_tags()
        cleaned = cleaner.clean("Text with [1}tags{2] here")
    """

    # Category name -> value from_dict() assumes when a stored pattern entry
    # carries no explicit "enabled" flag.  The order is also the order in
    # which categories are applied and serialized.
    _CATEGORY_DEFAULTS = {
        "memoq": True,
        "trados": False,
        "cafetran": False,
        "wordfast": False,
    }

    def __init__(self):
        """Initialize TagCleaner with all available tag patterns."""
        self.enabled = False  # Master switch

        # memoQ tag patterns
        self.memoq_patterns: Dict[str, TagPattern] = {
            "index_tags": TagPattern(
                name="memoQ Index Tags",
                description="Index tags like [1} {2] [3} {4] etc.",
                pattern=r'(?:\[\d+\}|\{\d+\])',
                enabled=True
            ),
            # Add more memoQ tag types here as needed
        }

        # Trados Studio tag patterns (placeholders for future)
        self.trados_patterns: Dict[str, TagPattern] = {
            # TODO: Add Trados tag patterns
        }

        # CafeTran tag patterns (placeholders for future)
        self.cafetran_patterns: Dict[str, TagPattern] = {
            # TODO: Add CafeTran tag patterns
        }

        # Wordfast tag patterns (placeholders for future)
        self.wordfast_patterns: Dict[str, TagPattern] = {
            # TODO: Add Wordfast tag patterns
        }

    def _category_map(self) -> Dict[str, Dict[str, TagPattern]]:
        """Return the live per-category pattern dictionaries keyed by category name."""
        # Built on every call so it always refers to the current attribute
        # values, even if a caller rebinds one of the dictionaries.
        return {
            "memoq": self.memoq_patterns,
            "trados": self.trados_patterns,
            "cafetran": self.cafetran_patterns,
            "wordfast": self.wordfast_patterns,
        }

    def enable(self) -> None:
        """Enable tag cleaning (master switch)."""
        self.enabled = True

    def disable(self) -> None:
        """Disable tag cleaning (master switch)."""
        self.enabled = False

    def is_enabled(self) -> bool:
        """Check if tag cleaning is enabled."""
        return self.enabled

    # memoQ tag control methods
    def enable_memoq_index_tags(self) -> None:
        """Enable cleaning of memoQ index tags ([1} {2] etc.).

        Does not touch the master switch; call enable() as well.
        """
        index_tags = self.memoq_patterns.get("index_tags")
        if index_tags is not None:
            index_tags.enabled = True

    def disable_memoq_index_tags(self) -> None:
        """Disable cleaning of memoQ index tags."""
        index_tags = self.memoq_patterns.get("index_tags")
        if index_tags is not None:
            index_tags.enabled = False

    def is_memoq_index_tags_enabled(self) -> bool:
        """Check if memoQ index tag cleaning is enabled.

        Returns False when the "index_tags" pattern has been removed.
        """
        index_tags = self.memoq_patterns.get("index_tags")
        return index_tags.enabled if index_tags is not None else False

    # Generic pattern management
    def add_custom_pattern(self, category: str, key: str, pattern: TagPattern) -> None:
        """
        Add a custom tag pattern, replacing any existing pattern under ``key``.

        Args:
            category: Category name ("memoq", "trados", "cafetran", "wordfast"),
                matched case-insensitively. Unknown categories are ignored.
            key: Unique identifier for this pattern
            pattern: TagPattern to add
        """
        patterns = self._category_map().get(category.lower())
        if patterns is not None:
            patterns[key] = pattern

    def remove_pattern(self, category: str, key: str) -> None:
        """Remove a tag pattern; unknown categories or keys are ignored."""
        patterns = self._category_map().get(category.lower())
        if patterns is not None:
            patterns.pop(key, None)

    def get_all_patterns(self) -> List[TagPattern]:
        """Get all tag patterns from all categories (memoQ, Trados, CafeTran, Wordfast order)."""
        return [
            pattern
            for patterns in self._category_map().values()
            for pattern in patterns.values()
        ]

    def get_enabled_patterns(self) -> List[TagPattern]:
        """Get only enabled tag patterns."""
        return [p for p in self.get_all_patterns() if p.enabled]

    def clean(self, text: str) -> str:
        """
        Remove tags from text based on enabled patterns.

        Args:
            text: Text potentially containing CAT tool tags

        Returns:
            Text with enabled tags removed. The input is returned unchanged
            when the master switch is off or the text is empty/None.
        """
        if not self.enabled or not text:
            return text

        cleaned_text = text

        # Patterns are applied one after another, each on the previous result
        for pattern in self.get_enabled_patterns():
            cleaned_text = re.sub(pattern.pattern, '', cleaned_text)

        return cleaned_text

    def preview_cleaning(self, text: str) -> Dict[str, str]:
        """
        Preview what text would look like with each pattern applied.

        Every enabled pattern is applied on its own to the original text;
        the master switch is not consulted.

        Args:
            text: Text to preview cleaning on

        Returns:
            Dictionary mapping pattern names to cleaned text
        """
        return {
            pattern.name: re.sub(pattern.pattern, '', text)
            for pattern in self.get_enabled_patterns()
        }

    def to_dict(self) -> Dict:
        """
        Export settings to dictionary (for JSON serialization).

        Only the enabled flags are exported, not the regex patterns.

        Returns:
            Dictionary with all tag cleaner settings
        """
        exported: Dict = {"enabled": self.enabled}
        for category, patterns in self._category_map().items():
            exported[category] = {
                key: {"enabled": pattern.enabled}
                for key, pattern in patterns.items()
            }
        return exported

    def from_dict(self, settings: Dict) -> None:
        """
        Import settings from dictionary.

        Only the enabled flags of patterns that already exist are updated.
        Missing, ``None`` or malformed sections/entries are skipped instead
        of raising, so a partially written settings file still loads.

        Args:
            settings: Dictionary with tag cleaner settings (``None`` is
                treated as an empty dictionary)
        """
        settings = settings or {}
        self.enabled = settings.get("enabled", False)

        category_map = self._category_map()
        for category, default_enabled in self._CATEGORY_DEFAULTS.items():
            section = settings.get(category)
            if not isinstance(section, dict):
                continue  # section absent, null or of the wrong shape

            patterns = category_map[category]
            for key, pattern_settings in section.items():
                if key in patterns and isinstance(pattern_settings, dict):
                    patterns[key].enabled = pattern_settings.get("enabled", default_enabled)
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
# Standalone usage example
|
|
242
|
+
if __name__ == "__main__":
    # Small manual demo: strip memoQ index tags from one Dutch sample
    # sentence and print the result next to the expected output.
    sample = "Laat de tractor nooit draaien in een afgesloten ruimte, tenzij de uitlaat naar buiten wordt afgevoerd [7}lucht.{8]"

    demo = TagCleaner()
    demo.enable()                   # master switch
    demo.enable_memoq_index_tags()  # the specific tag type to strip

    result = demo.clean(sample)

    print(f"Original: {sample}")
    print(f"Cleaned: {result}")
    print(f"\nExpected: Laat de tractor nooit draaien in een afgesloten ruimte, tenzij de uitlaat naar buiten wordt afgevoerd lucht.")
|
modules/tag_manager.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tag Manager
|
|
3
|
+
Handle inline formatting tags (bold, italic, underline)
|
|
4
|
+
|
|
5
|
+
This module converts formatting runs into XML-like tags for editing,
|
|
6
|
+
validates tag integrity, and reconstructs formatting on export.
|
|
7
|
+
|
|
8
|
+
Example:
|
|
9
|
+
"This is **bold** text" → "This is <b>bold</b> text"
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import re
from dataclasses import dataclass
from typing import List, Tuple, Dict, Any, Optional
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class FormattingRun:
    """Represents a formatting run in text: a piece of text plus its formatting flags."""

    text: str
    bold: bool = False
    italic: bool = False
    underline: bool = False
    subscript: bool = False
    superscript: bool = False
    start_pos: int = 0  # offset of the first character of this run
    end_pos: int = 0  # offset just past the last character of this run

    def has_formatting(self) -> bool:
        """Check if this run has any formatting.

        Returns:
            True when at least one formatting flag is truthy. The result is
            always a real bool, even if a flag holds a non-bool truthy value.
        """
        return any((self.bold, self.italic, self.underline, self.subscript, self.superscript))

    def get_tag_name(self) -> Optional[str]:
        """Get the tag name for this formatting.

        Only one name is reported. Precedence is bold+italic ("bi"), bold
        ("b"), italic ("i"), underline ("u"), subscript ("sub"), superscript
        ("sup"); lower-priority flags set at the same time are not reflected.

        Returns:
            The tag name, or None when the run carries no formatting.
        """
        if self.bold and self.italic:
            return "bi"
        if self.bold:
            return "b"
        if self.italic:
            return "i"
        if self.underline:
            return "u"
        if self.subscript:
            return "sub"
        if self.superscript:
            return "sup"
        return None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class TagManager:
    """Manage inline formatting tags.

    Converts formatting runs to text with XML-like tags (<b>, <i>, <u>, <bi>,
    <sub>, <sup>; <li> is recognised but carries no run formatting), parses
    such text back into run specifications, and validates/counts/strips tags.
    """

    # Tag patterns - includes list item tags and sub/sup
    TAG_PATTERN = re.compile(r'<(/?)([biu]|bi|li|sub|sup)>')

    # Formatting flags tracked per run, in a fixed order.
    _FORMAT_KEYS = ('bold', 'italic', 'underline', 'subscript', 'superscript')

    # Tag name -> formatting flags the tag switches on (opening) or off
    # (closing).  'li' is intentionally absent: it changes no flag.
    _TAG_EFFECTS = {
        'bi': ('bold', 'italic'),
        'b': ('bold',),
        'i': ('italic',),
        'u': ('underline',),
        'sub': ('subscript',),
        'sup': ('superscript',),
    }

    def __init__(self):
        self.tag_colors = {
            'b': '#CC0000',    # Red for bold
            'i': '#0066CC',    # Blue for italic
            'u': '#009900',    # Green for underline
            'bi': '#CC00CC',   # Purple for bold+italic
            'li': '#FF6600',   # Orange for list items
            'sub': '#666600',  # Olive for subscript
            'sup': '#006666'   # Teal for superscript
        }

    def extract_runs(self, paragraph) -> "List[FormattingRun]":
        """
        Extract formatting runs from a python-docx paragraph

        Runs with empty text are skipped. Falsy formatting values (such as
        None) are normalised to False.

        Args:
            paragraph: python-docx paragraph object

        Returns:
            List of FormattingRun objects with position information
            (start_pos/end_pos are offsets into the concatenated run text)
        """
        extracted = []
        offset = 0

        for run in paragraph.runs:
            text = run.text
            if not text:
                continue

            font = run.font
            end = offset + len(text)
            extracted.append(FormattingRun(
                text=text,
                bold=run.bold or False,
                italic=run.italic or False,
                underline=run.underline or False,
                subscript=(font.subscript or False) if font else False,
                superscript=(font.superscript or False) if font else False,
                start_pos=offset,
                end_pos=end
            ))
            offset = end

        return extracted

    @staticmethod
    def _tags_for(bold, italic, underline, subscript, superscript) -> List[str]:
        """Return the tag names for a formatting state, in opening (outermost-first) order."""
        tags = []
        if bold and italic:
            tags.append('bi')
        elif bold:
            tags.append('b')
        elif italic:
            tags.append('i')
        # Underline is independent of bold/italic, so it gets its own tag
        # instead of being dropped when combined with them.
        if underline:
            tags.append('u')
        if subscript:
            tags.append('sub')
        if superscript:
            tags.append('sup')
        return tags

    def runs_to_tagged_text(self, runs: "List[FormattingRun]") -> str:
        """
        Convert formatting runs to tagged text

        Example:
            [Run("Hello ", bold=False), Run("world", bold=True), Run("!", bold=False)]
            → "Hello <b>world</b>!"

        Whenever the formatting differs from the previous run, all open tags
        are closed (innermost first) and the tags for the new run are opened,
        so the output is always properly nested and passes validate_tags().
        Underline combined with bold/italic is emitted as a nested <u> tag.

        Args:
            runs: List of FormattingRun objects

        Returns:
            Text with inline tags
        """
        if not runs:
            return ""

        pieces = []
        active_state = (False,) * len(self._FORMAT_KEYS)
        open_tags: List[str] = []

        for run in runs:
            state = (run.bold, run.italic, run.underline, run.subscript, run.superscript)

            if state != active_state:
                # Close in reverse order of opening to keep nesting valid
                pieces.extend(f'</{tag}>' for tag in reversed(open_tags))
                open_tags = self._tags_for(*state)
                pieces.extend(f'<{tag}>' for tag in open_tags)
                active_state = state

            pieces.append(run.text)

        # Close any remaining tags
        pieces.extend(f'</{tag}>' for tag in reversed(open_tags))

        return ''.join(pieces)

    def tagged_text_to_runs(self, text: str) -> List[Dict[str, Any]]:
        """
        Convert tagged text back to run specifications

        Example:
            "Hello <b>world</b>!" →
            [{'text': 'Hello ', 'bold': False},
             {'text': 'world', 'bold': True},
             {'text': '!', 'bold': False}]

        Each returned dict also carries 'italic', 'underline', 'subscript'
        and 'superscript'. Every recognised tag (including <li>) ends the
        current run; the tags themselves are not part of any run text.

        Args:
            text: Text with inline tags

        Returns:
            List of run specifications (dicts with text and formatting)
        """
        runs = []
        state = dict.fromkeys(self._FORMAT_KEYS, False)
        cursor = 0  # start of the literal text not yet emitted

        for match in self.TAG_PATTERN.finditer(text):
            if match.start() > cursor:
                runs.append({'text': text[cursor:match.start()], **state})

            switch_on = match.group(1) != '/'
            for key in self._TAG_EFFECTS.get(match.group(2), ()):
                state[key] = switch_on

            cursor = match.end()

        # Save final text
        if cursor < len(text):
            runs.append({'text': text[cursor:], **state})

        return runs

    def validate_tags(self, text: str) -> Tuple[bool, str]:
        """
        Validate that all tags are properly paired and nested

        Args:
            text: Text with inline tags

        Returns:
            (is_valid, error_message); error_message is "" when valid and
            describes the first problem found otherwise
        """
        open_stack = []

        for match in self.TAG_PATTERN.finditer(text):
            tag_name = match.group(2)

            if match.group(1) != '/':
                open_stack.append(tag_name)
                continue

            if not open_stack:
                return False, f"Closing tag </{tag_name}> without opening tag"
            if open_stack[-1] != tag_name:
                return False, f"Mismatched tags: expected </{open_stack[-1]}>, found </{tag_name}>"
            open_stack.pop()

        if open_stack:
            return False, f"Unclosed tags: {', '.join(open_stack)}"

        return True, ""

    def count_tags(self, text: str) -> Dict[str, int]:
        """
        Count tags in text

        Only opening tags are counted.

        Returns:
            Dictionary with tag counts (e.g., {'b': 2, 'i': 1})
        """
        counts: Dict[str, int] = {}

        for match in self.TAG_PATTERN.finditer(text):
            if match.group(1) != '/':
                tag_name = match.group(2)
                counts[tag_name] = counts.get(tag_name, 0) + 1

        return counts

    def strip_tags(self, text: str) -> str:
        """Remove all recognised tags from text (other <...> sequences are kept)"""
        return self.TAG_PATTERN.sub('', text)

    def get_tag_color(self, tag_name: str) -> str:
        """Get color for tag name ('#000000' for unknown tags)"""
        return self.tag_colors.get(tag_name, '#000000')

    def format_for_display(self, text: str) -> str:
        """
        Format tagged text for display (simplified version)
        This could be enhanced with colored markers in a rich text widget

        For now, just show tags as-is
        """
        return text
|