supervertaler 1.9.153__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of supervertaler might be problematic. Click here for more details.
- Supervertaler.py +47886 -0
- modules/__init__.py +10 -0
- modules/ai_actions.py +964 -0
- modules/ai_attachment_manager.py +343 -0
- modules/ai_file_viewer_dialog.py +210 -0
- modules/autofingers_engine.py +466 -0
- modules/cafetran_docx_handler.py +379 -0
- modules/config_manager.py +469 -0
- modules/database_manager.py +1878 -0
- modules/database_migrations.py +417 -0
- modules/dejavurtf_handler.py +779 -0
- modules/document_analyzer.py +427 -0
- modules/docx_handler.py +689 -0
- modules/encoding_repair.py +319 -0
- modules/encoding_repair_Qt.py +393 -0
- modules/encoding_repair_ui.py +481 -0
- modules/feature_manager.py +350 -0
- modules/figure_context_manager.py +340 -0
- modules/file_dialog_helper.py +148 -0
- modules/find_replace.py +164 -0
- modules/find_replace_qt.py +457 -0
- modules/glossary_manager.py +433 -0
- modules/image_extractor.py +188 -0
- modules/keyboard_shortcuts_widget.py +571 -0
- modules/llm_clients.py +1211 -0
- modules/llm_leaderboard.py +737 -0
- modules/llm_superbench_ui.py +1401 -0
- modules/local_llm_setup.py +1104 -0
- modules/model_update_dialog.py +381 -0
- modules/model_version_checker.py +373 -0
- modules/mqxliff_handler.py +638 -0
- modules/non_translatables_manager.py +743 -0
- modules/pdf_rescue_Qt.py +1822 -0
- modules/pdf_rescue_tkinter.py +909 -0
- modules/phrase_docx_handler.py +516 -0
- modules/project_home_panel.py +209 -0
- modules/prompt_assistant.py +357 -0
- modules/prompt_library.py +689 -0
- modules/prompt_library_migration.py +447 -0
- modules/quick_access_sidebar.py +282 -0
- modules/ribbon_widget.py +597 -0
- modules/sdlppx_handler.py +874 -0
- modules/setup_wizard.py +353 -0
- modules/shortcut_manager.py +932 -0
- modules/simple_segmenter.py +128 -0
- modules/spellcheck_manager.py +727 -0
- modules/statuses.py +207 -0
- modules/style_guide_manager.py +315 -0
- modules/superbench_ui.py +1319 -0
- modules/superbrowser.py +329 -0
- modules/supercleaner.py +600 -0
- modules/supercleaner_ui.py +444 -0
- modules/superdocs.py +19 -0
- modules/superdocs_viewer_qt.py +382 -0
- modules/superlookup.py +252 -0
- modules/tag_cleaner.py +260 -0
- modules/tag_manager.py +333 -0
- modules/term_extractor.py +270 -0
- modules/termbase_entry_editor.py +842 -0
- modules/termbase_import_export.py +488 -0
- modules/termbase_manager.py +1060 -0
- modules/termview_widget.py +1172 -0
- modules/theme_manager.py +499 -0
- modules/tm_editor_dialog.py +99 -0
- modules/tm_manager_qt.py +1280 -0
- modules/tm_metadata_manager.py +545 -0
- modules/tmx_editor.py +1461 -0
- modules/tmx_editor_qt.py +2784 -0
- modules/tmx_generator.py +284 -0
- modules/tracked_changes.py +900 -0
- modules/trados_docx_handler.py +430 -0
- modules/translation_memory.py +715 -0
- modules/translation_results_panel.py +2134 -0
- modules/translation_services.py +282 -0
- modules/unified_prompt_library.py +659 -0
- modules/unified_prompt_manager_qt.py +3951 -0
- modules/voice_commands.py +920 -0
- modules/voice_dictation.py +477 -0
- modules/voice_dictation_lite.py +249 -0
- supervertaler-1.9.153.dist-info/METADATA +896 -0
- supervertaler-1.9.153.dist-info/RECORD +85 -0
- supervertaler-1.9.153.dist-info/WHEEL +5 -0
- supervertaler-1.9.153.dist-info/entry_points.txt +2 -0
- supervertaler-1.9.153.dist-info/licenses/LICENSE +21 -0
- supervertaler-1.9.153.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,727 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Spellcheck Manager for Supervertaler
|
|
3
|
+
=====================================
|
|
4
|
+
Provides spellchecking functionality using Hunspell dictionaries.
|
|
5
|
+
Supports custom word lists and project-specific dictionaries.
|
|
6
|
+
|
|
7
|
+
Features:
|
|
8
|
+
- Hunspell dictionary support (via cyhunspell or spylls)
|
|
9
|
+
- Spylls: Pure Python Hunspell (works on Windows/Python 3.12+)
|
|
10
|
+
- Fallback to pyspellchecker for basic checking
|
|
11
|
+
- Custom word lists (global and per-project)
|
|
12
|
+
- Integration with PyQt6 text editors
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import List, Set, Dict, Optional, Tuple
|
|
19
|
+
|
|
20
|
+
# Try to import hunspell (cyhunspell) - may fail on Windows/Python 3.12+
|
|
21
|
+
try:
|
|
22
|
+
from hunspell import Hunspell
|
|
23
|
+
HAS_HUNSPELL = True
|
|
24
|
+
except ImportError:
|
|
25
|
+
HAS_HUNSPELL = False
|
|
26
|
+
Hunspell = None
|
|
27
|
+
|
|
28
|
+
# Try spylls (pure Python Hunspell reimplementation) - works on all platforms
|
|
29
|
+
try:
|
|
30
|
+
from spylls.hunspell import Dictionary as SpyllsDictionary
|
|
31
|
+
HAS_SPYLLS = True
|
|
32
|
+
except ImportError:
|
|
33
|
+
HAS_SPYLLS = False
|
|
34
|
+
SpyllsDictionary = None
|
|
35
|
+
|
|
36
|
+
# Fallback to pyspellchecker (no regional variants like en_US vs en_GB)
|
|
37
|
+
SPELLCHECKER_IMPORT_ERROR = None
|
|
38
|
+
try:
|
|
39
|
+
from spellchecker import SpellChecker
|
|
40
|
+
HAS_SPELLCHECKER = True
|
|
41
|
+
except ImportError as e:
|
|
42
|
+
HAS_SPELLCHECKER = False
|
|
43
|
+
SpellChecker = None
|
|
44
|
+
SPELLCHECKER_IMPORT_ERROR = str(e)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class SpellcheckManager:
    """
    Manages spellchecking for Supervertaler.

    Supports:
    - Hunspell dictionaries (.dic/.aff files)
    - Custom word lists
    - Per-project dictionaries

    Backends are tried in this order by set_language(): cyhunspell,
    spylls (pure Python Hunspell), then pyspellchecker. Only one backend
    is active at a time.
    """

    # Map language codes to display names with variants
    CODE_TO_DISPLAY = {
        'en_US': 'English (US)',
        'en_GB': 'English (GB)',
        'en_AU': 'English (AU)',
        'en_CA': 'English (CA)',
        'en_ZA': 'English (ZA)',
        'nl_NL': 'Dutch (NL)',
        'nl_BE': 'Dutch (BE)',
        'de_DE': 'German (DE)',
        'de_AT': 'German (AT)',
        'de_CH': 'German (CH)',
        'fr_FR': 'French (FR)',
        'fr_CA': 'French (CA)',
        'fr_BE': 'French (BE)',
        'fr_CH': 'French (CH)',
        'es_ES': 'Spanish (ES)',
        'es_MX': 'Spanish (MX)',
        'es_AR': 'Spanish (AR)',
        'pt_PT': 'Portuguese (PT)',
        'pt_BR': 'Portuguese (BR)',
        'it_IT': 'Italian',
        'pl_PL': 'Polish',
        'ru_RU': 'Russian',
        'sv_SE': 'Swedish',
        'da_DK': 'Danish',
        'nb_NO': 'Norwegian (Bokmål)',
        'nn_NO': 'Norwegian (Nynorsk)',
        'fi_FI': 'Finnish',
        'cs_CZ': 'Czech',
        'sk_SK': 'Slovak',
        'hu_HU': 'Hungarian',
        'ro_RO': 'Romanian',
        'bg_BG': 'Bulgarian',
        'uk_UA': 'Ukrainian',
        'el_GR': 'Greek',
        'tr_TR': 'Turkish',
        'zh_CN': 'Chinese (Simplified)',
        'zh_TW': 'Chinese (Traditional)',
        'ja_JP': 'Japanese',
        'ko_KR': 'Korean',
    }

    # Reverse mapping: display name to code
    DISPLAY_TO_CODE = {v: k for k, v in CODE_TO_DISPLAY.items()}

    # Legacy mapping for project files that use simple names like "English"
    LANGUAGE_MAP = {
        'English': 'en_US',
        'Dutch': 'nl_NL',
        'German': 'de_DE',
        'French': 'fr_FR',
        'Spanish': 'es_ES',
        'Italian': 'it_IT',
        'Portuguese': 'pt_PT',
        'Polish': 'pl_PL',
        'Russian': 'ru_RU',
        'Chinese': 'zh_CN',
        'Japanese': 'ja_JP',
        'Korean': 'ko_KR',
    }

    # Short code mappings (for project files that store "nl" instead of "Dutch")
    SHORT_CODE_MAP = {
        'en': 'en_US',
        'nl': 'nl_NL',
        'de': 'de_DE',
        'fr': 'fr_FR',
        'es': 'es_ES',
        'it': 'it_IT',
        'pt': 'pt_PT',
        'pl': 'pl_PL',
        'ru': 'ru_RU',
        'zh': 'zh_CN',
        'ja': 'ja_JP',
        'ko': 'ko_KR',
    }

    # Reverse mapping (legacy)
    CODE_TO_LANGUAGE = {v: k for k, v in LANGUAGE_MAP.items()}

    # Words longer than this get no suggestions: spylls can hang for 30+
    # seconds on long compounds such as Dutch "gegevensverwerking".
    _MAX_SUGGEST_WORD_LENGTH = 12

    # Tokeniser used by check_text(). Covers ASCII letters plus the Latin-1
    # Supplement and Latin Extended-A/B, Greek and Cyrillic blocks, so that
    # words in the Polish, Czech, Turkish, Greek, Russian, ... dictionaries
    # listed above are actually tokenised. CJK is left out on purpose: those
    # scripts are not whitespace-delimited, so a run would not be a "word".
    _WORD_PATTERN = re.compile(
        r'\b([a-zA-Z\u00C0-\u024F\u0370-\u03FF\u0400-\u04FF]+)\b', re.UNICODE
    )

    def __init__(self, user_data_path: Optional[str] = None):
        """
        Initialize the spellcheck manager.

        Creates ``<user_data_path>/dictionaries`` if it does not exist and
        loads the global custom word list from it. No backend is loaded
        until set_language() is called.

        Args:
            user_data_path: Path to user data directory for custom dictionaries.
                Defaults to the relative directory "user_data".
        """
        self.user_data_path = Path(user_data_path) if user_data_path else Path("user_data")
        self.dictionaries_path = self.user_data_path / "dictionaries"
        self.custom_words_file = self.dictionaries_path / "custom_words.txt"

        # Ensure directories exist
        self.dictionaries_path.mkdir(parents=True, exist_ok=True)

        # Current spell checker instance (at most one is non-None)
        self._hunspell: Optional[Hunspell] = None
        self._spylls = None  # SpyllsDictionary instance
        self._spellchecker: Optional[SpellChecker] = None
        self._current_language: Optional[str] = None
        self._backend: str = "none"  # Track which backend is active

        # Custom words (global, persisted, stored lowercased)
        self._custom_words: Set[str] = set()
        self._load_custom_words()

        # Session-only ignored words (stored lowercased)
        self._ignored_words: Set[str] = set()

        # Cache of backend verdicts, keyed by the word exactly as checked
        self._word_cache: Dict[str, bool] = {}

        # Enabled state
        self.enabled = True

        # Safety flag - if spellcheck crashes, disable permanently for session
        self._crash_detected = False

    def _load_custom_words(self):
        """Reload the custom word set from custom_words.txt.

        Blank lines and lines starting with '#' are ignored; every other
        line is stored lowercased. Read errors are reported on stdout and
        leave whatever was loaded so far.
        """
        self._custom_words.clear()
        if not self.custom_words_file.exists():
            return
        try:
            with open(self.custom_words_file, 'r', encoding='utf-8') as f:
                for line in f:
                    word = line.strip()
                    if word and not word.startswith('#'):
                        self._custom_words.add(word.lower())
        except Exception as e:
            print(f"Error loading custom words: {e}")

    def _save_custom_words(self):
        """Write the custom word set to custom_words.txt, sorted, under a
        two-line comment header. Write errors are reported on stdout."""
        try:
            with open(self.custom_words_file, 'w', encoding='utf-8') as f:
                f.write("# Supervertaler Custom Dictionary\n")
                f.write("# Add words that should not be marked as spelling errors\n\n")
                for word in sorted(self._custom_words):
                    f.write(f"{word}\n")
        except Exception as e:
            print(f"Error saving custom words: {e}")

    def set_language(self, language: str) -> bool:
        """
        Set the spellcheck language.

        Args:
            language: Language display name (e.g., "English (US)", "English (GB)"),
                     simple name (e.g., "English", "Dutch"), short code (e.g., "nl", "en"),
                     or full code (e.g., "en_US", "nl_NL")

        Returns:
            True if language was set successfully (or was already active).
            False if ``language`` is empty/None or no backend could load it;
            in the latter case the previously active backend, if any, is
            left in place.
        """
        # An empty/None language cannot be resolved. Without this guard
        # None compared equal to the initial _current_language and was
        # reported as "already set", and "" silently loaded English.
        if not language:
            return False

        # Resolve to a code: display name -> legacy name -> short code ->
        # the input itself (might be en_US already)
        lang_code = (
            self.DISPLAY_TO_CODE.get(language)
            or self.LANGUAGE_MAP.get(language)
            or self.SHORT_CODE_MAP.get(language.lower())
            or language
        )

        if lang_code == self._current_language:
            return True  # Already set

        # Clear cache when changing language
        self._word_cache.clear()

        # Try Hunspell first (cyhunspell - may not work on Windows/Py3.12)
        if HAS_HUNSPELL and self._try_hunspell(lang_code):
            self._current_language = lang_code
            self._spylls = None
            self._spellchecker = None
            self._backend = "hunspell"
            return True

        # Try spylls (pure Python Hunspell - works everywhere, supports regional variants)
        if HAS_SPYLLS and self._try_spylls(lang_code):
            self._current_language = lang_code
            self._hunspell = None
            self._spellchecker = None
            self._backend = "spylls"
            return True

        # Fallback to pyspellchecker (no regional variants)
        if HAS_SPELLCHECKER and self._try_spellchecker(lang_code):
            self._current_language = lang_code
            self._hunspell = None
            self._spylls = None
            self._backend = "pyspellchecker"
            return True

        return False

    def _try_spylls(self, lang_code: str) -> bool:
        """Try to initialize spylls (pure Python Hunspell) with the given language.

        Looks for ``<lang_code>.dic`` + ``.aff`` in the dictionaries folder,
        then in its subdirectories; if none is found, asks spylls to load
        ``lang_code`` directly. Sets ``self._spylls`` and returns True on
        success, otherwise returns False.
        """
        try:
            # Check for dictionary files in user_data/dictionaries (and subdirectories)
            dic_file = None
            aff_file = None

            # First check root folder
            root_dic = self.dictionaries_path / f"{lang_code}.dic"
            root_aff = self.dictionaries_path / f"{lang_code}.aff"
            if root_dic.exists() and root_aff.exists():
                dic_file = root_dic
                aff_file = root_aff
            else:
                # Search in subdirectories (e.g., dictionaries/en/en_GB.dic)
                for found_dic in self.dictionaries_path.glob(f"**/{lang_code}.dic"):
                    found_aff = found_dic.with_suffix('.aff')
                    if found_aff.exists():
                        dic_file = found_dic
                        aff_file = found_aff
                        break

            if dic_file and aff_file:
                # Load from local dictionaries folder
                # (from_files expects a base path without extension)
                self._spylls = SpyllsDictionary.from_files(str(dic_file.with_suffix('')))
                return True

            # Try loading from spylls' built-in dictionaries (if any)
            try:
                self._spylls = SpyllsDictionary.from_files(lang_code)
                return True
            except Exception:
                return False
        except Exception as e:
            print(f"Spylls initialization failed for {lang_code}: {e}")
            return False

    def _try_hunspell(self, lang_code: str) -> bool:
        """Try to initialize Hunspell with the given language.

        Uses ``<lang_code>.dic``/``.aff`` from the root of the dictionaries
        folder when both exist, otherwise whatever ``Hunspell(lang_code)``
        finds on its own. The new object must survive one test ``spell()``
        call before it is stored in ``self._hunspell``.
        """
        try:
            # Check for dictionary files in user_data/dictionaries
            dic_file = self.dictionaries_path / f"{lang_code}.dic"
            aff_file = self.dictionaries_path / f"{lang_code}.aff"

            hunspell_obj = None
            if dic_file.exists() and aff_file.exists():
                hunspell_obj = Hunspell(lang_code, hunspell_data_dir=str(self.dictionaries_path))
            else:
                # Try system dictionaries
                try:
                    hunspell_obj = Hunspell(lang_code)
                except Exception:
                    return False

            if hunspell_obj:
                # CRITICAL: Test the spell checker with a simple word to catch potential crashes early
                # Some Hunspell configurations on Linux can crash on first use
                try:
                    hunspell_obj.spell("test")
                    self._hunspell = hunspell_obj
                    return True
                except Exception as e:
                    print(f"Hunspell test spell failed for {lang_code}: {e}")
                    return False

            return False
        except Exception as e:
            print(f"Hunspell initialization failed for {lang_code}: {e}")
            return False

    def _try_spellchecker(self, lang_code: str) -> bool:
        """Try to initialize pyspellchecker with the given language.

        Only the part of ``lang_code`` before '_' is used. Sets
        ``self._spellchecker`` and returns True on success; on any failure
        ``self._spellchecker`` is reset to None and False is returned.
        """
        try:
            # pyspellchecker uses 2-letter codes
            short_code = lang_code.split('_')[0].lower()

            # Check if language is supported
            # pyspellchecker supports: en, es, de, fr, pt, nl, it, ru, ar, eu, lv
            supported = ['en', 'es', 'de', 'fr', 'pt', 'nl', 'it', 'ru', 'ar', 'eu', 'lv']

            # NOTE(review): an unsupported language silently falls back to the
            # English dictionary and is still reported as success - confirm
            # this is intended, since e.g. Polish text would then be checked
            # against English.
            target_lang = short_code if short_code in supported else 'en'

            # Create the spellchecker instance
            self._spellchecker = SpellChecker(language=target_lang)

            # Verify it's actually working by testing a common word
            # Use a simple spell check instead of checking word_frequency length
            # (word_frequency is a WordFrequency object that doesn't support len())
            try:
                test_result = self._spellchecker.known(['the', 'test'])
                if not test_result:
                    print(f"SpellChecker: Dictionary appears empty for {target_lang}")
                    self._spellchecker = None
                    return False
            except Exception:
                # If known() fails, the spellchecker is likely broken
                self._spellchecker = None
                return False

            return True
        except Exception as e:
            print(f"SpellChecker initialization failed for {lang_code}: {e}")
            self._spellchecker = None
            return False

    def check_word(self, word: str) -> bool:
        """
        Check if a word is spelled correctly.

        Custom and ignored words match case-insensitively. Backend verdicts
        are cached per exact spelling, because Hunspell/spylls lookups are
        case-sensitive ("Paris" may be valid while "paris" is not).

        Args:
            word: The word to check

        Returns:
            True if the word is correct, False if misspelled. Also True when
            checking is disabled, a crash was detected, the word is shorter
            than two characters, no backend is loaded, or a backend raised.
        """
        # If a crash was detected earlier, or checking is switched off,
        # never touch the backend.
        if self._crash_detected or not self.enabled:
            return True

        if not word or len(word) < 2:
            return True

        word_lower = word.lower()

        # Custom words and session-ignored words are consulted before the
        # cache, so adding/removing them never leaves a stale cached verdict.
        if word_lower in self._custom_words or word_lower in self._ignored_words:
            return True

        # Cache is keyed by the word as written (case preserved).
        if word in self._word_cache:
            return self._word_cache[word]

        # Skip if it looks like a number, tag, or special text
        if self._should_skip_word(word):
            self._word_cache[word] = True
            return True

        # Check with spell checker
        is_correct = False

        if self._hunspell:
            try:
                is_correct = self._hunspell.spell(word)
            except Exception as e:
                # If Hunspell crashes, disable for the session
                print(f"Hunspell spell check error: {e}")
                self._crash_detected = True
                self.enabled = False
                is_correct = True  # Fail open
        elif self._spylls:
            try:
                is_correct = self._spylls.lookup(word)
            except Exception as e:
                print(f"Spylls spell check error: {e}")
                is_correct = True
        elif self._spellchecker:
            try:
                # Membership test against pyspellchecker's word list
                is_correct = word_lower in self._spellchecker
            except Exception as e:
                print(f"pyspellchecker error: {e}")
                is_correct = True
        else:
            is_correct = True  # No spell checker available

        self._word_cache[word] = is_correct
        return is_correct

    def _should_skip_word(self, word: str) -> bool:
        """Check if a word should be skipped (numbers, tags, etc.)"""
        # Skip numbers
        if re.match(r'^[\d.,]+$', word):
            return True

        # Skip words with numbers mixed in (like serial numbers)
        if re.search(r'\d', word):
            return True

        # Skip single characters
        if len(word) < 2:
            return True

        # Skip ALL CAPS (likely acronyms)
        if word.isupper() and len(word) <= 5:
            return True

        # Skip HTML/XML-like tags
        if word.startswith('<') or word.endswith('>'):
            return True

        # Skip words starting with special characters
        if word[0] in '@#$%&':
            return True

        return False

    def get_suggestions(self, word: str, max_suggestions: int = 5) -> List[str]:
        """
        Get spelling suggestions for a misspelled word.

        Args:
            word: The misspelled word
            max_suggestions: Maximum number of suggestions to return

        Returns:
            List of suggested corrections. Empty when the word is longer
            than 12 characters, no backend is loaded, or the backend raised.
        """
        # Skip suggestions for very long words - spylls can hang for 30+ seconds
        # on long Dutch compound words like "gegevensverwerking" (18 chars)
        if len(word) > self._MAX_SUGGEST_WORD_LENGTH:
            return []

        if self._hunspell:
            try:
                suggestions = self._hunspell.suggest(word)
                return suggestions[:max_suggestions]
            except Exception:
                return []
        elif self._spylls:
            try:
                suggestions = list(self._spylls.suggest(word))
                return suggestions[:max_suggestions]
            except Exception:
                return []
        elif self._spellchecker:
            try:
                candidates = self._spellchecker.candidates(word.lower())
                if candidates:
                    return list(candidates)[:max_suggestions]
            except Exception:
                return []

        return []

    def add_to_dictionary(self, word: str):
        """
        Add a word to the custom dictionary (persistent).

        The word is stored lowercased and the custom word file is rewritten
        immediately.

        Args:
            word: The word to add
        """
        word_lower = word.lower()
        self._custom_words.add(word_lower)
        self._word_cache[word_lower] = True
        self._save_custom_words()

        # Also add to Hunspell session if available
        if self._hunspell:
            try:
                self._hunspell.add(word)
            except Exception:
                # Best effort: the word is already accepted via _custom_words
                pass

    def ignore_word(self, word: str):
        """
        Ignore a word for the current session only.

        Args:
            word: The word to ignore
        """
        word_lower = word.lower()
        self._ignored_words.add(word_lower)
        self._word_cache[word_lower] = True

    def remove_from_dictionary(self, word: str):
        """
        Remove a word from the custom dictionary.

        Also drops any cached verdict for the word so the backend is asked
        again, and rewrites the custom word file.

        Args:
            word: The word to remove
        """
        word_lower = word.lower()
        self._custom_words.discard(word_lower)
        # The cache is keyed by exact spelling; clear both forms.
        self._word_cache.pop(word_lower, None)
        self._word_cache.pop(word, None)
        self._save_custom_words()

    def get_custom_words(self) -> List[str]:
        """Get all custom dictionary words"""
        return sorted(self._custom_words)

    def check_text(self, text: str) -> List[Tuple[int, int, str]]:
        """
        Check text and return list of misspelled words with positions.

        Words are runs of Latin, Greek or Cyrillic letters delimited by
        word boundaries; tokens touching digits or underscores are not
        matched at all.

        Args:
            text: The text to check

        Returns:
            List of (start_pos, end_pos, word) tuples for misspelled words,
            in order of appearance. Empty when checking is disabled or the
            text is empty.
        """
        if not self.enabled or not text:
            return []

        return [
            (match.start(1), match.end(1), match.group(1))
            for match in self._WORD_PATTERN.finditer(text)
            if not self.check_word(match.group(1))
        ]

    def get_available_languages(self) -> List[str]:
        """Get list of available dictionary languages with variants (e.g., 'English (US)', 'English (GB)')

        Combines .dic files found under the dictionaries folder, .dic files
        shipped inside the spylls package, and a fixed list of
        pyspellchecker languages. Codes without a known display name are
        returned as-is. The result is sorted and free of duplicates.
        """
        available: Set[str] = set()

        # User dictionaries. "**/*.dic" matches files in the dictionaries
        # folder itself as well as in any subdirectory
        # (e.g., dictionaries/en/en_US.dic), so one scan covers both.
        if self.dictionaries_path.exists():
            for dic_file in self.dictionaries_path.glob("**/*.dic"):
                lang_code = dic_file.stem  # e.g., "en_US", "en_GB"
                # Skip hyphenation dictionaries
                if lang_code.startswith('hyph_'):
                    continue
                available.add(self.CODE_TO_DISPLAY.get(lang_code, lang_code))

        # Check spylls bundled dictionaries
        if HAS_SPYLLS:
            try:
                import spylls.hunspell
                import glob
                spylls_path = os.path.dirname(spylls.hunspell.__file__)
                bundled_dics = glob.glob(os.path.join(spylls_path, 'data', '**', '*.dic'), recursive=True)
                for dic_path in bundled_dics:
                    lang_code = os.path.basename(dic_path).replace('.dic', '')
                    available.add(self.CODE_TO_DISPLAY.get(lang_code, lang_code))
            except Exception:
                # Best effort: bundled dictionaries are optional
                pass

        # Add pyspellchecker languages if available (these don't have regional variants)
        if HAS_SPELLCHECKER:
            available.update((
                'English (US)',  # pyspellchecker uses US English
                'Spanish (ES)',
                'German (DE)',
                'French (FR)',
                'Portuguese (PT)',
                'Dutch (NL)',
                'Italian',
                'Russian',
            ))

        return sorted(available)

    def get_current_language(self) -> Optional[str]:
        """Get the current spellcheck language as display name (e.g., 'English (US)')

        Falls back to the legacy simple name, then to the raw code. Returns
        None when no language has been set.
        """
        if not self._current_language:
            return None
        # First try the new variant-aware mapping
        display = self.CODE_TO_DISPLAY.get(self._current_language)
        if display:
            return display
        # Fall back to legacy mapping
        return self.CODE_TO_LANGUAGE.get(self._current_language, self._current_language)

    def clear_cache(self):
        """Clear the word check cache"""
        self._word_cache.clear()

    def is_available(self) -> bool:
        """Check if spellchecking is available, i.e. at least one of the
        three backends (cyhunspell, spylls, pyspellchecker) was importable."""
        # spylls is a full backend in set_language(), so it counts here too.
        return HAS_HUNSPELL or HAS_SPYLLS or HAS_SPELLCHECKER

    def is_ready(self) -> bool:
        """Check if spellchecking is initialized and ready to use"""
        return self._hunspell is not None or self._spylls is not None or self._spellchecker is not None

    def get_backend_info(self) -> str:
        """Get information about the spellcheck backend

        Names the active backend and language, or - when none is loaded -
        the first importable backend together with a hint to call
        set_language().
        """
        if self._hunspell:
            return f"Hunspell ({self._current_language})"
        elif self._spylls:
            return f"Spylls/Hunspell ({self._current_language})"
        elif self._spellchecker:
            return f"pyspellchecker ({self._current_language})"
        elif HAS_HUNSPELL:
            return "Hunspell (not initialized - call set_language first)"
        elif HAS_SPYLLS:
            return "Spylls (not initialized - call set_language first)"
        elif HAS_SPELLCHECKER:
            return "pyspellchecker (not initialized - call set_language first)"
        else:
            return "No spellcheck backend available"

    def get_diagnostics(self) -> dict:
        """Get diagnostic information about the spellcheck system

        Returns a dict of import flags, initialisation flags, the current
        language/backend, and sizes of the custom, ignored and cache sets.
        When pyspellchecker is active, 'pyspellchecker_word_count' is a
        sample count capped at 1000, or the string "available" if the
        words cannot be counted.
        """
        from itertools import islice

        info = {
            'hunspell_available': HAS_HUNSPELL,
            'spylls_available': HAS_SPYLLS,
            'pyspellchecker_available': HAS_SPELLCHECKER,
            'pyspellchecker_import_error': SPELLCHECKER_IMPORT_ERROR,
            'hunspell_initialized': self._hunspell is not None,
            'spylls_initialized': self._spylls is not None,
            'pyspellchecker_initialized': self._spellchecker is not None,
            'current_language': self._current_language,
            'backend': self._backend,
            'enabled': self.enabled,
            'custom_words_count': len(self._custom_words),
            'ignored_words_count': len(self._ignored_words),
            'cache_size': len(self._word_cache),
            'dictionaries_path': str(self.dictionaries_path),
        }

        # Check if pyspellchecker word frequency data is available
        if self._spellchecker and hasattr(self._spellchecker, 'word_frequency'):
            # WordFrequency doesn't support len(), use alternative method
            try:
                wf = self._spellchecker.word_frequency
                if hasattr(wf, 'keys'):
                    # Count at most 1000 keys without building the full list
                    info['pyspellchecker_word_count'] = sum(1 for _ in islice(wf.keys(), 1000))  # Sample size
                else:
                    info['pyspellchecker_word_count'] = "available"
            except Exception:
                info['pyspellchecker_word_count'] = "available"

        return info
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
# Singleton instance
|
|
719
|
+
_spellcheck_manager: Optional[SpellcheckManager] = None
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
def get_spellcheck_manager(user_data_path: str = None) -> SpellcheckManager:
    """Return the process-wide SpellcheckManager, creating it on first use.

    Args:
        user_data_path: Passed to SpellcheckManager only when the instance
            is created; ignored on later calls, which return the existing
            instance unchanged.
    """
    global _spellcheck_manager
    manager = _spellcheck_manager
    if manager is None:
        manager = SpellcheckManager(user_data_path)
        _spellcheck_manager = manager
    return manager
|