supervertaler 1.9.163__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. Supervertaler.py +48473 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1911 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +351 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1176 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.163.dist-info/METADATA +906 -0
  81. supervertaler-1.9.163.dist-info/RECORD +85 -0
  82. supervertaler-1.9.163.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.163.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.163.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.163.dist-info/top_level.txt +2 -0
@@ -0,0 +1,727 @@
1
+ """
2
+ Spellcheck Manager for Supervertaler
3
+ =====================================
4
+ Provides spellchecking functionality using Hunspell dictionaries.
5
+ Supports custom word lists and project-specific dictionaries.
6
+
7
+ Features:
8
+ - Hunspell dictionary support (via cyhunspell or spylls)
9
+ - Spylls: Pure Python Hunspell (works on Windows/Python 3.12+)
10
+ - Fallback to pyspellchecker for basic checking
11
+ - Custom word lists (global and per-project)
12
+ - Integration with PyQt6 text editors
13
+ """
14
+
15
+ import os
16
+ import re
17
+ from pathlib import Path
18
+ from typing import List, Set, Dict, Optional, Tuple
19
+
20
+ # Try to import hunspell (cyhunspell) - may fail on Windows/Python 3.12+
21
+ try:
22
+ from hunspell import Hunspell
23
+ HAS_HUNSPELL = True
24
+ except ImportError:
25
+ HAS_HUNSPELL = False
26
+ Hunspell = None
27
+
28
+ # Try spylls (pure Python Hunspell reimplementation) - works on all platforms
29
+ try:
30
+ from spylls.hunspell import Dictionary as SpyllsDictionary
31
+ HAS_SPYLLS = True
32
+ except ImportError:
33
+ HAS_SPYLLS = False
34
+ SpyllsDictionary = None
35
+
36
+ # Fallback to pyspellchecker (no regional variants like en_US vs en_GB)
37
+ SPELLCHECKER_IMPORT_ERROR = None
38
+ try:
39
+ from spellchecker import SpellChecker
40
+ HAS_SPELLCHECKER = True
41
+ except ImportError as e:
42
+ HAS_SPELLCHECKER = False
43
+ SpellChecker = None
44
+ SPELLCHECKER_IMPORT_ERROR = str(e)
45
+
46
+
47
class SpellcheckManager:
    """
    Manages spellchecking for Supervertaler.

    Supports:
    - Hunspell dictionaries (.dic/.aff files)
    - Custom word lists
    - Per-project dictionaries

    Backends are tried in this order by ``set_language``: cyhunspell,
    spylls, pyspellchecker. Exactly one backend is active at a time.
    """

    # Map language codes to display names with variants
    CODE_TO_DISPLAY = {
        'en_US': 'English (US)',
        'en_GB': 'English (GB)',
        'en_AU': 'English (AU)',
        'en_CA': 'English (CA)',
        'en_ZA': 'English (ZA)',
        'nl_NL': 'Dutch (NL)',
        'nl_BE': 'Dutch (BE)',
        'de_DE': 'German (DE)',
        'de_AT': 'German (AT)',
        'de_CH': 'German (CH)',
        'fr_FR': 'French (FR)',
        'fr_CA': 'French (CA)',
        'fr_BE': 'French (BE)',
        'fr_CH': 'French (CH)',
        'es_ES': 'Spanish (ES)',
        'es_MX': 'Spanish (MX)',
        'es_AR': 'Spanish (AR)',
        'pt_PT': 'Portuguese (PT)',
        'pt_BR': 'Portuguese (BR)',
        'it_IT': 'Italian',
        'pl_PL': 'Polish',
        'ru_RU': 'Russian',
        'sv_SE': 'Swedish',
        'da_DK': 'Danish',
        'nb_NO': 'Norwegian (Bokmål)',
        'nn_NO': 'Norwegian (Nynorsk)',
        'fi_FI': 'Finnish',
        'cs_CZ': 'Czech',
        'sk_SK': 'Slovak',
        'hu_HU': 'Hungarian',
        'ro_RO': 'Romanian',
        'bg_BG': 'Bulgarian',
        'uk_UA': 'Ukrainian',
        'el_GR': 'Greek',
        'tr_TR': 'Turkish',
        'zh_CN': 'Chinese (Simplified)',
        'zh_TW': 'Chinese (Traditional)',
        'ja_JP': 'Japanese',
        'ko_KR': 'Korean',
    }

    # Reverse mapping: display name to code
    DISPLAY_TO_CODE = {v: k for k, v in CODE_TO_DISPLAY.items()}

    # Legacy mapping for project files that use simple names like "English"
    LANGUAGE_MAP = {
        'English': 'en_US',
        'Dutch': 'nl_NL',
        'German': 'de_DE',
        'French': 'fr_FR',
        'Spanish': 'es_ES',
        'Italian': 'it_IT',
        'Portuguese': 'pt_PT',
        'Polish': 'pl_PL',
        'Russian': 'ru_RU',
        'Chinese': 'zh_CN',
        'Japanese': 'ja_JP',
        'Korean': 'ko_KR',
    }

    # Short code mappings (for project files that store "nl" instead of "Dutch")
    SHORT_CODE_MAP = {
        'en': 'en_US',
        'nl': 'nl_NL',
        'de': 'de_DE',
        'fr': 'fr_FR',
        'es': 'es_ES',
        'it': 'it_IT',
        'pt': 'pt_PT',
        'pl': 'pl_PL',
        'ru': 'ru_RU',
        'zh': 'zh_CN',
        'ja': 'ja_JP',
        'ko': 'ko_KR',
    }

    # Reverse mapping (legacy)
    CODE_TO_LANGUAGE = {v: k for k, v in LANGUAGE_MAP.items()}

    # Words longer than this get no suggestions: spylls can hang for 30+
    # seconds on long Dutch compounds like "gegevensverwerking" (18 chars).
    MAX_SUGGEST_WORD_LENGTH = 12

    # Tokeniser for check_text(). Covers basic Latin, Latin-1 letters (without
    # the multiplication/division signs), Latin Extended-A/B, Greek and
    # Cyrillic so that every alphabetic language listed in CODE_TO_DISPLAY is
    # actually checked. CJK text is deliberately left out of the pattern.
    _WORD_PATTERN = re.compile(
        r'\b([A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u024F'
        r'\u0370-\u03FF\u0400-\u04FF]+)\b'
    )

    def __init__(self, user_data_path: Optional[str] = None):
        """
        Initialize the spellcheck manager.

        Creates ``<user_data_path>/dictionaries`` if it does not exist and
        loads the global custom word list from it.

        Args:
            user_data_path: Path to user data directory for custom
                dictionaries. Defaults to the relative path "user_data".
        """
        self.user_data_path = Path(user_data_path) if user_data_path else Path("user_data")
        self.dictionaries_path = self.user_data_path / "dictionaries"
        self.custom_words_file = self.dictionaries_path / "custom_words.txt"

        # Ensure directories exist
        self.dictionaries_path.mkdir(parents=True, exist_ok=True)

        # Backend instances; at most one is non-None once a language is set
        self._hunspell = None       # cyhunspell Hunspell instance
        self._spylls = None         # spylls Dictionary instance
        self._spellchecker = None   # pyspellchecker SpellChecker instance
        self._current_language: Optional[str] = None
        self._backend: str = "none"  # Track which backend is active

        # Custom words (global, stored lowercase)
        self._custom_words: Set[str] = set()
        self._load_custom_words()

        # Session-only ignored words (stored lowercase)
        self._ignored_words: Set[str] = set()

        # Cache of backend verdicts, keyed by the word exactly as checked
        self._word_cache: Dict[str, bool] = {}

        # Enabled state
        self.enabled = True

        # Safety flag - if spellcheck crashes, disable permanently for session
        self._crash_detected = False

    def _load_custom_words(self):
        """Reload the custom word set from disk, skipping blanks and '#' comments."""
        self._custom_words.clear()
        if not self.custom_words_file.exists():
            return
        try:
            with open(self.custom_words_file, 'r', encoding='utf-8') as f:
                for line in f:
                    word = line.strip()
                    if word and not word.startswith('#'):
                        self._custom_words.add(word.lower())
        except (OSError, UnicodeError) as e:
            print(f"Error loading custom words: {e}")

    def _save_custom_words(self):
        """Write the custom word set to disk, one sorted word per line."""
        try:
            with open(self.custom_words_file, 'w', encoding='utf-8') as f:
                f.write("# Supervertaler Custom Dictionary\n")
                f.write("# Add words that should not be marked as spelling errors\n\n")
                f.writelines(f"{word}\n" for word in sorted(self._custom_words))
        except OSError as e:
            print(f"Error saving custom words: {e}")

    def _forget_cached(self, word_lower: str) -> None:
        """Drop every cached verdict whose key is a case variant of *word_lower*."""
        stale = [key for key in self._word_cache if key.lower() == word_lower]
        for key in stale:
            del self._word_cache[key]

    def _find_dictionary(self, lang_code: str) -> Optional[Path]:
        """
        Locate a ``<lang_code>.dic`` file that has a sibling ``.aff`` file.

        The dictionaries root is preferred; otherwise subdirectories are
        searched (e.g. dictionaries/en/en_GB.dic) in sorted order so the
        choice is deterministic.

        Returns:
            Path to the .dic file, or None if no complete pair exists.
        """
        root_dic = self.dictionaries_path / f"{lang_code}.dic"
        if root_dic.exists() and root_dic.with_suffix('.aff').exists():
            return root_dic
        for found_dic in sorted(self.dictionaries_path.glob(f"**/{lang_code}.dic")):
            if found_dic.with_suffix('.aff').exists():
                return found_dic
        return None

    def _activate(self, backend: str, lang_code: str) -> None:
        """Record *backend* as the active one and drop the other backend instances."""
        if backend != "hunspell":
            self._hunspell = None
        if backend != "spylls":
            self._spylls = None
        if backend != "pyspellchecker":
            self._spellchecker = None
        self._current_language = lang_code
        self._backend = backend

    def set_language(self, language: str) -> bool:
        """
        Set the spellcheck language.

        Args:
            language: Language display name (e.g., "English (US)", "English (GB)"),
                simple name (e.g., "English", "Dutch"), short code (e.g., "nl", "en"),
                or full code (e.g., "en_US", "nl_NL")

        Returns:
            True if language was set successfully (or was already active),
            False if no backend could load it or *language* is empty.
        """
        # An empty language can never be loaded. Without this guard a fresh
        # manager would report success because None == self._current_language.
        if not language:
            return False

        # Resolve to a code: display name -> legacy name -> short code -> as given
        lang_code = (
            self.DISPLAY_TO_CODE.get(language)
            or self.LANGUAGE_MAP.get(language)
            or self.SHORT_CODE_MAP.get(language.lower())
            or language
        )

        if lang_code == self._current_language:
            return True  # Already set

        # Clear cache when changing language
        self._word_cache.clear()

        # Try Hunspell first (cyhunspell - may not work on Windows/Py3.12)
        if HAS_HUNSPELL and self._try_hunspell(lang_code):
            self._activate("hunspell", lang_code)
            return True

        # Try spylls (pure Python Hunspell - works everywhere, supports regional variants)
        if HAS_SPYLLS and self._try_spylls(lang_code):
            self._activate("spylls", lang_code)
            return True

        # Fallback to pyspellchecker (no regional variants)
        if HAS_SPELLCHECKER and self._try_spellchecker(lang_code):
            self._activate("pyspellchecker", lang_code)
            return True

        return False

    def _try_spylls(self, lang_code: str) -> bool:
        """Try to initialize spylls (pure Python Hunspell) with the given language"""
        try:
            dic_file = self._find_dictionary(lang_code)
            if dic_file is not None:
                # from_files expects the base path without extension
                self._spylls = SpyllsDictionary.from_files(str(dic_file.with_suffix('')))
                return True

            # No local pair: let spylls resolve the bare code itself
            try:
                self._spylls = SpyllsDictionary.from_files(lang_code)
                return True
            except Exception:
                return False
        except Exception as e:
            print(f"Spylls initialization failed for {lang_code}: {e}")
            return False

    def _try_hunspell(self, lang_code: str) -> bool:
        """Try to initialize Hunspell with the given language"""
        try:
            dic_file = self._find_dictionary(lang_code)
            if dic_file is not None:
                # Point Hunspell at whichever folder holds the .dic/.aff pair,
                # so dictionaries in subdirectories work as they do for spylls
                hunspell_obj = Hunspell(lang_code, hunspell_data_dir=str(dic_file.parent))
            else:
                # Try system dictionaries
                try:
                    hunspell_obj = Hunspell(lang_code)
                except Exception:
                    return False

            if not hunspell_obj:
                return False

            # CRITICAL: Test the spell checker with a simple word to catch potential crashes early
            # Some Hunspell configurations on Linux can crash on first use
            try:
                hunspell_obj.spell("test")
            except Exception as e:
                print(f"Hunspell test spell failed for {lang_code}: {e}")
                return False

            self._hunspell = hunspell_obj
            return True
        except Exception as e:
            print(f"Hunspell initialization failed for {lang_code}: {e}")
            return False

    def _try_spellchecker(self, lang_code: str) -> bool:
        """
        Try to initialize pyspellchecker with the given language.

        The instance is only stored on success, so a failed attempt never
        discards a checker that was already working.
        """
        try:
            # pyspellchecker uses 2-letter codes
            short_code = lang_code.split('_')[0].lower()

            # pyspellchecker supports: en, es, de, fr, pt, nl, it, ru, ar, eu, lv
            supported = ('en', 'es', 'de', 'fr', 'pt', 'nl', 'it', 'ru', 'ar', 'eu', 'lv')

            # NOTE(review): unsupported languages fall back to the English
            # dictionary, so their text is checked against English words.
            target_lang = short_code if short_code in supported else 'en'

            checker = SpellChecker(language=target_lang)

            # Verify it's actually working by testing a common word
            # (word_frequency is a WordFrequency object that doesn't support len())
            try:
                known_words = checker.known(['the', 'test'])
            except Exception:
                # If known() fails, the spellchecker is likely broken
                return False
            if not known_words:
                print(f"SpellChecker: Dictionary appears empty for {target_lang}")
                return False

            self._spellchecker = checker
            return True
        except Exception as e:
            print(f"SpellChecker initialization failed for {lang_code}: {e}")
            return False

    def check_word(self, word: str) -> bool:
        """
        Check if a word is spelled correctly.

        Custom and ignored words match case-insensitively. Backend verdicts
        are cached per exact spelling, because Hunspell-style backends are
        case-sensitive ("Paris" vs "paris").

        Args:
            word: The word to check

        Returns:
            True if the word is correct, False if misspelled. Also True when
            checking is disabled, a crash was detected, the word is shorter
            than two characters, or no backend is initialised.
        """
        # If a crash was detected earlier, never attempt another spellcheck
        if self._crash_detected or not self.enabled:
            return True

        if not word or len(word) < 2:
            return True

        word_lower = word.lower()

        # User-approved words win over any cached or backend verdict
        if word_lower in self._custom_words or word_lower in self._ignored_words:
            return True

        cached = self._word_cache.get(word)
        if cached is not None:
            return cached

        # Skip if it looks like a number, tag, or special text
        if self._should_skip_word(word):
            self._word_cache[word] = True
            return True

        if self._hunspell:
            try:
                is_correct = bool(self._hunspell.spell(word))
            except Exception as e:
                # If Hunspell crashes, disable for the session
                print(f"Hunspell spell check error: {e}")
                self._crash_detected = True
                self.enabled = False
                is_correct = True  # Fail open
        elif self._spylls:
            try:
                is_correct = bool(self._spylls.lookup(word))
            except Exception as e:
                print(f"Spylls spell check error: {e}")
                is_correct = True
        elif self._spellchecker:
            try:
                # Membership test on SpellChecker: True when the word is known
                is_correct = word_lower in self._spellchecker
            except Exception as e:
                print(f"pyspellchecker error: {e}")
                is_correct = True
        else:
            is_correct = True  # No spell checker available

        self._word_cache[word] = is_correct
        return is_correct

    def _should_skip_word(self, word: str) -> bool:
        """Check if a word should be skipped (numbers, tags, etc.)"""
        # Skip single characters (and the empty string)
        if len(word) < 2:
            return True

        # Skip anything containing a digit: plain numbers and serial numbers alike
        if re.search(r'\d', word):
            return True

        # Skip numeric punctuation runs such as ".," that contain no digit
        if re.match(r'^[\d.,]+$', word):
            return True

        # Skip short ALL CAPS words (likely acronyms)
        if word.isupper() and len(word) <= 5:
            return True

        # Skip HTML/XML-like tags
        if word.startswith('<') or word.endswith('>'):
            return True

        # Skip words starting with special characters
        return word[0] in '@#$%&'

    def get_suggestions(self, word: str, max_suggestions: int = 5) -> List[str]:
        """
        Get spelling suggestions for a misspelled word.

        Args:
            word: The misspelled word
            max_suggestions: Maximum number of suggestions to return;
                negative values are treated as 0

        Returns:
            List of suggested corrections; empty if the word is empty, longer
            than MAX_SUGGEST_WORD_LENGTH, no backend is active, or the
            backend raised.
        """
        from itertools import islice

        if not word or len(word) > self.MAX_SUGGEST_WORD_LENGTH:
            return []

        limit = max(0, max_suggestions)

        try:
            if self._hunspell:
                return list(self._hunspell.suggest(word))[:limit]
            if self._spylls:
                # spylls yields suggestions lazily; stop as soon as we have enough
                return list(islice(self._spylls.suggest(word), limit))
            if self._spellchecker:
                # candidates() returns an unordered set (or None); sort for stable output
                candidates = self._spellchecker.candidates(word.lower())
                return sorted(candidates)[:limit] if candidates else []
        except Exception:
            return []

        return []

    def add_to_dictionary(self, word: str):
        """
        Add a word to the custom dictionary (persistent).

        Args:
            word: The word to add; surrounding whitespace is ignored and an
                empty word is a no-op
        """
        word_lower = word.strip().lower() if word else ''
        if not word_lower:
            return

        self._custom_words.add(word_lower)
        self._forget_cached(word_lower)
        self._save_custom_words()

        # Also add to Hunspell session if available
        if self._hunspell:
            try:
                self._hunspell.add(word)
            except Exception:
                pass  # best effort: the custom word set already covers it

    def ignore_word(self, word: str):
        """
        Ignore a word for the current session only.

        Args:
            word: The word to ignore
        """
        word_lower = word.lower()
        self._ignored_words.add(word_lower)
        self._forget_cached(word_lower)

    def remove_from_dictionary(self, word: str):
        """
        Remove a word from the custom dictionary.

        Args:
            word: The word to remove
        """
        word_lower = word.strip().lower() if word else ''
        self._custom_words.discard(word_lower)
        self._forget_cached(word_lower)
        self._save_custom_words()

        # Undo the session-level add so the backend stops accepting the word
        if self._hunspell:
            try:
                self._hunspell.remove(word)
            except Exception:
                pass  # best effort: not every Hunspell binding exposes remove()

    def get_custom_words(self) -> List[str]:
        """Get all custom dictionary words"""
        return sorted(self._custom_words)

    def check_text(self, text: str) -> List[Tuple[int, int, str]]:
        """
        Check text and return list of misspelled words with positions.

        Args:
            text: The text to check

        Returns:
            List of (start_pos, end_pos, word) tuples for misspelled words,
            in order of appearance; end_pos is exclusive
        """
        if not self.enabled or not text:
            return []

        return [
            (match.start(1), match.end(1), match.group(1))
            for match in self._WORD_PATTERN.finditer(text)
            if not self.check_word(match.group(1))
        ]

    def get_available_languages(self) -> List[str]:
        """Get list of available dictionary languages with variants (e.g., 'English (US)', 'English (GB)')"""
        available: Set[str] = set()

        def register(lang_code: str) -> None:
            # Hyphenation dictionaries share the .dic extension but are not spellcheckers
            if not lang_code.startswith('hyph_'):
                available.add(self.CODE_TO_DISPLAY.get(lang_code, lang_code))

        # User dictionaries: "**/*.dic" covers the root folder and all subdirectories
        if self.dictionaries_path.exists():
            for dic_file in self.dictionaries_path.glob("**/*.dic"):
                register(dic_file.stem)  # e.g., "en_US", "en_GB"

        # Check spylls bundled dictionaries
        if HAS_SPYLLS:
            try:
                import spylls.hunspell
                bundled_root = Path(spylls.hunspell.__file__).parent / 'data'
                for dic_file in bundled_root.glob('**/*.dic'):
                    register(dic_file.stem)
            except Exception:
                pass  # bundled data is optional; user dictionaries still count

        # Add pyspellchecker languages if available (these don't have regional variants)
        if HAS_SPELLCHECKER:
            available.update((
                'English (US)',  # pyspellchecker uses US English
                'Spanish (ES)',
                'German (DE)',
                'French (FR)',
                'Portuguese (PT)',
                'Dutch (NL)',
                'Italian',
                'Russian',
            ))

        return sorted(available)

    def get_current_language(self) -> Optional[str]:
        """Get the current spellcheck language as display name (e.g., 'English (US)')"""
        code = self._current_language
        if not code:
            return None
        # Variant-aware mapping first, then legacy names, then the raw code
        return self.CODE_TO_DISPLAY.get(code) or self.CODE_TO_LANGUAGE.get(code, code)

    def clear_cache(self):
        """Clear the word check cache"""
        self._word_cache.clear()

    def is_available(self) -> bool:
        """Check if spellchecking is available (any of the three backends is importable)"""
        return HAS_HUNSPELL or HAS_SPYLLS or HAS_SPELLCHECKER

    def is_ready(self) -> bool:
        """Check if spellchecking is initialized and ready to use"""
        return any(
            backend is not None
            for backend in (self._hunspell, self._spylls, self._spellchecker)
        )

    def get_backend_info(self) -> str:
        """Get information about the spellcheck backend"""
        if self._hunspell:
            return f"Hunspell ({self._current_language})"
        if self._spylls:
            return f"Spylls/Hunspell ({self._current_language})"
        if self._spellchecker:
            return f"pyspellchecker ({self._current_language})"
        if HAS_HUNSPELL:
            return "Hunspell (not initialized - call set_language first)"
        if HAS_SPYLLS:
            return "Spylls (not initialized - call set_language first)"
        if HAS_SPELLCHECKER:
            return "pyspellchecker (not initialized - call set_language first)"
        return "No spellcheck backend available"

    def get_diagnostics(self) -> dict:
        """Get diagnostic information about the spellcheck system"""
        from itertools import islice

        info = {
            'hunspell_available': HAS_HUNSPELL,
            'spylls_available': HAS_SPYLLS,
            'pyspellchecker_available': HAS_SPELLCHECKER,
            'pyspellchecker_import_error': SPELLCHECKER_IMPORT_ERROR,
            'hunspell_initialized': self._hunspell is not None,
            'spylls_initialized': self._spylls is not None,
            'pyspellchecker_initialized': self._spellchecker is not None,
            'current_language': self._current_language,
            'backend': self._backend,
            'enabled': self.enabled,
            'custom_words_count': len(self._custom_words),
            'ignored_words_count': len(self._ignored_words),
            'cache_size': len(self._word_cache),
            'dictionaries_path': str(self.dictionaries_path),
        }

        # Check if pyspellchecker word frequency data is available
        if self._spellchecker and hasattr(self._spellchecker, 'word_frequency'):
            # WordFrequency doesn't support len(); count a bounded sample instead
            try:
                wf = self._spellchecker.word_frequency
                if hasattr(wf, 'keys'):
                    # Capped at 1000 so the whole dictionary is never materialised
                    info['pyspellchecker_word_count'] = sum(1 for _ in islice(wf.keys(), 1000))
                else:
                    info['pyspellchecker_word_count'] = "available"
            except Exception:
                info['pyspellchecker_word_count'] = "available"

        return info
716
+
717
+
718
# Process-wide manager, built lazily by get_spellcheck_manager()
_spellcheck_manager: Optional[SpellcheckManager] = None


def get_spellcheck_manager(user_data_path: str = None) -> SpellcheckManager:
    """
    Return the shared SpellcheckManager, creating it on first use.

    Args:
        user_data_path: Forwarded to SpellcheckManager only when the shared
            instance is created; ignored on later calls.

    Returns:
        The module-level SpellcheckManager instance.
    """
    global _spellcheck_manager
    existing = _spellcheck_manager
    if existing is not None:
        return existing
    created = SpellcheckManager(user_data_path)
    _spellcheck_manager = created
    return created