supervertaler 1.9.153__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of supervertaler might be problematic. Click here for more details.

Files changed (85):
  1. Supervertaler.py +47886 -0
  2. modules/__init__.py +10 -0
  3. modules/ai_actions.py +964 -0
  4. modules/ai_attachment_manager.py +343 -0
  5. modules/ai_file_viewer_dialog.py +210 -0
  6. modules/autofingers_engine.py +466 -0
  7. modules/cafetran_docx_handler.py +379 -0
  8. modules/config_manager.py +469 -0
  9. modules/database_manager.py +1878 -0
  10. modules/database_migrations.py +417 -0
  11. modules/dejavurtf_handler.py +779 -0
  12. modules/document_analyzer.py +427 -0
  13. modules/docx_handler.py +689 -0
  14. modules/encoding_repair.py +319 -0
  15. modules/encoding_repair_Qt.py +393 -0
  16. modules/encoding_repair_ui.py +481 -0
  17. modules/feature_manager.py +350 -0
  18. modules/figure_context_manager.py +340 -0
  19. modules/file_dialog_helper.py +148 -0
  20. modules/find_replace.py +164 -0
  21. modules/find_replace_qt.py +457 -0
  22. modules/glossary_manager.py +433 -0
  23. modules/image_extractor.py +188 -0
  24. modules/keyboard_shortcuts_widget.py +571 -0
  25. modules/llm_clients.py +1211 -0
  26. modules/llm_leaderboard.py +737 -0
  27. modules/llm_superbench_ui.py +1401 -0
  28. modules/local_llm_setup.py +1104 -0
  29. modules/model_update_dialog.py +381 -0
  30. modules/model_version_checker.py +373 -0
  31. modules/mqxliff_handler.py +638 -0
  32. modules/non_translatables_manager.py +743 -0
  33. modules/pdf_rescue_Qt.py +1822 -0
  34. modules/pdf_rescue_tkinter.py +909 -0
  35. modules/phrase_docx_handler.py +516 -0
  36. modules/project_home_panel.py +209 -0
  37. modules/prompt_assistant.py +357 -0
  38. modules/prompt_library.py +689 -0
  39. modules/prompt_library_migration.py +447 -0
  40. modules/quick_access_sidebar.py +282 -0
  41. modules/ribbon_widget.py +597 -0
  42. modules/sdlppx_handler.py +874 -0
  43. modules/setup_wizard.py +353 -0
  44. modules/shortcut_manager.py +932 -0
  45. modules/simple_segmenter.py +128 -0
  46. modules/spellcheck_manager.py +727 -0
  47. modules/statuses.py +207 -0
  48. modules/style_guide_manager.py +315 -0
  49. modules/superbench_ui.py +1319 -0
  50. modules/superbrowser.py +329 -0
  51. modules/supercleaner.py +600 -0
  52. modules/supercleaner_ui.py +444 -0
  53. modules/superdocs.py +19 -0
  54. modules/superdocs_viewer_qt.py +382 -0
  55. modules/superlookup.py +252 -0
  56. modules/tag_cleaner.py +260 -0
  57. modules/tag_manager.py +333 -0
  58. modules/term_extractor.py +270 -0
  59. modules/termbase_entry_editor.py +842 -0
  60. modules/termbase_import_export.py +488 -0
  61. modules/termbase_manager.py +1060 -0
  62. modules/termview_widget.py +1172 -0
  63. modules/theme_manager.py +499 -0
  64. modules/tm_editor_dialog.py +99 -0
  65. modules/tm_manager_qt.py +1280 -0
  66. modules/tm_metadata_manager.py +545 -0
  67. modules/tmx_editor.py +1461 -0
  68. modules/tmx_editor_qt.py +2784 -0
  69. modules/tmx_generator.py +284 -0
  70. modules/tracked_changes.py +900 -0
  71. modules/trados_docx_handler.py +430 -0
  72. modules/translation_memory.py +715 -0
  73. modules/translation_results_panel.py +2134 -0
  74. modules/translation_services.py +282 -0
  75. modules/unified_prompt_library.py +659 -0
  76. modules/unified_prompt_manager_qt.py +3951 -0
  77. modules/voice_commands.py +920 -0
  78. modules/voice_dictation.py +477 -0
  79. modules/voice_dictation_lite.py +249 -0
  80. supervertaler-1.9.153.dist-info/METADATA +896 -0
  81. supervertaler-1.9.153.dist-info/RECORD +85 -0
  82. supervertaler-1.9.153.dist-info/WHEEL +5 -0
  83. supervertaler-1.9.153.dist-info/entry_points.txt +2 -0
  84. supervertaler-1.9.153.dist-info/licenses/LICENSE +21 -0
  85. supervertaler-1.9.153.dist-info/top_level.txt +2 -0
@@ -0,0 +1,128 @@
1
+ """
2
+ Simple Segmenter
3
+ Basic sentence segmentation using regex patterns
4
+ """
5
+
6
+ import re
7
+ from typing import List
8
+
9
class SimpleSegmenter:
    """Simple sentence segmenter using regex patterns.

    Text is split wherever a run of sentence-ending punctuation (``.``, ``!``
    or ``?``) is followed by whitespace and then an ASCII capital letter or a
    quote character.  Splits that happened right after a known abbreviation
    (``Dr.``, ``Inc.``, ...) are repaired afterwards by merging the pieces
    back together, so no input text is ever discarded.
    """

    # Boundary: punctuation run (captured so re.split returns it), whitespace,
    # then a lookahead for an ASCII capital or a quote.  Sentences that start
    # with anything else (digits, non-ASCII capitals) are not split here.
    _BOUNDARY_RE = re.compile(r'([.!?]+)\s+(?=[A-Z"\'])')

    def __init__(self):
        # Common abbreviations that shouldn't trigger sentence breaks.
        # Stored lowercase and without the trailing period.
        self.abbreviations = {
            'mr', 'mrs', 'ms', 'dr', 'prof', 'sr', 'jr',
            'inc', 'ltd', 'co', 'corp', 'fig', 'figs',
            'etc', 'vs', 'e.g', 'i.e', 'cf', 'approx', 'ca',
            'no', 'nos', 'vol', 'p', 'pp', 'art', 'op'
        }

    def segment_text(self, text: str) -> List[str]:
        """
        Segment text into sentences.

        Newlines are turned into spaces and carriage returns are removed
        before splitting, so the result never contains line breaks.

        Returns: List of sentences (empty list for empty/whitespace-only input)
        """
        if not text or not text.strip():
            return []

        # Replace newlines with spaces (preserve paragraph structure elsewhere)
        text = text.replace('\n', ' ').replace('\r', '')

        # Because the pattern has exactly one capture group, re.split returns
        # [text, punct, text, punct, ..., text]: even indexes are sentence
        # bodies and each odd index is the punctuation that ended the body
        # before it.
        parts = self._BOUNDARY_RE.split(text)

        sentences = []
        for idx in range(0, len(parts), 2):
            sentence = parts[idx]
            if idx + 1 < len(parts):
                # Re-attach the terminator, whatever run it is ("?!", "..", ...)
                sentence += parts[idx + 1]
            sentence = sentence.strip()
            if sentence:
                # Abbreviation-only fragments such as "Dr." are kept on
                # purpose: the merge step below glues them to what follows.
                sentences.append(sentence)

        # Post-process: merge sentences that were incorrectly split at abbreviations
        return self._merge_abbreviation_splits(sentences)

    def _is_abbreviation_only(self, text: str) -> bool:
        """Check if text is just an abbreviation (case-insensitive, trailing dots ignored)."""
        cleaned = text.lower().rstrip('.')
        return cleaned in self.abbreviations

    def _merge_abbreviation_splits(self, sentences: List[str]) -> List[str]:
        """Merge sentences that were incorrectly split at abbreviations.

        A sentence is joined (with a single space) to the one after it when
        its last word is a known abbreviation and at least one of these holds:

        * the sentence consists of nothing but that abbreviation,
        * the following sentence starts with a lowercase letter,
        * the following sentence is shorter than 10 characters.

        Returns: New list of sentences; the input list is not modified
        """
        if not sentences:
            return []

        merged = []
        current = sentences[0]

        for following in sentences[1:]:
            words = current.split()
            last_word = words[-1].lower().rstrip('.') if words else ''

            if last_word in self.abbreviations and (
                self._is_abbreviation_only(current)
                # Slice instead of [0] so an empty string cannot raise
                or following[:1].islower()
                or len(following) < 10
            ):
                current += ' ' + following
                continue

            # Otherwise, save current and start new
            merged.append(current)
            current = following

        # Don't forget the last one
        merged.append(current)

        return merged

    def segment_paragraphs(self, paragraphs: List[str]) -> List[tuple]:
        """
        Segment a list of paragraphs, tracking which paragraph each segment belongs to.

        Empty or whitespace-only paragraphs produce no segments but still
        count towards the paragraph index.

        Returns: List of (paragraph_index, segment_text) tuples
        """
        # segment_text already returns [] for blank paragraphs, so no
        # separate emptiness check is needed here.
        return [
            (para_idx, segment)
            for para_idx, paragraph in enumerate(paragraphs)
            for segment in self.segment_text(paragraph)
        ]
112
+
113
+
114
# Manual smoke test: segment a small sample and print the numbered result
if __name__ == "__main__":
    sample = """
This is a test sentence. This is another sentence!
Dr. Smith works at Inc. Corp. The company has many employees.
What about questions? They work too. And exclamations!
"""

    found = SimpleSegmenter().segment_text(sample)

    print(f"Found {len(found)} segments:")
    number = 1
    for seg in found:
        print(f"{number}. {seg}")
        number += 1