tokmor 1.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. tokmor/__init__.py +77 -0
  2. tokmor/api.py +194 -0
  3. tokmor/assets.py +365 -0
  4. tokmor/base.py +238 -0
  5. tokmor/brahmic.py +516 -0
  6. tokmor/cjk.py +497 -0
  7. tokmor/domain/__init__.py +11 -0
  8. tokmor/domain/sentiment.py +198 -0
  9. tokmor/factory.py +394 -0
  10. tokmor/indic.py +289 -0
  11. tokmor/inventory.py +51 -0
  12. tokmor/legacy_api.py +143 -0
  13. tokmor/lemma_store.py +102 -0
  14. tokmor/lookup_keys.py +145 -0
  15. tokmor/models/domain/sentiment/en.json +54 -0
  16. tokmor/models/domain/sentiment/ko.json +52 -0
  17. tokmor/models/seg_lexicon/km_wordfreq.pkl +0 -0
  18. tokmor/models/seg_lexicon/km_wordlist.pkl +0 -0
  19. tokmor/models/seg_lexicon/lo_wordfreq.pkl +0 -0
  20. tokmor/models/seg_lexicon/lo_wordlist.pkl +0 -0
  21. tokmor/models/seg_lexicon/my_wordfreq.pkl +0 -0
  22. tokmor/models/seg_lexicon/my_wordlist.pkl +0 -0
  23. tokmor/models/seg_lexicon/th_wordfreq.pkl +0 -0
  24. tokmor/models/seg_lexicon/th_wordlist.pkl +0 -0
  25. tokmor/models/seg_lexicon/zh_extra_dict.json +35 -0
  26. tokmor/models/seg_lexicon/zh_wordfreq.pkl +0 -0
  27. tokmor/morphology/__init__.py +395 -0
  28. tokmor/morphology/advanced_base.py +472 -0
  29. tokmor/morphology/arabic_advanced.py +247 -0
  30. tokmor/morphology/chinese.py +736 -0
  31. tokmor/morphology/chinese_advanced.py +425 -0
  32. tokmor/morphology/english.py +315 -0
  33. tokmor/morphology/english_advanced.py +560 -0
  34. tokmor/morphology/french_advanced.py +237 -0
  35. tokmor/morphology/german_advanced.py +343 -0
  36. tokmor/morphology/hindi_advanced.py +258 -0
  37. tokmor/morphology/japanese.py +417 -0
  38. tokmor/morphology/japanese_advanced.py +589 -0
  39. tokmor/morphology/korean.py +534 -0
  40. tokmor/morphology/korean_advanced.py +603 -0
  41. tokmor/morphology/russian_advanced.py +217 -0
  42. tokmor/morphology/spanish_advanced.py +226 -0
  43. tokmor/morphology/templates/__init__.py +32 -0
  44. tokmor/morphology/templates/arabic_script_template.py +162 -0
  45. tokmor/morphology/templates/brahmic_template.py +181 -0
  46. tokmor/morphology/templates/cyrillic_template.py +168 -0
  47. tokmor/morphology/templates/latin_template.py +235 -0
  48. tokmor/morphology/templates/other_scripts_template.py +475 -0
  49. tokmor/morphology/thai_native.py +274 -0
  50. tokmor/morphology/tier2.py +477 -0
  51. tokmor/morphology/tier3.py +449 -0
  52. tokmor/morphology/tier4.py +410 -0
  53. tokmor/morphology/unified.py +855 -0
  54. tokmor/morphology/universal_fallback.py +398 -0
  55. tokmor/ner_prep.py +747 -0
  56. tokmor/offline.py +89 -0
  57. tokmor/preprocess.py +80 -0
  58. tokmor/resources.py +288 -0
  59. tokmor/routing.py +147 -0
  60. tokmor/rtl.py +309 -0
  61. tokmor/schema.py +17 -0
  62. tokmor/sns_tags.py +281 -0
  63. tokmor/space_based.py +272 -0
  64. tokmor/token_quality.py +1185 -0
  65. tokmor/unified_tokens.py +228 -0
  66. tokmor-1.2.9.dist-info/METADATA +103 -0
  67. tokmor-1.2.9.dist-info/RECORD +70 -0
  68. tokmor-1.2.9.dist-info/WHEEL +5 -0
  69. tokmor-1.2.9.dist-info/licenses/LICENSE +22 -0
  70. tokmor-1.2.9.dist-info/top_level.txt +1 -0
@@ -0,0 +1,228 @@
1
+ """
2
+ Unified token output for downstream pipelines
3
+ =============================================
4
+
5
+ TokMor core intentionally avoids shipping a full learned POS tagger.
6
+ However, some downstream systems expect token objects that include:
7
+ - offsets
8
+ - a best-effort POS label (language-specific if available)
9
+ - a confidence-like score
10
+ - a particle/function-word flag (for boundary / hard-block)
11
+
12
+ This module provides a deterministic, lightweight adapter on top of TokMor tokenizers.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import asdict
18
+ from typing import Any, Dict, List, Optional
19
+
20
+
21
+ def _is_particle(lang: str, *, pos: Optional[str], text: str) -> bool:
22
+ base = (lang or "").split("-", 1)[0].lower()
23
+ p = (pos or "").strip()
24
+
25
+ # Prefer language-specific tokenizer POS when present.
26
+ if base == "ko":
27
+ # Sejong-ish: J* particles, JC conjunction particle, etc.
28
+ if p.startswith("J"):
29
+ return True
30
+ return False
31
+
32
+ if base == "ja":
33
+ # Our JA analyzers may return string tags like 助詞/助動詞 (tokenizer compatibility).
34
+ if p in {"助詞", "助動詞"}:
35
+ return True
36
+ return False
37
+
38
+ if base.startswith("zh"):
39
+ # Chinese analyzer tags: u (particle), p (prep), c (conj)
40
+ if p in {"u"}:
41
+ return True
42
+ return False
43
+
44
+ # Fallback: use function-word hints when available (closed class).
45
+ try:
46
+ from .ner_prep import function_word_tag
47
+
48
+ fw = function_word_tag(lang, text) or ""
49
+ return fw in {"PART"}
50
+ except Exception:
51
+ return False
52
+
53
+
54
+ def _pos_coarse(lang: str, *, pos: Optional[str], text: str) -> str:
55
+ """
56
+ Best-effort mapping from native POS tags to a small universal-style coarse tag set.
57
+ Returns empty string when unknown.
58
+ """
59
+ base = (lang or "").split("-", 1)[0].lower()
60
+ p = (pos or "").strip()
61
+ if not p:
62
+ return ""
63
+
64
+ if base == "ko":
65
+ # Sejong-ish (token-level): NNG/NNP/NNB/NP/NR, VV/VA/VX/VCP/VCN, MAG/MAJ, J*, E*, X*, SN, S*
66
+ if p.startswith("NNP"):
67
+ return "PROPN"
68
+ if p.startswith("NN") or p in {"NP", "NR"}:
69
+ return "NOUN"
70
+ if p in {"VV", "VX"}:
71
+ return "VERB"
72
+ if p in {"VCP", "VCN"}:
73
+ return "AUX"
74
+ if p == "VA":
75
+ return "ADJ"
76
+ if p in {"MAG", "MAJ"}:
77
+ return "ADV"
78
+ if p.startswith("J"):
79
+ return "PART"
80
+ if p.startswith("E") or p.startswith("X"):
81
+ return "PART"
82
+ if p == "SN":
83
+ return "NUM"
84
+ if p.startswith("S"):
85
+ return "PUNCT"
86
+ return ""
87
+
88
+ if base == "ja":
89
+ # Our JA tokens may carry string tags.
90
+ if p in {"助詞", "助動詞"}:
91
+ return "PART"
92
+ if "名詞" in p:
93
+ return "NOUN"
94
+ if "固有名詞" in p:
95
+ return "PROPN"
96
+ if "動詞" in p:
97
+ return "VERB"
98
+ if "形容詞" in p:
99
+ return "ADJ"
100
+ if "副詞" in p:
101
+ return "ADV"
102
+ return ""
103
+
104
+ if base.startswith("zh"):
105
+ # Chinese analyzer tags: n/ns/nr/nt/nrt, v, a, d, p, u, c, r...
106
+ if p.startswith("n"):
107
+ return "NOUN"
108
+ if p.startswith("v"):
109
+ return "VERB"
110
+ if p.startswith("a"):
111
+ return "ADJ"
112
+ if p.startswith("d"):
113
+ return "ADV"
114
+ if p in {"p"}:
115
+ return "ADP"
116
+ if p in {"u"}:
117
+ return "PART"
118
+ if p in {"c"}:
119
+ return "CCONJ"
120
+ if p in {"m"}:
121
+ return "NUM"
122
+ if p in {"w"}:
123
+ return "PUNCT"
124
+ return ""
125
+
126
+ return ""
127
+
128
+
129
+ def _pos_conf(lang: str, *, pos: Optional[str], is_particle: bool) -> float:
130
+ """
131
+ Deterministic confidence-like value (NOT calibrated).
132
+ Useful as a downstream heuristic weight / debug signal.
133
+ """
134
+ if not pos:
135
+ return 0.0
136
+ # Match the style seen in downstream logs: particles high, content tokens moderate.
137
+ if is_particle:
138
+ return 0.95
139
+ base = (lang or "").split("-", 1)[0].lower()
140
+ if base == "ko":
141
+ return 0.60
142
+ return 0.70
143
+
144
+
145
def unified_tokenize(
    text: str,
    *,
    lang: str,
    sns: bool = True,
    morphology: Optional[bool] = None,
    include_sns_tags: bool = False,
    include_pos4: bool = True,
) -> Dict[str, Any]:
    """
    Return a token list with offsets + best-effort POS/particle hints.

    Notes:
        - `pos` comes from TokMor tokenizer morphology when available
          (language-specific tags).
        - `pos_conf` is a deterministic heuristic (not ML confidence).
        - `pos4` is a coarse hint in {N,V,ADJ,ADV,UNK} (optional).
    """
    from .api import detect_language
    from .factory import get_tokenizer
    from .preprocess import normalize_text

    normalized = normalize_text(text, sns=bool(sns))
    if lang == "auto":
        lang = detect_language(normalized)

    # Match segment() defaults for quality: languages written without spaces
    # between words (CJK + Thai/Myanmar/Khmer/Lao) need morphology-driven
    # segmentation to produce NER-usable tokens.
    if morphology is None and lang in {"zh", "ja", "ko", "th", "my", "km", "lo"}:
        morphology = True

    result = get_tokenizer(lang, use_morphology=bool(morphology)).tokenize(normalized)

    tokens: List[Dict[str, Any]] = []
    for token in result.tokens:
        raw = asdict(token)
        surface = str(raw.get("text") or "")
        # Keep only the minimal keys external systems rely on.
        entry: Dict[str, Any] = {
            "text": raw.get("text"),
            "start": int(raw.get("start") or 0),
            "end": int(raw.get("end") or 0),
            "pos": raw.get("pos") or "",
        }
        particle = _is_particle(lang, pos=entry.get("pos"), text=surface)
        entry["is_particle"] = particle
        entry["pos_conf"] = float(_pos_conf(lang, pos=entry.get("pos"), is_particle=bool(particle)))

        if include_pos4:
            try:
                from .ner_prep import pos4_hint

                # POS4 is a *content* hint for NER: N/V/ADJ/ADV/UNK.
                # The tokenizer POS mapping is preferred over surface
                # heuristics when available; only fall back to pos4_hint
                # when the coarse tag is unknown.
                coarse = _pos_coarse(lang, pos=entry.get("pos"), text=surface)
                if bool(particle) or coarse in {"PART", "PUNCT", "SYM", "X"}:
                    entry["pos4"] = "UNK"
                elif coarse in {"VERB", "AUX"}:
                    entry["pos4"] = "V"
                elif coarse == "ADJ":
                    entry["pos4"] = "ADJ"
                elif coarse == "ADV":
                    entry["pos4"] = "ADV"
                elif coarse in {"NOUN", "PROPN"}:
                    entry["pos4"] = "N"
                else:
                    entry["pos4"] = pos4_hint(lang, surface)
            except Exception:
                entry["pos4"] = "UNK"

        if include_sns_tags:
            from .sns_tags import classify_sns_token

            entry["sns"] = classify_sns_token(surface, lang=lang)

        tokens.append(entry)

    return {
        "lang": lang,
        "text_norm": normalized,
        "morphology_used": bool(getattr(result, "morphology_used", False)),
        "tokens": tokens,
    }
228
+
@@ -0,0 +1,103 @@
1
+ Metadata-Version: 2.4
2
+ Name: tokmor
3
+ Version: 1.2.9
4
+ Summary: Fast multilingual tokenizer and morphological analyzer (core)
5
+ Author: TokMor Team
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 TokMor Contributors
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+
29
+ Project-URL: Homepage, https://github.com/tokmorlab/tokmor
30
+ Project-URL: Documentation, https://github.com/tokmorlab/tokmor#readme
31
+ Project-URL: Source, https://github.com/tokmorlab/tokmor
32
+ Project-URL: Issues, https://github.com/tokmorlab/tokmor/issues
33
+ Keywords: nlp,preprocessing,multilingual,tokenizer,tokenization,segmentation,morphology,lemmatization,ner,rag,information-extraction,offline
34
+ Classifier: Development Status :: 4 - Beta
35
+ Classifier: Intended Audience :: Developers
36
+ Classifier: Intended Audience :: Science/Research
37
+ Classifier: License :: OSI Approved :: MIT License
38
+ Classifier: Programming Language :: Python :: 3
39
+ Classifier: Programming Language :: Python :: 3.8
40
+ Classifier: Programming Language :: Python :: 3.9
41
+ Classifier: Programming Language :: Python :: 3.10
42
+ Classifier: Programming Language :: Python :: 3.11
43
+ Classifier: Programming Language :: Python :: 3.12
44
+ Classifier: Programming Language :: Python :: 3.13
45
+ Classifier: Topic :: Text Processing :: Linguistic
46
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
47
+ Requires-Python: >=3.8
48
+ Description-Content-Type: text/markdown
49
+ License-File: LICENSE
50
+ Provides-Extra: dev
51
+ Requires-Dist: pytest>=7.0; extra == "dev"
52
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
53
+ Dynamic: license-file
54
+
55
+ # TokMor (Core)
56
+
57
+ TokMor is a **small, deterministic multilingual preprocessing library** (no training/inference).
58
+ It focuses on **stable tokenization/segmentation**, **NER-friendly preprocessing**, and **optional offline data packs**.
59
+
60
+ TokMor is **not** a linguistic POS tagger and does **not** run ML models at inference time.
61
+
62
+ ## Install
63
+
64
+ ```bash
65
+ pip install tokmor
66
+ ```
67
+
68
+ ## Quick start (Python)
69
+
70
+ ```python
71
+ import tokmor
72
+
73
+ out = tokmor.unified_tokenize("We visited Seoul on 2025-01-10.", lang="en", sns=False)
74
+ print(out["tokens"][:5])
75
+ ```
76
+
77
+ ## NER preprocessing helper
78
+
79
+ ```python
80
+ import tokmor
81
+
82
+ prep = tokmor.ner_preprocess("LOL!!! Apple announced new products in Seoul...", lang="en")
83
+ print(prep["tokens"][:8])
84
+ ```
85
+
86
+ ## Optional offline data packs
87
+
88
+ Large lemma/POS-hint packs are not bundled in the wheel. Provide them via:
89
+
90
+ ```bash
91
+ export TOKMOR_DATA_DIR=/path/to/tokmor_data_pack
92
+ ```
93
+
94
+ ## Docs / repo
95
+
96
+ See the repo docs for data pack layout, licenses, and policies:
97
+ - `docs/PACKAGING.md`
98
+ - `docs/DATA_SOURCES_AND_LICENSES.md`
99
+ - `docs/DOMAIN_LEXICONS.md`
100
+
101
+ ## License
102
+
103
+ MIT (see `LICENSE`)
@@ -0,0 +1,70 @@
1
+ tokmor/__init__.py,sha256=fBxOs4tO8YesWXMeNzUQp6_cOs1HiSurW6nDSJMRBXg,1910
2
+ tokmor/api.py,sha256=B8W_Q03fImwB47ZwXO1zAJcgqT1JpgGoSJl6_SAkXiQ,5549
3
+ tokmor/assets.py,sha256=oB3AtUvX9U4oP2RGRFj53_iujS4-bM9jvVlWM0BU7JQ,12088
4
+ tokmor/base.py,sha256=ttthd9aNY11VFyO0QPwVI96nTlIMhVzfUPupNVUiq5c,7434
5
+ tokmor/brahmic.py,sha256=uyRyuafjd8mombR9oVg1qpGdOSIqR9Jqy9NkmvyEnKA,20436
6
+ tokmor/cjk.py,sha256=TJg6SBWfVMSkOaOxyrbMJfJsFGmZE759-EZyhOaS7pA,18913
7
+ tokmor/factory.py,sha256=xg7lUZcam5mIt-bwlhfx2uGojgryVqn-OTP2XqaM4cI,14608
8
+ tokmor/indic.py,sha256=EW4lyc9GNdnB3XlMUb46mwlpWJ-L_m-iPq04mvSX5wU,8844
9
+ tokmor/inventory.py,sha256=TC-epjATIn-FaA0U-g1kY_cqguKKcG9J4U31XH9tsBI,1531
10
+ tokmor/legacy_api.py,sha256=yb1AXi6jXxYWrrsLUJU2EydC80Guwzsh1PPpM3KnuYk,4190
11
+ tokmor/lemma_store.py,sha256=l44veiwNUZdGgcIO5tIl5E-I7ppVyQGhQuBuyw5v_Hs,2614
12
+ tokmor/lookup_keys.py,sha256=t8TRBX2FLuCUSxwAf2blzFbGwqILLx2IIs7iDYRhR8w,4399
13
+ tokmor/ner_prep.py,sha256=2GOWUDJdBHUn3nLScGgLxCY37zXI1VWXx97Bp8AFoT0,24046
14
+ tokmor/offline.py,sha256=YwsadRjPPD_tWUGnUWbb3xMfr63p1eaukc18fCqlwE0,2362
15
+ tokmor/preprocess.py,sha256=MmkhaQvNYOjaxSm6iX6wP2pvBZhq7PfeVxDVV8LZFxA,2392
16
+ tokmor/resources.py,sha256=ylbbcLjJI-2QH_b3s3zpYTqy_VnBuENfKe1yv7UX4PA,8763
17
+ tokmor/routing.py,sha256=YQb0v05Ue1ct9SEFaKFT2R-VQi6LoN6wvDPImRYlSxY,4944
18
+ tokmor/rtl.py,sha256=6azqZhxVDZKINlhAdqzetk0zmQYARORTXkPcznbVkag,10039
19
+ tokmor/schema.py,sha256=nr3NxgNNTXh5A4Vb5MFIJCgj5tvmzejORC7U_5D9ngo,341
20
+ tokmor/sns_tags.py,sha256=uveJfgB1oX7X0Sck7Y041unOHGbu8bcF2zCHhaSDx-Q,13377
21
+ tokmor/space_based.py,sha256=8g6gJZzgB_NMfgkc7XYUypSEzgsuNOTItMtLgKQ32tA,8920
22
+ tokmor/token_quality.py,sha256=WrsFnVx2cfmvgsIz5qcfUv-0AVBWUpt2VlamhA9CWSU,40188
23
+ tokmor/unified_tokens.py,sha256=Yu1WiA6cylpbt18g1zs0uAXxQH05YFV767y7SFUBFao,7246
24
+ tokmor/domain/__init__.py,sha256=V_8vaiZE3MuNSj4_6XWaf9B6PpgUAqFh6jJmDG3_jpM,328
25
+ tokmor/domain/sentiment.py,sha256=uvu1NIy2L0O3Go-qHeNR33r8h10GBwIq3UL1FtOfBHA,5493
26
+ tokmor/models/domain/sentiment/en.json,sha256=OQwYyKHCbLxR6YZ4wwlLGf4ebUDq_3x6avyw3BnUd4M,662
27
+ tokmor/models/domain/sentiment/ko.json,sha256=IWWPiB0SwBxzkDseqBsMDJlNIVx7TumsQRE22tz-jHU,660
28
+ tokmor/models/seg_lexicon/km_wordfreq.pkl,sha256=lidHd1PXS9ucg4nLjo4lgNaAnnwkgBBFUmtJT-4vTyE,1263734
29
+ tokmor/models/seg_lexicon/km_wordlist.pkl,sha256=2djvMrVu6YCaj-GqJRWCNVAU0D7mpzZAou1c-4B5TYs,417451
30
+ tokmor/models/seg_lexicon/lo_wordfreq.pkl,sha256=tn9miUY8Yybxwwg1Tv0LHVIofq6XAz8fqpBN9Wm5Pos,1530855
31
+ tokmor/models/seg_lexicon/lo_wordlist.pkl,sha256=aQEunKNue9zNY8y8Ea-q5xw7oxuprHUf8OeAFQ9SLG4,464660
32
+ tokmor/models/seg_lexicon/my_wordfreq.pkl,sha256=34njTN4HngBa757r-GkKW55BdJBx3b23NimDco3boOs,1522085
33
+ tokmor/models/seg_lexicon/my_wordlist.pkl,sha256=EdS1iySSP3ivZFWXC9rsWkVeV0vDjPfKR04vmoyQMSU,582850
34
+ tokmor/models/seg_lexicon/th_wordfreq.pkl,sha256=lQ01Unz1N7VYDkf1X1EK13XTJdCIQ6F1Jo4t7kz7Zu4,2469349
35
+ tokmor/models/seg_lexicon/th_wordlist.pkl,sha256=rJXCPEIZepHCBykAFzJYkMmUJLBQE5VWoOoRGrdPXSE,749702
36
+ tokmor/models/seg_lexicon/zh_extra_dict.json,sha256=7d7mUX6Nj5JSiAFaiJ5TOmMV44wMpBvuORzEq7c04UI,833
37
+ tokmor/models/seg_lexicon/zh_wordfreq.pkl,sha256=UJES6ezc2SsQmeHXyY4xX9tXmmoLy_gHs6s4ASjGQO4,2717916
38
+ tokmor/morphology/__init__.py,sha256=kURcONW5r9Wnw9vOYDiI3IcUwhIenFr4aeQnBYo2388,12428
39
+ tokmor/morphology/advanced_base.py,sha256=LCVS3gmiJz_AUXXGGQ-mw8PDdJenHdWpfGL4_AE-lAc,15671
40
+ tokmor/morphology/arabic_advanced.py,sha256=olq6lAjNJO-UVQWCNsTQkSCjNSCrs9QWYlEFzpNU3FU,10139
41
+ tokmor/morphology/chinese.py,sha256=tOHc57yEvkj4X1YoW2jh5xaK87Q9mHB7Gg9Lg3gsYOg,31626
42
+ tokmor/morphology/chinese_advanced.py,sha256=JJSfXgkXSaJGsJq99I_rKdH8-v41oKQ8iLQwG4epHlc,16769
43
+ tokmor/morphology/english.py,sha256=AcVb_7Ek6ATlmSzZgT05oDNubwmlt5qhp7xS9_QN8aA,13540
44
+ tokmor/morphology/english_advanced.py,sha256=fOjTSFnMw1Rb43R76Wk3KcT051q5eDjcxKB4jTNE9SA,23090
45
+ tokmor/morphology/french_advanced.py,sha256=yNcv2LYRbX0wtMQ_wFUdPX30GiZklZU_agn7Y6jTbJk,9748
46
+ tokmor/morphology/german_advanced.py,sha256=v2U4xd3IIvDSWIjVRfqXG5mpqdmtfKiSSTDvlrKSo8g,15766
47
+ tokmor/morphology/hindi_advanced.py,sha256=rB0m-vJy-dhWToNNnc7RkjcDSRkdefiCRNr1EnzVpOA,10722
48
+ tokmor/morphology/japanese.py,sha256=5RV_qzf1yDenqJU2rYLtens7mjUdEzOMPoU9VWXvHl4,16014
49
+ tokmor/morphology/japanese_advanced.py,sha256=YMCbAOsfZacvwpyY0mOW0O51oTlKJIdTTzzbUpChCyo,23342
50
+ tokmor/morphology/korean.py,sha256=GCgdKLx51Z_tObfH-tXwWuJ2bcO-Ap8lVZmuqWWWGgs,21868
51
+ tokmor/morphology/korean_advanced.py,sha256=oM3F5pmIy1ULWoS7I_n278U8fzR4OhIeUN9irHfOguE,25292
52
+ tokmor/morphology/russian_advanced.py,sha256=5WXwwf2zDtT7f1ChXBugHbehrhuajOZUaWHQzK564-A,9861
53
+ tokmor/morphology/spanish_advanced.py,sha256=o9L-iOrH6coyHVYfZMCtLdaxa08ae9Hkj3JgAIzd1BA,9520
54
+ tokmor/morphology/thai_native.py,sha256=06PzmhWYgCAvPignaMbRmphavQW1G6vp6Q9yjiCTjPI,17289
55
+ tokmor/morphology/tier2.py,sha256=I6mNushn2MYpPXo-L-IqPPcYp63c6y3quCCB30lTfEY,24360
56
+ tokmor/morphology/tier3.py,sha256=RIcdHv80VYN0P9qmg9ZZRvzrf9xJ-YAWZkfMhbTbHlY,19945
57
+ tokmor/morphology/tier4.py,sha256=PG_UCu48fRATCiTC7rWW_J9zT9JJZJwl11vKRRqesq8,17245
58
+ tokmor/morphology/unified.py,sha256=MndJLdy7LXvu26VfB7Mhp1GaK-GoE8ij7BscinNQXOg,32245
59
+ tokmor/morphology/universal_fallback.py,sha256=QKgMxOYBmWrTm-Uw71Q2iWIcRKuvK-ElyTO48oFsAKw,13406
60
+ tokmor/morphology/templates/__init__.py,sha256=TR8m-pWsEPPMUjr1G7YT4jwt_lUPGJr0L15CeC_PK88,844
61
+ tokmor/morphology/templates/arabic_script_template.py,sha256=xpByjeOvsmdQg6AlEuDtVRXuYZKmQgCfP_cabMvEGGQ,6224
62
+ tokmor/morphology/templates/brahmic_template.py,sha256=JHSR6TsjZBv7s6BhQgt2V7PSD9bibVhwmZPajWv2VLo,6640
63
+ tokmor/morphology/templates/cyrillic_template.py,sha256=usNMeWRAbEbactywDczORKB05utjM1ytreOMco1hpPo,6397
64
+ tokmor/morphology/templates/latin_template.py,sha256=DjVd_6IYbMLIjO5DkNO84ZPUcetelYILl9ifaUvj6nM,9577
65
+ tokmor/morphology/templates/other_scripts_template.py,sha256=HNsf4T-BT5evuDbhMcSI6nSnSYgSrIyFlPcwUEBNWJU,20326
66
+ tokmor-1.2.9.dist-info/licenses/LICENSE,sha256=OfjtVmlC7qrOAU3U7_NErSYR19dT8Zb0RBD-vNjq16E,1077
67
+ tokmor-1.2.9.dist-info/METADATA,sha256=tDKWR_UqHM5f7rOwVcttdXwlTiinLcWTLjofCH-6tyE,3763
68
+ tokmor-1.2.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
69
+ tokmor-1.2.9.dist-info/top_level.txt,sha256=Su9FBpMkpwogpvXhgjDVBKWrXIS9NSWbDCUVi1BNEx0,7
70
+ tokmor-1.2.9.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 TokMor Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1 @@
1
+ tokmor