sonatoki 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sonatoki/Cleaners.py CHANGED
@@ -60,6 +60,13 @@ class ConsecutiveDuplicatesRe(RegexCleaner):
      replace = r"\1"


+ class Lowercase(Cleaner):
+     @classmethod
+     @override
+     def clean(cls, token: str) -> str:
+         return token.lower()
+
+
  __all__ = [
      "ConsecutiveDuplicates",
  ]
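
Note: the new `Lowercase` cleaner is stateless, so it is used directly as a classmethod, like the other `Cleaner` subclasses. A minimal sketch of the added behavior (the sample token is illustrative):

    from sonatoki.Cleaners import Lowercase

    assert Lowercase.clean("SINA") == "sina"  # folds case before the filters run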
sonatoki/Configs.py CHANGED
@@ -1,34 +1,37 @@
  # STL
  from copy import deepcopy
- from typing import List, Type, TypedDict
-
- # PDM
- from typing_extensions import NotRequired
+ from typing import List, Type, Union, TypedDict

  # LOCAL
  from sonatoki.Filters import (
      Filter,
      NimiPu,
      Numeric,
+     OrFilter,
      Syllabic,
      NimiLinku,
      NimiPuAle,
+     NimiUCSUR,
      Alphabetic,
      ProperName,
      Phonotactic,
      Punctuation,
      NimiLinkuAle,
+     NimiLinkuSandbox,
+     EnglishIgnorables,
  )
  from sonatoki.Scorers import Number, Scorer, PassFail, SoftScaling, SoftPassFail
  from sonatoki.Cleaners import Cleaner, ConsecutiveDuplicates
  from sonatoki.Tokenizers import Tokenizer, WordTokenizer
  from sonatoki.Preprocessors import (
      URLs,
+     Reference,
      Preprocessor,
      DiscordEmotes,
      DiscordSpecial,
      DiscordChannels,
      DiscordMentions,
+     AngleBracketObject,
  )


@@ -42,6 +45,8 @@ class IloConfig(TypedDict):
      passing_score: Number


+ # TODO: branching configs?
+
  BaseConfig: IloConfig = {
      "preprocessors": [URLs],
      "cleaners": [ConsecutiveDuplicates],
@@ -53,24 +58,68 @@ BaseConfig: IloConfig = {
  }


- PrefConfig: IloConfig = deepcopy(BaseConfig)
- PrefConfig["scoring_filters"].extend([NimiLinku, Syllabic, ProperName, Alphabetic])
- PrefConfig["scorer"] = SoftScaling
+ PrefConfig: IloConfig = {
+     "preprocessors": [URLs, Reference],
+     "cleaners": [ConsecutiveDuplicates],
+     "ignoring_filters": [Numeric, Punctuation, EnglishIgnorables],
+     "scoring_filters": [
+         OrFilter(NimiLinku, NimiUCSUR),
+         Syllabic,
+         ProperName,
+         Alphabetic,
+     ],
+     "scorer": SoftScaling,
+     "passing_score": 0.8,
+     "word_tokenizer": WordTokenizer,
+ }

+ CorpusConfig: IloConfig = {
+     "preprocessors": [URLs, AngleBracketObject, Reference],
+     "cleaners": [ConsecutiveDuplicates],
+     "ignoring_filters": [Numeric, Punctuation, EnglishIgnorables],
+     "scoring_filters": [
+         OrFilter(NimiLinkuSandbox, NimiUCSUR),
+         Syllabic,
+         ProperName,
+         Alphabetic,
+     ],
+     "scorer": SoftScaling,
+     "passing_score": 0.8,
+     "word_tokenizer": WordTokenizer,
+ }

- LazyConfig: IloConfig = deepcopy(BaseConfig)
- LazyConfig["scoring_filters"].extend([Alphabetic, ProperName])
- LazyConfig["scorer"] = SoftPassFail

- DiscordConfig: IloConfig = deepcopy(PrefConfig)
- DiscordConfig["preprocessors"].extend(
-     [DiscordEmotes, DiscordMentions, DiscordChannels, DiscordSpecial]
- )
+ LazyConfig: IloConfig = {
+     "preprocessors": [URLs],
+     "cleaners": [ConsecutiveDuplicates],
+     "ignoring_filters": [Numeric, Punctuation],
+     "scoring_filters": [Alphabetic, NimiUCSUR, ProperName],
+     "scorer": SoftPassFail,
+     "passing_score": 0.8,
+     "word_tokenizer": WordTokenizer,
+ }
+
+ DiscordConfig: IloConfig = {
+     "preprocessors": [URLs, AngleBracketObject, Reference],
+     "cleaners": [ConsecutiveDuplicates],
+     "ignoring_filters": [Numeric, Punctuation, EnglishIgnorables],
+     "scoring_filters": [
+         OrFilter(NimiLinku, NimiUCSUR),
+         Syllabic,
+         ProperName,
+         Alphabetic,
+     ],
+     "scorer": SoftScaling,
+     "passing_score": 0.8,
+     "word_tokenizer": WordTokenizer,
+ }
+
  TelegramConfig: IloConfig = deepcopy(PrefConfig)
  ForumConfig: IloConfig = deepcopy(PrefConfig)

  __all__ = [
      "BaseConfig",
+     "CorpusConfig",
      "DiscordConfig",
      "ForumConfig",
      "IloConfig",
sonatoki/Filters.py CHANGED
@@ -1,7 +1,7 @@
  # STL
  import re
  from abc import ABC, abstractmethod
- from typing import Set
+ from typing import Set, List, Type
  from functools import lru_cache as cache  # cache comes in 3.9

  # PDM
@@ -13,15 +13,17 @@ from sonatoki.constants import (
      VOWELS,
      NIMI_PU,
      ALPHABET,
+     ALL_PUNCT,
      ALLOWABLES,
      CONSONANTS,
+     IGNORABLES,
      NIMI_LINKU,
-     POSIX_PUNCT,
-     UNICODE_PUNCT,
+     NIMI_UCSUR,
      NIMI_LINKU_LILI,
      ALL_PUNCT_RANGES,
      NIMI_PU_SYNONYMS,
      NIMI_LINKU_SANDBOX,
+     UCSUR_PUNCT_RANGES,
  )

  regex.DEFAULT_VERSION = regex.VERSION1
@@ -79,6 +81,10 @@ class Miscellaneous(MemberFilter):
      tokens = set(ALLOWABLES)


+ class EnglishIgnorables(MemberFilter):
+     tokens = set(IGNORABLES)
+
+
  class ProperName(Filter):
      """Determines if a given token is a valid name (also called a loan word).
      When Toki Pona is written with the Latin alphabet, names are generally
@@ -118,6 +124,10 @@ class NimiLinkuSandbox(MemberFilter):
      tokens = set(NIMI_LINKU + NIMI_LINKU_LILI + NIMI_LINKU_SANDBOX)


+ class NimiUCSUR(MemberFilter):
+     tokens = set(NIMI_UCSUR)
+
+
  class Phonotactic(RegexFilter):
      """Determines if a given token is phonotactically valid Toki Pona (or `n`).
      Excludes both consecutive nasals and the illegal syllables:
@@ -156,6 +166,11 @@ class AlphabeticRe(RegexFilter):
      pattern = re.compile(rf"[{ALPHABET}]+", flags=re.IGNORECASE)


+ class TwoOrMoreAlphabetic(Filter):
+     # TODO: alphabetic implementation that ignores single characters
+     pass
+
+
  class Numeric(Filter):
      """Determine if a given token is entirely numeric.
      Covers all numeric symbols in Unicode.
@@ -175,12 +190,13 @@ class Numeric(Filter):
  class Punctuation(SubsetFilter):
      """Identify whether a token is entirely punctuation. Fastest implementation."""

-     tokens = set(POSIX_PUNCT + UNICODE_PUNCT)
+     tokens = set(ALL_PUNCT)


  class PunctuationRe(RegexFilter):
      """Faster implementation of `PunctuationRe1`.
-     Goes out of date compared to the `regex` library if UNICODE_PUNCT is not updated."""
+     Goes out of date compared to the `regex` library if UNICODE_PUNCT_RANGES is not updated.
+     """

      pattern = re.compile(rf"[{ALL_PUNCT_RANGES}]+")

@@ -188,17 +204,81 @@ class PunctuationRe(RegexFilter):
  class PunctuationRe1(Regex1Filter):
      """Reference implementation for identifying tokens made entirely of punctuation."""

-     pattern = regex.compile(r"[\p{Punctuation}\p{posix_punct}]+")
+     pattern = regex.compile(
+         rf"[\p{{Punctuation}}\p{{posix_punct}}{UCSUR_PUNCT_RANGES}]+"
+     )
+
+
+ class OrFilter:
+     """Instantiate with more than one filter to compose them into one filter,
+     returning True when any individual filter matches or False otherwise.
+     Requires at least two filters.
+
+     OrFilter exists as a compromise between the need to score some filters equally,
+     while not adding custom behavior to scorers.
+     I could have allowed a position to have a list of filters instead of one filter,
+     but this would require cleaning the user's input, and nested handling of lists.
+     It also would not have been as powerful- I would need another param for the and/or switch,
+     or to not give users the choice.
+
+     Instead, the user is responsible for building an OrFilter out of their desired filters.
+     """
+
+     def __new__(cls, *filters_: Type[Filter]) -> Type[Filter]:
+         if not len(filters_) >= 2:
+             raise ValueError("Must provide at least two Filters to OrFilter.")
+
+         class AnonymousOrFilter(Filter):
+             filters: List[Type[Filter]] = list(filters_)  # TODO: tuple better?
+
+             @classmethod
+             @override
+             @cache(maxsize=None)
+             def filter(cls, token: str) -> bool:
+                 for f in cls.filters:
+                     if f.filter(token):
+                         return True
+                 return False
+
+         return AnonymousOrFilter
+
+
+ class AndFilter(Filter):
+     """Instantiate with more than one filter to compose them into one filter,
+     returning False when any individual filter fails to match or True otherwise.
+     Requires at least two filters."""
+
+     def __new__(cls, *filters_: Type[Filter]) -> Type[Filter]:
+         if not len(filters_) >= 2:
+             raise ValueError("Must provide at least two Filters to AndFilter.")
+
+         class AnonymousAndFilter(Filter):
+             filters: List[Type[Filter]] = list(filters_)  # TODO: tuple better?
+
+             @classmethod
+             @override
+             @cache(maxsize=None)
+             def filter(cls, token: str) -> bool:
+                 for f in cls.filters:
+                     if not f.filter(token):
+                         return False
+                 return True
+
+         return AnonymousAndFilter


  __all__ = [
      "Alphabetic",
+     "AndFilter",
+     "EnglishIgnorables",
      "NimiLinku",
      "NimiLinkuAle",
      "NimiLinkuSandbox",
      "NimiPu",
      "NimiPuAle",
+     "NimiUCSUR",
      "Numeric",
+     "OrFilter",
      "Phonotactic",
      "ProperName",
      "Punctuation",
sonatoki/Tokenizers.py CHANGED
@@ -5,16 +5,12 @@ from typing import Set, List

  # PDM
  import regex
- from typing_extensions import override
+ from typing_extensions import override, deprecated

  # LOCAL
  from sonatoki.utils import regex_escape
- from sonatoki.constants import (
-     POSIX_PUNCT,
-     UNICODE_PUNCT,
-     SENTENCE_PUNCT,
-     ALL_PUNCT_RANGES,
- )
+ from sonatoki.Filters import NimiUCSUR  # seriously this sucks
+ from sonatoki.constants import ALL_PUNCT, SENTENCE_PUNCT, ALL_PUNCT_RANGES

  regex.DEFAULT_VERSION = regex.VERSION1

@@ -50,7 +46,12 @@ class Regex1Tokenizer(Tokenizer):


  class WordTokenizer(SetTokenizer):
-     delimiters = set(POSIX_PUNCT + UNICODE_PUNCT)
+     delimiters = set(ALL_PUNCT)
+
+     @classmethod
+     def __helper(cls, s: str, tokens: List[str], last_match: int, i: int):
+         match = s[last_match:i].split()
+         [tokens.append(t) for t in match if t]

      @classmethod
      @override
@@ -60,32 +61,47 @@ class WordTokenizer(SetTokenizer):

          tokens: List[str] = []

+         i = 0  # ensure i is bound
          last_match = 0
          last_membership = s[0] in cls.delimiters
          for i, char in enumerate(s):
              mem = char in cls.delimiters
-             if mem == last_membership:
+             ucsur = NimiUCSUR.filter(char)  # a UCSUR character always counts as "changed"
+             changed = (mem != last_membership) or ucsur
+             # this keeps contiguous words together, but splits UCSUR
+             if not changed:
+                 continue
+
+             if ucsur:
+                 if i > last_match:
+                     # add the token before the UCSUR character
+                     cls.__helper(s, tokens, last_match, i)
+                 # add the UCSUR character itself as a token
+                 tokens.append(char)
+                 last_match = i + 1
+                 last_membership = mem
                  continue

-             match = s[last_match:i].split()
-             # TODO: kinda sucks? what about unicode whitespace?
+             cls.__helper(s, tokens, last_match, i)
              last_match = i
              last_membership = mem
-             [tokens.append(t) for t in match if t]
-
-         match = s[last_match:].strip().split()
-         if match:
-             tokens.extend(match)

+         cls.__helper(s, tokens, last_match, i + 1)
          return tokens


+ @deprecated(
+     "WordTokenizerRe is a previous reference implementation. Its behavior has diverged from WordTokenizer and it may not be restored."
+ )
  class WordTokenizerRe(RegexTokenizer):
      pattern = re.compile(rf"""([{ALL_PUNCT_RANGES}]+|\s+)""")


+ @deprecated(
+     "WordTokenizerRe1 is a previous reference implementation. Its behavior has diverged from WordTokenizer and it may not be restored."
+ )
  class WordTokenizerRe1(Regex1Tokenizer):
-     """Reference implementation for WorkTokenizer."""
+     """Reference implementation for WordTokenizer."""

      pattern = regex.compile(r"""([\p{posix_punct}\p{Punctuation}]+|\s+)""")

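Note: with the rewritten loop, a UCSUR character always ends the current token and becomes a token of its own, while Latin-script text still splits only where delimiter membership changes. A sketch of the expected behavior (assuming the classmethod entry point is `tokenize`, as on the `Tokenizer` base class):

    from sonatoki.Tokenizers import WordTokenizer

    WordTokenizer.tokenize("toki! pona")
    # expected: ["toki", "!", "pona"]
    WordTokenizer.tokenize("\U000F1900\U000F1901")
    # expected: ["\U000F1900", "\U000F1901"]  (contiguous UCSUR chars split apart)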
sonatoki/constants.py CHANGED
@@ -4,16 +4,371 @@ from typing import Dict, List
  from pathlib import Path

  # LOCAL
- from sonatoki.utils import find_unicode_ranges
+ from sonatoki.utils import find_unicode_chars, find_unicode_ranges

  # `\p{Punctuation}` character class
- UNICODE_PUNCT = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~¡¢£¤¥¦§¨©«¬®¯°±´¶·¸»¿×÷˂˃˄˅˒˓˔˕˖˗˘˙˚˛˜˝˞˟˥˦˧˨˩˪˫˭˯˰˱˲˳˴˵˶˷˸˹˺˻˼˽˾˿͵;΄΅·϶҂՚՛՜՝՞՟։֊֍֎֏־׀׃׆׳״؆؇؈؉؊؋،؍؎؏؛؝؞؟٪٫٬٭۔۞۩۽۾܀܁܂܃܄܅܆܇܈܉܊܋܌܍߶߷߸߹߾߿࠰࠱࠲࠳࠴࠵࠶࠷࠸࠹࠺࠻࠼࠽࠾࡞࢈।॥॰৲৳৺৻৽੶૰૱୰௳௴௵௶௷௸௹௺౷౿಄൏൹෴฿๏๚๛༁༂༃༄༅༆༇༈༉༊་༌།༎༏༐༑༒༓༔༕༖༗༚༛༜༝༞༟༴༶༸༺༻༼༽྅྾྿࿀࿁࿂࿃࿄࿅࿇࿈࿉࿊࿋࿌࿎࿏࿐࿑࿒࿓࿔࿕࿖࿗࿘࿙࿚၊။၌၍၎၏႞႟჻፠፡።፣፤፥፦፧፨᎐᎑᎒᎓᎔᎕᎖᎗᎘᎙᐀᙭᙮᚛᚜᛫᛬᛭᜵᜶។៕៖៘៙៚៛᠀᠁᠂᠃᠄᠅᠆᠇᠈᠉᠊᥀᥄᥅᧞᧟᧠᧡᧢᧣᧤᧥᧦᧧᧨᧩᧪᧫᧬᧭᧮᧯᧰᧱᧲᧳᧴᧵᧶᧷᧸᧹᧺᧻᧼᧽᧾᧿᨞᨟᪠᪡᪢᪣᪤᪥᪦᪨᪩᪪᪫᪬᪭᭚᭛᭜᭝᭞᭟᭠᭡᭢᭣᭤᭥᭦᭧᭨᭩᭪᭴᭵᭶᭷᭸᭹᭺᭻᭼᭽᭾᯼᯽᯾᯿᰻᰼᰽᰾᰿᱾᱿᳀᳁᳂᳃᳄᳅᳆᳇᳓᾽᾿῀῁῍῎῏῝῞῟῭΅`´῾‐‑‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞⁺⁻⁼⁽⁾₊₋₌₍₎₠₡₢₣₤₥₦₧₨₩₪₫€₭₮₯₰₱₲₳₴₵₶₷₸₹₺₻₼₽₾₿⃀℀℁℃℄℅℆℈℉℔№℗℘℞℟℠℡™℣℥℧℩℮℺℻⅀⅁⅂⅃⅄⅊⅋⅌⅍⅏↊↋←↑→↓↔↕↖↗↘↙↚↛↜↝↞↟↠↡↢↣↤↥↦↧↨↩↪↫↬↭↮↯↰↱↲↳↴↵↶↷↸↹↺↻↼↽↾↿⇀⇁⇂⇃⇄⇅⇆⇇⇈⇉⇊⇋⇌⇍⇎⇏⇐⇑⇒⇓⇔⇕⇖⇗⇘⇙⇚⇛⇜⇝⇞⇟⇠⇡⇢⇣⇤⇥⇦⇧⇨⇩⇪⇫⇬⇭⇮⇯⇰⇱⇲⇳⇴⇵⇶⇷⇸⇹⇺⇻⇼⇽⇾⇿∀∁∂∃∄∅∆∇∈∉∊∋∌∍∎∏∐∑−∓∔∕∖∗∘∙√∛∜∝∞∟∠∡∢∣∤∥∦∧∨∩∪∫∬∭∮∯∰∱∲∳∴∵∶∷∸∹∺∻∼∽∾∿≀≁≂≃≄≅≆≇≈≉≊≋≌≍≎≏≐≑≒≓≔≕≖≗≘≙≚≛≜≝≞≟≠≡≢≣≤≥≦≧≨≩≪≫≬≭≮≯≰≱≲≳≴≵≶≷≸≹≺≻≼≽≾≿⊀⊁⊂⊃⊄⊅⊆⊇⊈⊉⊊⊋⊌⊍⊎⊏⊐⊑⊒⊓⊔⊕⊖⊗⊘⊙⊚⊛⊜⊝⊞⊟⊠⊡⊢⊣⊤⊥⊦⊧⊨⊩⊪⊫⊬⊭⊮⊯⊰⊱⊲⊳⊴⊵⊶⊷⊸⊹⊺⊻⊼⊽⊾⊿⋀⋁⋂⋃⋄⋅⋆⋇⋈⋉⋊⋋⋌⋍⋎⋏⋐⋑⋒⋓⋔⋕⋖⋗⋘⋙⋚⋛⋜⋝⋞⋟⋠⋡⋢⋣⋤⋥⋦⋧⋨⋩⋪⋫⋬⋭⋮⋯⋰⋱⋲⋳⋴⋵⋶⋷⋸⋹⋺⋻⋼⋽⋾⋿⌀⌁⌂⌃⌄⌅⌆⌇⌈⌉⌊⌋⌌⌍⌎⌏⌐⌑⌒⌓⌔⌕⌖⌗⌘⌙⌚⌛⌜⌝⌞⌟⌠⌡⌢⌣⌤⌥⌦⌧⌨〈〉⌫⌬⌭⌮⌯⌰⌱⌲⌳⌴⌵⌶⌷⌸⌹⌺⌻⌼⌽⌾⌿⍀⍁⍂⍃⍄⍅⍆⍇⍈⍉⍊⍋⍌⍍⍎⍏⍐⍑⍒⍓⍔⍕⍖⍗⍘⍙⍚⍛⍜⍝⍞⍟⍠⍡⍢⍣⍤⍥⍦⍧⍨⍩⍪⍫⍬⍭⍮⍯⍰⍱⍲⍳⍴⍵⍶⍷⍸⍹⍺⍻⍼⍽⍾⍿⎀⎁⎂⎃⎄⎅⎆⎇⎈⎉⎊⎋⎌⎍⎎⎏⎐⎑⎒⎓⎔⎕⎖⎗⎘⎙⎚⎛⎜⎝⎞⎟⎠⎡⎢⎣⎤⎥⎦⎧⎨⎩⎪⎫⎬⎭⎮⎯⎰⎱⎲⎳⎴⎵⎶⎷⎸⎹⎺⎻⎼⎽⎾⎿⏀⏁⏂⏃⏄⏅⏆⏇⏈⏉⏊⏋⏌⏍⏎⏏⏐⏑⏒⏓⏔⏕⏖⏗⏘⏙⏚⏛⏜⏝⏞⏟⏠⏡⏢⏣⏤⏥⏦⏧⏨⏩⏪⏫⏬⏭⏮⏯⏰⏱⏲⏳⏴⏵⏶⏷⏸⏹⏺⏻⏼⏽⏾⏿␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟␠␡␢␣␤␥␦⑀⑁⑂⑃⑄⑅⑆⑇⑈⑉⑊⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵─━│┃┄┅┆┇┈┉┊┋┌┍┎┏┐┑┒┓└┕┖┗┘┙┚┛├┝┞┟┠┡┢┣┤┥┦┧┨┩┪┫┬┭┮┯┰┱┲┳┴┵┶┷┸┹┺┻┼┽┾┿╀╁╂╃╄╅╆╇╈╉╊╋╌╍╎╏═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬╭╮╯╰╱╲╳╴╵╶╷╸╹╺╻╼╽╾╿▀▁▂▃▄▅▆▇█▉▊▋▌▍▎▏▐░▒▓▔▕▖▗▘▙▚▛▜▝▞▟■□▢▣▤▥▦▧▨▩▪▫▬▭▮▯▰▱▲△▴▵▶▷▸▹►▻▼▽▾▿◀◁◂◃◄◅◆◇◈◉◊○◌◍◎●◐◑◒◓◔◕◖◗◘◙◚◛◜◝◞◟◠◡◢◣◤◥◦◧◨◩◪◫◬◭◮◯◰◱◲◳◴◵◶◷◸◹◺◻◼◽◾◿☀☁☂☃☄★☆☇☈☉☊☋☌☍☎☏☐☑☒☓☔☕☖☗☘☙☚☛☜☝☞☟☠☡☢☣☤☥☦☧☨☩☪☫☬☭☮☯☰☱☲☳☴☵☶☷☸☹☺☻☼☽☾☿♀♁♂♃♄♅♆♇♈♉♊♋♌♍♎♏♐♑♒♓♔♕♖♗♘♙♚♛♜♝♞♟♠♡♢♣♤♥♦♧♨♩♪♫♬♭♮♯♰♱♲♳♴♵♶♷♸♹♺♻♼♽♾♿⚀⚁⚂⚃⚄⚅⚆⚇⚈⚉⚊⚋⚌⚍⚎⚏⚐⚑⚒⚓⚔⚕⚖⚗⚘⚙⚚⚛⚜⚝⚞⚟⚠⚡⚢⚣⚤⚥⚦⚧⚨⚩⚪⚫⚬⚭⚮⚯⚰⚱⚲⚳⚴⚵⚶⚷⚸⚹⚺⚻⚼⚽⚾⚿⛀⛁⛂⛃⛄⛅⛆⛇⛈⛉⛊⛋⛌⛍⛎⛏⛐⛑⛒⛓⛔⛕⛖⛗⛘⛙⛚⛛⛜⛝⛞⛟⛠⛡⛢⛣⛤⛥⛦⛧⛨⛩⛪⛫⛬⛭⛮⛯⛰⛱⛲⛳⛴⛵⛶⛷⛸⛹⛺⛻⛼⛽⛾⛿✀✁✂✃✄✅✆✇✈✉✊✋✌✍✎✏✐✑✒✓✔✕✖✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✨✩✪✫✬✭✮✯✰✱✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋❌❍❎❏❐❑❒❓❔❕❖❗❘❙❚❛❜❝❞❟❠❡❢❣❤❥❦❧❨❩❪❫❬❭❮❯❰❱❲❳❴❵➔➕➖➗➘➙➚➛➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➰➱➲➳➴➵➶➷➸➹➺➻➼➽➾➿⟀⟁⟂⟃⟄⟅⟆⟇⟈⟉⟊⟋⟌⟍⟎⟏⟐⟑⟒⟓⟔⟕⟖⟗⟘⟙⟚⟛⟜⟝⟞⟟⟠⟡⟢⟣⟤⟥⟦⟧⟨⟩⟪⟫⟬⟭⟮⟯⟰⟱⟲⟳⟴⟵⟶⟷⟸⟹⟺⟻⟼⟽⟾⟿⠀⠁⠂⠃⠄⠅⠆⠇⠈⠉⠊⠋⠌⠍⠎⠏⠐⠑⠒⠓⠔⠕⠖⠗⠘⠙⠚⠛⠜⠝⠞⠟⠠⠡⠢⠣⠤⠥⠦⠧⠨⠩⠪⠫⠬⠭⠮⠯⠰⠱⠲⠳⠴⠵⠶⠷⠸⠹⠺⠻⠼⠽⠾⠿⡀⡁⡂⡃⡄⡅⡆⡇⡈⡉⡊⡋⡌⡍⡎⡏⡐⡑⡒⡓⡔⡕⡖⡗⡘⡙⡚⡛⡜⡝⡞⡟⡠⡡⡢⡣⡤⡥⡦⡧⡨⡩⡪⡫⡬⡭⡮⡯⡰⡱⡲⡳⡴⡵⡶⡷⡸⡹⡺⡻⡼⡽⡾⡿⢀⢁⢂⢃⢄⢅⢆⢇⢈⢉⢊⢋⢌⢍⢎⢏⢐⢑⢒⢓⢔⢕⢖⢗⢘⢙⢚⢛⢜⢝⢞⢟⢠⢡⢢⢣⢤⢥⢦⢧⢨⢩⢪⢫⢬⢭⢮⢯⢰⢱⢲⢳⢴⢵⢶⢷⢸⢹⢺⢻⢼⢽⢾⢿⣀⣁⣂⣃⣄⣅⣆⣇⣈⣉⣊⣋⣌⣍⣎⣏⣐⣑⣒⣓⣔⣕⣖⣗⣘⣙⣚⣛⣜⣝⣞⣟⣠⣡⣢⣣⣤⣥⣦⣧⣨⣩⣪⣫⣬⣭⣮⣯⣰⣱⣲⣳⣴⣵⣶⣷⣸⣹⣺⣻⣼⣽⣾⣿⤀⤁⤂⤃⤄⤅⤆⤇⤈⤉⤊⤋⤌⤍⤎⤏⤐⤑⤒⤓⤔⤕⤖⤗⤘⤙⤚⤛⤜⤝⤞⤟⤠⤡⤢⤣⤤⤥⤦⤧⤨⤩⤪⤫⤬⤭⤮⤯⤰⤱⤲⤳⤴⤵⤶⤷⤸⤹⤺⤻⤼⤽⤾⤿⥀⥁⥂⥃⥄⥅⥆⥇⥈⥉⥊⥋⥌⥍⥎⥏⥐⥑⥒⥓⥔⥕⥖⥗⥘⥙⥚⥛⥜⥝⥞⥟⥠⥡⥢⥣⥤⥥⥦⥧⥨⥩⥪⥫⥬⥭⥮⥯⥰⥱⥲⥳⥴⥵⥶⥷⥸⥹⥺⥻⥼⥽⥾⥿⦀⦁⦂⦃⦄⦅⦆⦇⦈⦉⦊⦋⦌⦍⦎⦏⦐⦑⦒⦓⦔⦕⦖⦗⦘⦙⦚⦛⦜⦝⦞⦟⦠⦡⦢⦣⦤⦥⦦⦧⦨⦩⦪⦫⦬⦭⦮⦯⦰⦱⦲⦳⦴⦵⦶⦷⦸⦹⦺⦻⦼⦽⦾⦿⧀⧁⧂⧃⧄⧅⧆⧇⧈⧉⧊⧋⧌⧍⧎⧏⧐⧑⧒⧓⧔⧕⧖⧗⧘⧙⧚⧛⧜⧝⧞⧟⧠⧡⧢⧣⧤⧥⧦⧧⧨⧩⧪⧫⧬⧭⧮⧯⧰⧱⧲⧳⧴⧵⧶⧷⧸⧹⧺⧻⧼⧽⧾⧿⨀⨁⨂⨃⨄⨅⨆⨇⨈⨉⨊⨋⨌⨍⨎⨏⨐⨑⨒⨓⨔⨕⨖⨗⨘⨙⨚⨛⨜⨝⨞⨟⨠⨡⨢⨣⨤⨥⨦⨧⨨⨩⨪⨫⨬⨭⨮⨯⨰⨱⨲⨳⨴⨵⨶⨷⨸⨹⨺⨻⨼⨽⨾⨿⩀⩁⩂⩃⩄⩅⩆⩇⩈⩉⩊⩋⩌⩍⩎⩏⩐⩑⩒⩓⩔⩕⩖⩗⩘⩙⩚⩛⩜⩝⩞⩟⩠⩡⩢⩣⩤⩥⩦⩧⩨⩩⩪⩫⩬⩭⩮⩯⩰⩱⩲⩳⩴⩵⩶⩷⩸⩹⩺⩻⩼⩽⩾⩿⪀⪁⪂⪃⪄⪅⪆⪇⪈⪉⪊⪋⪌⪍⪎⪏⪐⪑⪒⪓⪔⪕⪖⪗⪘⪙⪚⪛⪜⪝⪞⪟⪠⪡⪢⪣⪤⪥⪦⪧⪨⪩⪪⪫⪬⪭⪮⪯⪰⪱⪲⪳⪴⪵⪶⪷⪸⪹⪺⪻⪼⪽⪾⪿⫀⫁⫂⫃⫄⫅⫆⫇⫈⫉⫊⫋⫌⫍⫎⫏⫐⫑⫒⫓⫔⫕⫖⫗⫘⫙⫚⫛⫝̸⫝⫞⫟⫠⫡⫢⫣⫤⫥⫦⫧⫨⫩⫪⫫⫬⫭⫮⫯⫰⫱⫲⫳⫴⫵⫶⫷⫸⫹⫺⫻⫼⫽⫾⫿⬀⬁⬂⬃⬄⬅⬆⬇⬈⬉⬊⬋⬌⬍⬎⬏⬐⬑⬒⬓⬔⬕⬖⬗⬘⬙⬚⬛⬜⬝⬞⬟⬠⬡⬢⬣⬤⬥⬦⬧⬨⬩⬪⬫⬬⬭⬮⬯⬰⬱⬲⬳⬴⬵⬶⬷⬸⬹⬺⬻⬼⬽⬾⬿⭀⭁⭂⭃⭄⭅⭆⭇⭈⭉⭊⭋⭌⭍⭎⭏⭐⭑⭒⭓⭔⭕⭖⭗⭘⭙⭚⭛⭜⭝⭞⭟⭠⭡⭢⭣⭤⭥⭦⭧⭨⭩⭪⭫⭬⭭⭮⭯⭰⭱⭲⭳⭶⭷⭸⭹⭺⭻⭼⭽⭾⭿⮀⮁⮂⮃⮄⮅⮆⮇⮈⮉⮊⮋⮌⮍⮎⮏⮐⮑⮒⮓⮔⮕⮗⮘⮙⮚⮛⮜⮝⮞⮟⮠⮡⮢⮣⮤⮥⮦⮧⮨⮩⮪⮫⮬⮭⮮⮯⮰⮱⮲⮳⮴⮵⮶⮷⮸⮹⮺⮻⮼⮽⮾⮿⯀⯁⯂⯃⯄⯅⯆⯇⯈⯉⯊⯋⯌⯍⯎⯏⯐⯑⯒⯓⯔⯕⯖⯗⯘⯙⯚⯛⯜⯝⯞⯟⯠⯡⯢⯣⯤⯥⯦⯧⯨⯩⯪⯫⯬⯭⯮⯯⯰⯱⯲⯳⯴⯵⯶⯷⯸⯹⯺⯻⯼⯽⯾⯿⳥⳦⳧⳨⳩⳪⳹⳺⳻⳼⳾⳿⵰⸀⸁⸂⸃⸄⸅⸆⸇⸈⸉⸊⸋⸌⸍⸎⸏⸐⸑⸒⸓⸔⸕⸖⸗⸘⸙⸚⸛⸜⸝⸞⸟⸠⸡⸢⸣⸤⸥⸦⸧⸨⸩⸪⸫⸬⸭⸮⸰⸱⸲⸳⸴⸵⸶⸷⸸⸹⸺⸻⸼⸽⸾⸿⹀⹁⹂⹃⹄⹅⹆⹇⹈⹉⹊⹋⹌⹍⹎⹏⹐⹑⹒⹓⹔⹕⹖⹗⹘⹙⹚⹛⹜⹝⺀⺁⺂⺃⺄⺅⺆⺇⺈⺉⺊⺋⺌⺍⺎⺏⺐⺑⺒⺓⺔⺕⺖⺗⺘⺙⺛⺜⺝⺞⺟⺠⺡⺢⺣⺤⺥⺦⺧⺨⺩⺪⺫⺬⺭⺮⺯⺰⺱⺲⺳⺴⺵⺶⺷⺸⺹⺺⺻⺼⺽⺾⺿⻀⻁⻂⻃⻄⻅⻆⻇⻈⻉⻊⻋⻌⻍⻎⻏⻐⻑⻒⻓⻔⻕⻖⻗⻘⻙⻚⻛⻜⻝⻞⻟⻠⻡⻢⻣⻤⻥⻦⻧⻨⻩⻪⻫⻬⻭⻮⻯⻰⻱⻲⻳⼀⼁⼂⼃⼄⼅⼆⼇⼈⼉⼊⼋⼌⼍⼎⼏⼐⼑⼒⼓⼔⼕⼖⼗⼘⼙⼚⼛⼜⼝⼞⼟⼠⼡⼢⼣⼤⼥⼦⼧⼨⼩⼪⼫⼬⼭⼮⼯⼰⼱⼲⼳⼴⼵⼶⼷⼸⼹⼺⼻⼼⼽⼾⼿⽀⽁⽂⽃⽄⽅⽆⽇⽈⽉⽊⽋⽌⽍⽎⽏⽐⽑⽒⽓⽔⽕⽖⽗⽘⽙⽚⽛⽜⽝⽞⽟⽠⽡⽢⽣⽤⽥⽦⽧⽨⽩⽪⽫⽬⽭⽮⽯⽰⽱⽲⽳⽴⽵⽶⽷⽸⽹⽺⽻⽼⽽⽾⽿⾀⾁⾂⾃⾄⾅⾆⾇⾈⾉⾊⾋⾌⾍⾎⾏⾐⾑⾒⾓⾔⾕⾖⾗⾘⾙⾚⾛⾜⾝⾞⾟⾠⾡⾢⾣⾤⾥⾦⾧⾨⾩⾪⾫⾬⾭⾮⾯⾰⾱⾲⾳⾴⾵⾶⾷⾸⾹⾺⾻⾼⾽⾾⾿⿀⿁⿂⿃⿄⿅⿆⿇⿈⿉⿊⿋⿌⿍⿎⿏⿐⿑⿒⿓⿔⿕⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻⿼⿽⿾⿿、。〃〄〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜
〝〞〟〠〰〶〷〽〾〿゛゜゠・㆐㆑㆖㆗㆘㆙㆚㆛㆜㆝㆞㆟㇀㇁㇂㇃㇄㇅㇆㇇㇈㇉㇊㇋㇌㇍㇎㇏㇐㇑㇒㇓㇔㇕㇖㇗㇘㇙㇚㇛㇜㇝㇞㇟㇠㇡㇢㇣㇯㈀㈁㈂㈃㈄㈅㈆㈇㈈㈉㈊㈋㈌㈍㈎㈏㈐㈑㈒㈓㈔㈕㈖㈗㈘㈙㈚㈛㈜㈝㈞㈪㈫㈬㈭㈮㈯㈰㈱㈲㈳㈴㈵㈶㈷㈸㈹㈺㈻㈼㈽㈾㈿㉀㉁㉂㉃㉄㉅㉆㉇㉐㉠㉡㉢㉣㉤㉥㉦㉧㉨㉩㉪㉫㉬㉭㉮㉯㉰㉱㉲㉳㉴㉵㉶㉷㉸㉹㉺㉻㉼㉽㉾㉿㊊㊋㊌㊍㊎㊏㊐㊑㊒㊓㊔㊕㊖㊗㊘㊙㊚㊛㊜㊝㊞㊟㊠㊡㊢㊣㊤㊥㊦㊧㊨㊩㊪㊫㊬㊭㊮㊯㊰㋀㋁㋂㋃㋄㋅㋆㋇㋈㋉㋊㋋㋌㋍㋎㋏㋐㋑㋒㋓㋔㋕㋖㋗㋘㋙㋚㋛㋜㋝㋞㋟㋠㋡㋢㋣㋤㋥㋦㋧㋨㋩㋪㋫㋬㋭㋮㋯㋰㋱㋲㋳㋴㋵㋶㋷㋸㋹㋺㋻㋼㋽㋾㋿㌀㌁㌂㌃㌄㌅㌆㌇㌈㌉㌊㌋㌌㌍㌎㌏㌐㌑㌒㌓㌔㌕㌖㌗㌘㌙㌚㌛㌜㌝㌞㌟㌠㌡㌢㌣㌤㌥㌦㌧㌨㌩㌪㌫㌬㌭㌮㌯㌰㌱㌲㌳㌴㌵㌶㌷㌸㌹㌺㌻㌼㌽㌾㌿㍀㍁㍂㍃㍄㍅㍆㍇㍈㍉㍊㍋㍌㍍㍎㍏㍐㍑㍒㍓㍔㍕㍖㍗㍘㍙㍚㍛㍜㍝㍞㍟㍠㍡㍢㍣㍤㍥㍦㍧㍨㍩㍪㍫㍬㍭㍮㍯㍰㍱㍲㍳㍴㍵㍶㍷㍸㍹㍺㍻㍼㍽㍾㍿㎀㎁㎂㎃㎄㎅㎆㎇㎈㎉㎊㎋㎌㎍㎎㎏㎐㎑㎒㎓㎔㎕㎖㎗㎘㎙㎚㎛㎜㎝㎞㎟㎠㎡㎢㎣㎤㎥㎦㎧㎨㎩㎪㎫㎬㎭㎮㎯㎰㎱㎲㎳㎴㎵㎶㎷㎸㎹㎺㎻㎼㎽㎾㎿㏀㏁㏂㏃㏄㏅㏆㏇㏈㏉㏊㏋㏌㏍㏎㏏㏐㏑㏒㏓㏔㏕㏖㏗㏘㏙㏚㏛㏜㏝㏞㏟㏠㏡㏢㏣㏤㏥㏦㏧㏨㏩㏪㏫㏬㏭㏮㏯㏰㏱㏲㏳㏴㏵㏶㏷㏸㏹㏺㏻㏼㏽㏾㏿䷀䷁䷂䷃䷄䷅䷆䷇䷈䷉䷊䷋䷌䷍䷎䷏䷐䷑䷒䷓䷔䷕䷖䷗䷘䷙䷚䷛䷜䷝䷞䷟䷠䷡䷢䷣䷤䷥䷦䷧䷨䷩䷪䷫䷬䷭䷮䷯䷰䷱䷲䷳䷴䷵䷶䷷䷸䷹䷺䷻䷼䷽䷾䷿꒐꒑꒒꒓꒔꒕꒖꒗꒘꒙꒚꒛꒜꒝꒞꒟꒠꒡꒢꒣꒤꒥꒦꒧꒨꒩꒪꒫꒬꒭꒮꒯꒰꒱꒲꒳꒴꒵꒶꒷꒸꒹꒺꒻꒼꒽꒾꒿꓀꓁꓂꓃꓄꓅꓆꓾꓿꘍꘎꘏꙳꙾꛲꛳꛴꛵꛶꛷꜀꜁꜂꜃꜄꜅꜆꜇꜈꜉꜊꜋꜌꜍꜎꜏꜐꜑꜒꜓꜔꜕꜖꜠꜡꞉꞊꠨꠩꠪꠫꠶꠷꠸꠹꡴꡵꡶꡷꣎꣏꣸꣹꣺꣼꤮꤯꥟꧁꧂꧃꧄꧅꧆꧇꧈꧉꧊꧋꧌꧍꧞꧟꩜꩝꩞꩟꩷꩸꩹꫞꫟꫰꫱꭛꭪꭫꯫﬩﮲﮳﮴﮵﮶﮷﮸﮹﮺﮻﮼﮽﮾﮿﯀﯁﯂﴾﴿﵀﵁﵂﵃﵄﵅﵆﵇﵈﵉﵊﵋﵌﵍﵎﵏﷏﷼﷽﷾﷿︐︑︒︓︔︕︖︗︘︙︰︱︲︳︴︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄﹅﹆﹇﹈﹉﹊﹋﹌﹍﹎﹏﹐﹑﹒﹔﹕﹖﹗﹘﹙﹚﹛﹜﹝﹞﹟﹠﹡﹢﹣﹤﹥﹦﹨﹩﹪﹫!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○�𐄀𐄁𐄂𐄷𐄸𐄹𐄺𐄻𐄼𐄽𐄾𐄿𐅹𐅺𐅻𐅼𐅽𐅾𐅿𐆀𐆁𐆂𐆃𐆄𐆅𐆆𐆇𐆈𐆉𐆌𐆍𐆎𐆐𐆑𐆒𐆓𐆔𐆕𐆖𐆗𐆘𐆙𐆚𐆛𐆜𐆠𐇐𐇑𐇒𐇓𐇔𐇕𐇖𐇗𐇘𐇙𐇚𐇛𐇜𐇝𐇞𐇟𐇠𐇡𐇢𐇣𐇤𐇥𐇦𐇧𐇨𐇩𐇪𐇫𐇬𐇭𐇮𐇯𐇰𐇱𐇲𐇳𐇴𐇵𐇶𐇷𐇸𐇹𐇺𐇻𐇼𐎟𐏐𐕯𐡗𐡷𐡸𐤟𐤿𐩐𐩑𐩒𐩓𐩔𐩕𐩖𐩗𐩘𐩿𐫈𐫰𐫱𐫲𐫳𐫴𐫵𐫶𐬹𐬺𐬻𐬼𐬽𐬾𐬿𐮙𐮚𐮛𐮜𐺭𐽕𐽖𐽗𐽘𐽙𐾆𐾇𐾈𐾉𑁇𑁈𑁉𑁊𑁋𑁌𑁍𑂻𑂼𑂾𑂿𑃀𑃁𑅀𑅁𑅂𑅃𑅴𑅵𑇅𑇆𑇇𑇈𑇍𑇛𑇝𑇞𑇟𑈸𑈹𑈺𑈻𑈼𑈽𑊩𑑋𑑌𑑍𑑎𑑏𑑚𑑛𑑝𑓆𑗁𑗂𑗃𑗄𑗅𑗆𑗇𑗈𑗉𑗊𑗋𑗌𑗍𑗎𑗏𑗐𑗑𑗒𑗓𑗔𑗕𑗖𑗗𑙁𑙂𑙃𑙠𑙡𑙢𑙣𑙤𑙥𑙦𑙧𑙨𑙩𑙪𑙫𑙬𑚹𑜼𑜽𑜾𑜿𑠻𑥄𑥅𑥆𑧢𑨿𑩀𑩁𑩂𑩃𑩄𑩅𑩆𑪚𑪛𑪜𑪞𑪟𑪠𑪡𑪢𑬀𑬁𑬂𑬃𑬄𑬅𑬆𑬇𑬈𑬉𑱁𑱂𑱃𑱄𑱅𑱰𑱱𑻷𑻸𑽃𑽄𑽅𑽆𑽇𑽈𑽉𑽊𑽋𑽌𑽍𑽎𑽏𑿕𑿖𑿗𑿘𑿙𑿚𑿛𑿜𑿝𑿞𑿟𑿠𑿡𑿢𑿣𑿤𑿥𑿦𑿧𑿨𑿩𑿪𑿫𑿬𑿭𑿮𑿯𑿰𑿱𑿿𒑰𒑱𒑲𒑳𒑴𒿱𒿲𖩮𖩯𖫵𖬷𖬸𖬹𖬺𖬻𖬼𖬽𖬾𖬿𖭄𖭅𖺗𖺘𖺙𖺚𖿢𛲜𛲟𜽐𜽑𜽒𜽓𜽔𜽕𜽖𜽗𜽘𜽙𜽚𜽛𜽜𜽝𜽞𜽟𜽠𜽡𜽢𜽣𜽤𜽥𜽦𜽧𜽨𜽩𜽪𜽫𜽬𜽭𜽮𜽯𜽰𜽱𜽲𜽳𜽴𜽵𜽶𜽷𜽸𜽹𜽺𜽻𜽼𜽽𜽾𜽿𜾀𜾁𜾂𜾃𜾄𜾅𜾆𜾇𜾈𜾉𜾊𜾋𜾌𜾍𜾎𜾏𜾐𜾑𜾒𜾓𜾔𜾕𜾖𜾗𜾘𜾙𜾚𜾛𜾜𜾝𜾞𜾟𜾠𜾡𜾢𜾣𜾤𜾥𜾦𜾧𜾨𜾩𜾪𜾫𜾬𜾭𜾮𜾯𜾰𜾱𜾲𜾳𜾴𜾵𜾶𜾷𜾸𜾹𜾺𜾻𜾼𜾽𜾾𜾿𜿀𜿁𜿂𜿃𝀀𝀁𝀂𝀃𝀄𝀅𝀆𝀇𝀈𝀉𝀊𝀋𝀌𝀍𝀎𝀏𝀐𝀑𝀒𝀓𝀔𝀕𝀖𝀗𝀘𝀙𝀚𝀛𝀜𝀝𝀞𝀟𝀠𝀡𝀢𝀣𝀤𝀥𝀦𝀧𝀨𝀩𝀪𝀫𝀬𝀭𝀮𝀯𝀰𝀱𝀲𝀳𝀴𝀵𝀶𝀷𝀸𝀹𝀺𝀻𝀼𝀽𝀾𝀿𝁀𝁁𝁂𝁃𝁄𝁅𝁆𝁇𝁈𝁉𝁊𝁋𝁌𝁍𝁎𝁏𝁐𝁑𝁒𝁓𝁔𝁕𝁖𝁗𝁘𝁙𝁚𝁛𝁜𝁝𝁞𝁟𝁠𝁡𝁢𝁣𝁤𝁥𝁦𝁧𝁨𝁩𝁪𝁫𝁬𝁭𝁮𝁯𝁰𝁱𝁲𝁳𝁴𝁵𝁶𝁷𝁸𝁹𝁺𝁻𝁼𝁽𝁾𝁿𝂀𝂁𝂂𝂃𝂄𝂅𝂆𝂇𝂈𝂉𝂊𝂋𝂌𝂍𝂎𝂏𝂐𝂑𝂒𝂓𝂔𝂕𝂖𝂗𝂘𝂙𝂚𝂛𝂜𝂝𝂞𝂟𝂠𝂡𝂢𝂣𝂤𝂥𝂦𝂧𝂨𝂩𝂪𝂫𝂬𝂭𝂮𝂯𝂰𝂱𝂲𝂳𝂴𝂵𝂶𝂷𝂸𝂹𝂺𝂻𝂼𝂽𝂾𝂿𝃀𝃁𝃂𝃃𝃄𝃅𝃆𝃇𝃈𝃉𝃊𝃋𝃌𝃍𝃎𝃏𝃐𝃑𝃒𝃓𝃔𝃕𝃖𝃗𝃘𝃙𝃚𝃛𝃜𝃝𝃞𝃟𝃠𝃡𝃢𝃣𝃤𝃥𝃦𝃧𝃨𝃩𝃪𝃫𝃬𝃭𝃮𝃯𝃰𝃱𝃲𝃳𝃴𝃵𝄀𝄁𝄂𝄃𝄄𝄅𝄆𝄇𝄈𝄉𝄊𝄋𝄌𝄍𝄎𝄏𝄐𝄑𝄒𝄓𝄔𝄕𝄖𝄗𝄘𝄙𝄚𝄛𝄜𝄝𝄞𝄟𝄠𝄡𝄢𝄣𝄤𝄥𝄦𝄩𝄪𝄫𝄬𝄭𝄮𝄯𝄰𝄱𝄲𝄳𝄴𝄵𝄶𝄷𝄸𝄹𝄺𝄻𝄼𝄽𝄾𝄿𝅀𝅁𝅂𝅃𝅄𝅅𝅆𝅇𝅈𝅉𝅊𝅋𝅌𝅍𝅎𝅏𝅐𝅑𝅒𝅓𝅔𝅕𝅖𝅗𝅘𝅙𝅚𝅛𝅜𝅝𝅗𝅥𝅘𝅥𝅘𝅥𝅮𝅘𝅥𝅯𝅘𝅥𝅰𝅘𝅥𝅱𝅘𝅥𝅲𝅪𝅫𝅬𝆃𝆄𝆌𝆍𝆎𝆏𝆐𝆑𝆒𝆓𝆔𝆕𝆖𝆗𝆘𝆙𝆚𝆛𝆜𝆝𝆞𝆟𝆠𝆡𝆢𝆣𝆤𝆥𝆦𝆧𝆨𝆩𝆮𝆯𝆰𝆱𝆲𝆳𝆴𝆵𝆶𝆷𝆸𝆹𝆺𝆹𝅥𝆺𝅥𝆹𝅥𝅮𝆺𝅥𝅮𝆹𝅥𝅯𝆺𝅥𝅯𝇁𝇂𝇃𝇄𝇅𝇆𝇇𝇈𝇉𝇊𝇋𝇌𝇍𝇎𝇏𝇐𝇑𝇒𝇓𝇔𝇕𝇖𝇗𝇘𝇙𝇚𝇛𝇜𝇝𝇞𝇟𝇠𝇡𝇢𝇣𝇤𝇥𝇦𝇧𝇨𝇩𝇪𝈀𝈁𝈂𝈃𝈄𝈅𝈆𝈇𝈈𝈉𝈊𝈋𝈌𝈍𝈎𝈏𝈐𝈑𝈒𝈓𝈔𝈕𝈖𝈗𝈘𝈙𝈚𝈛𝈜𝈝𝈞𝈟𝈠𝈡𝈢𝈣𝈤𝈥𝈦𝈧𝈨𝈩𝈪𝈫𝈬𝈭𝈮𝈯𝈰𝈱𝈲𝈳𝈴𝈵𝈶𝈷𝈸𝈹𝈺𝈻𝈼𝈽𝈾𝈿𝉀𝉁𝉅𝌀𝌁𝌂𝌃𝌄𝌅𝌆𝌇𝌈𝌉𝌊𝌋𝌌𝌍𝌎𝌏𝌐𝌑𝌒𝌓𝌔𝌕𝌖𝌗𝌘𝌙𝌚𝌛𝌜𝌝𝌞𝌟𝌠𝌡𝌢𝌣𝌤𝌥𝌦𝌧𝌨𝌩𝌪𝌫𝌬𝌭𝌮𝌯𝌰𝌱𝌲𝌳𝌴𝌵𝌶𝌷𝌸𝌹𝌺𝌻𝌼𝌽𝌾𝌿𝍀𝍁𝍂𝍃𝍄𝍅𝍆𝍇𝍈𝍉𝍊𝍋𝍌𝍍𝍎𝍏𝍐𝍑𝍒𝍓𝍔𝍕𝍖𝛁𝛛𝛻𝜕𝜵𝝏𝝯𝞉𝞩𝟃𝠀𝠁𝠂𝠃𝠄𝠅𝠆𝠇𝠈𝠉𝠊𝠋𝠌𝠍𝠎𝠏𝠐𝠑𝠒𝠓𝠔𝠕𝠖𝠗𝠘𝠙𝠚𝠛𝠜𝠝𝠞𝠟𝠠𝠡𝠢𝠣𝠤𝠥𝠦𝠧𝠨𝠩𝠪𝠫𝠬𝠭𝠮𝠯𝠰𝠱𝠲𝠳𝠴𝠵𝠶𝠷𝠸𝠹𝠺𝠻𝠼𝠽𝠾𝠿𝡀𝡁𝡂𝡃𝡄𝡅𝡆𝡇𝡈𝡉𝡊𝡋𝡌𝡍𝡎𝡏𝡐𝡑𝡒𝡓𝡔𝡕𝡖𝡗𝡘𝡙𝡚𝡛𝡜𝡝𝡞𝡟𝡠𝡡𝡢𝡣𝡤𝡥𝡦𝡧𝡨𝡩𝡪𝡫𝡬𝡭𝡮𝡯𝡰𝡱𝡲𝡳𝡴𝡵𝡶𝡷𝡸𝡹𝡺𝡻𝡼𝡽𝡾𝡿𝢀𝢁𝢂𝢃𝢄𝢅𝢆𝢇𝢈𝢉𝢊𝢋𝢌𝢍𝢎𝢏𝢐𝢑𝢒𝢓𝢔𝢕𝢖𝢗𝢘𝢙𝢚𝢛𝢜𝢝𝢞𝢟𝢠𝢡𝢢𝢣𝢤𝢥𝢦𝢧𝢨𝢩𝢪𝢫𝢬𝢭𝢮𝢯𝢰𝢱𝢲𝢳𝢴𝢵𝢶𝢷𝢸𝢹𝢺𝢻𝢼𝢽𝢾𝢿𝣀𝣁𝣂𝣃𝣄𝣅𝣆𝣇𝣈𝣉𝣊𝣋𝣌𝣍𝣎𝣏𝣐𝣑𝣒𝣓𝣔𝣕𝣖𝣗𝣘𝣙𝣚𝣛𝣜𝣝𝣞𝣟𝣠𝣡𝣢𝣣𝣤𝣥𝣦𝣧𝣨𝣩𝣪𝣫𝣬𝣭𝣮𝣯𝣰𝣱𝣲𝣳𝣴𝣵𝣶𝣷𝣸𝣹𝣺𝣻𝣼𝣽𝣾𝣿𝤀𝤁𝤂𝤃𝤄𝤅𝤆𝤇𝤈𝤉𝤊𝤋𝤌𝤍𝤎𝤏𝤐𝤑𝤒𝤓𝤔𝤕𝤖𝤗𝤘𝤙𝤚𝤛𝤜𝤝𝤞𝤟𝤠𝤡𝤢𝤣𝤤𝤥𝤦𝤧𝤨𝤩𝤪𝤫𝤬𝤭𝤮𝤯𝤰𝤱𝤲𝤳𝤴𝤵𝤶𝤷𝤸𝤹𝤺𝤻𝤼𝤽𝤾𝤿𝥀𝥁𝥂𝥃𝥄𝥅𝥆𝥇𝥈𝥉𝥊𝥋𝥌𝥍𝥎𝥏𝥐𝥑𝥒𝥓𝥔𝥕𝥖𝥗𝥘𝥙𝥚𝥛𝥜𝥝𝥞𝥟𝥠𝥡𝥢𝥣𝥤𝥥𝥦𝥧𝥨𝥩𝥪𝥫𝥬𝥭𝥮𝥯𝥰𝥱𝥲𝥳𝥴𝥵𝥶𝥷𝥸𝥹𝥺𝥻𝥼𝥽𝥾𝥿𝦀𝦁𝦂𝦃𝦄𝦅𝦆𝦇𝦈𝦉𝦊𝦋𝦌𝦍𝦎𝦏𝦐𝦑𝦒𝦓𝦔𝦕𝦖𝦗𝦘𝦙𝦚𝦛𝦜𝦝𝦞𝦟𝦠𝦡𝦢𝦣𝦤𝦥𝦦𝦧𝦨𝦩𝦪𝦫𝦬𝦭𝦮𝦯𝦰𝦱𝦲𝦳𝦴𝦵𝦶𝦷𝦸𝦹𝦺𝦻𝦼𝦽𝦾𝦿𝧀𝧁𝧂𝧃𝧄𝧅𝧆𝧇𝧈𝧉𝧊𝧋𝧌𝧍𝧎𝧏𝧐𝧑𝧒𝧓𝧔𝧕𝧖𝧗𝧘𝧙𝧚𝧛𝧜𝧝𝧞𝧟𝧠𝧡𝧢𝧣𝧤𝧥𝧦𝧧𝧨𝧩𝧪𝧫𝧬𝧭𝧮𝧯𝧰𝧱𝧲𝧳𝧴𝧵𝧶𝧷𝧸𝧹𝧺𝧻𝧼𝧽𝧾𝧿𝨷𝨸𝨹𝨺𝩭𝩮𝩯𝩰𝩱𝩲𝩳𝩴𝩶𝩷𝩸𝩹𝩺𝩻𝩼𝩽𝩾𝩿𝪀𝪁𝪂𝪃𝪅𝪆𝪇𝪈𝪉𝪊𝪋𞅏𞋿𞥞𞥟𞲬𞲰𞴮𞻰𞻱🀀🀁🀂🀃🀄🀅🀆🀇🀈🀉🀊🀋🀌🀍🀎🀏🀐🀑🀒🀓🀔🀕🀖🀗🀘🀙🀚🀛🀜🀝🀞🀟🀠🀡🀢🀣🀤🀥🀦🀧🀨🀩🀪🀫🀰🀱🀲🀳🀴🀵🀶🀷🀸🀹🀺🀻🀼🀽🀾🀿🁀🁁🁂🁃🁄🁅🁆🁇🁈🁉🁊🁋🁌🁍🁎🁏🁐🁑🁒🁓🁔🁕🁖🁗🁘🁙🁚🁛🁜🁝🁞🁟🁠🁡🁢🁣🁤🁥🁦🁧🁨🁩🁪🁫🁬🁭🁮🁯🁰🁱🁲🁳🁴🁵🁶🁷🁸🁹🁺🁻🁼🁽🁾🁿🂀🂁🂂🂃🂄🂅🂆🂇🂈🂉🂊🂋🂌🂍🂎🂏🂐🂑🂒🂓🂠🂡🂢🂣🂤🂥🂦🂧🂨🂩🂪🂫🂬🂭🂮🂱🂲🂳🂴🂵🂶🂷🂸🂹🂺🂻🂼🂽🂾🂿🃁🃂🃃🃄🃅🃆🃇🃈🃉🃊🃋🃌🃍🃎🃏🃑🃒🃓🃔🃕🃖🃗🃘🃙🃚🃛🃜🃝🃞🃟🃠🃡🃢🃣🃤🃥🃦🃧🃨🃩🃪🃫🃬🃭🃮🃯🃰🃱🃲🃳🃴🃵🄍🄎🄏🄐🄑🄒🄓🄔🄕🄖🄗🄘🄙🄚🄛🄜🄝🄞🄟🄠🄡🄢🄣🄤🄥🄦🄧🄨🄩🄪🄫🄬🄭🄮🄯🅊🅋🅌🅍🅎🅏🅪🅫🅬🅭🅮🅯🆊🆋🆌🆍🆎🆏🆐🆑🆒🆓🆔🆕🆖🆗🆘🆙🆚🆛🆜🆝🆞🆟🆠🆡🆢🆣🆤🆥🆦🆧🆨🆩🆪🆫🆬🆭🇦🇧🇨🇩🇪🇫🇬🇭🇮🇯🇰🇱🇲🇳🇴🇵🇶🇷🇸🇹🇺🇻🇼🇽🇾🇿🈀🈁🈂🈐🈑🈒🈓🈔🈕🈖🈗🈘🈙🈚🈛🈜🈝🈞🈟🈠🈡🈢🈣🈤🈥🈦🈧🈨🈩🈪🈫🈬🈭🈮🈯🈰🈱🈲🈳🈴🈵🈶🈷🈸🈹🈺🈻🉀🉁🉂🉃🉄🉅🉆🉇🉈🉐🉑🉠🉡🉢🉣🉤🉥🌀🌁🌂🌃🌄🌅🌆🌇🌈🌉🌊🌋🌌🌍🌎🌏🌐🌑🌒🌓🌔🌕🌖🌗🌘🌙🌚🌛🌜🌝🌞🌟🌠🌡🌢🌣🌤🌥🌦🌧🌨🌩🌪🌫🌬🌭🌮🌯🌰🌱🌲🌳🌴🌵🌶🌷🌸🌹🌺🌻🌼🌽🌾🌿🍀🍁🍂🍃🍄🍅🍆🍇🍈🍉🍊🍋🍌🍍🍎🍏🍐🍑🍒🍓🍔🍕🍖🍗🍘🍙🍚🍛🍜🍝🍞🍟🍠🍡🍢🍣🍤🍥🍦🍧🍨🍩🍪🍫🍬🍭🍮🍯🍰🍱🍲🍳🍴🍵🍶🍷🍸🍹🍺🍻🍼🍽🍾🍿🎀🎁🎂🎃🎄🎅🎆🎇🎈🎉🎊🎋🎌🎍🎎🎏🎐🎑🎒🎓🎔🎕🎖🎗🎘🎙🎚🎛🎜🎝🎞🎟🎠🎡🎢🎣🎤🎥🎦🎧🎨🎩🎪🎫🎬🎭🎮🎯🎰🎱🎲🎳🎴🎵🎶🎷🎸🎹🎺🎻🎼🎽🎾🎿🏀🏁🏂🏃🏄🏅🏆🏇🏈🏉🏊🏋🏌🏍🏎🏏🏐🏑🏒🏓🏔🏕🏖🏗🏘🏙🏚🏛🏜🏝🏞🏟🏠🏡🏢🏣🏤🏥🏦🏧🏨🏩🏪🏫🏬🏭🏮🏯🏰🏱🏲🏳🏴🏵🏶🏷🏸🏹🏺🏻🏼🏽🏾🏿🐀🐁🐂🐃🐄🐅🐆🐇🐈🐉🐊🐋🐌🐍🐎🐏🐐🐑🐒🐓🐔🐕🐖🐗🐘🐙🐚🐛🐜🐝🐞🐟🐠🐡🐢🐣🐤🐥🐦🐧🐨🐩🐪🐫🐬🐭🐮🐯🐰🐱🐲🐳🐴🐵🐶🐷🐸🐹🐺🐻🐼🐽🐾🐿👀👁👂👃👄👅👆👇👈👉👊👋👌👍👎👏👐👑👒👓👔👕👖👗👘👙👚👛👜👝👞👟👠👡👢👣👤👥👦👧👨👩👪👫👬👭👮👯👰👱👲👳👴👵👶👷👸👹👺👻👼👽👾👿💀💁💂💃💄💅💆💇💈💉💊💋💌💍💎💏💐💑💒💓💔💕💖💗💘💙💚💛💜💝💞💟💠💡💢💣💤💥💦💧💨💩💪💫💬💭💮💯💰💱💲💳💴💵💶💷💸💹💺💻💼💽💾💿📀📁📂📃📄📅📆📇📈📉📊📋📌📍📎📏📐📑📒📓📔📕📖📗📘📙📚📛📜📝📞📟📠📡📢📣📤📥📦📧📨📩📪📫📬📭📮📯📰📱📲📳📴📵📶📷📸📹📺📻📼📽📾📿🔀🔁🔂🔃🔄🔅🔆🔇🔈🔉🔊🔋🔌🔍🔎🔏🔐🔑🔒🔓🔔🔕🔖🔗🔘🔙🔚🔛🔜🔝🔞🔟🔠🔡🔢🔣🔤🔥🔦🔧🔨🔩🔪🔫🔬🔭🔮🔯🔰🔱🔲🔳🔴🔵🔶🔷🔸🔹🔺🔻🔼🔽🔾🔿🕀🕁🕂🕃🕄🕅🕆🕇🕈🕉🕊🕋🕌🕍🕎🕏🕐🕑🕒🕓🕔🕕🕖🕗🕘🕙🕚🕛🕜🕝🕞🕟🕠🕡🕢🕣🕤🕥🕦🕧🕨🕩🕪🕫🕬🕭🕮🕯🕰🕱🕲🕳🕴🕵🕶🕷
🕸🕹🕺🕻🕼🕽🕾🕿🖀🖁🖂🖃🖄🖅🖆🖇🖈🖉🖊🖋🖌🖍🖎🖏🖐🖑🖒🖓🖔🖕🖖🖗🖘🖙🖚🖛🖜🖝🖞🖟🖠🖡🖢🖣🖤🖥🖦🖧🖨🖩🖪🖫🖬🖭🖮🖯🖰🖱🖲🖳🖴🖵🖶🖷🖸🖹🖺🖻🖼🖽🖾🖿🗀🗁🗂🗃🗄🗅🗆🗇🗈🗉🗊🗋🗌🗍🗎🗏🗐🗑🗒🗓🗔🗕🗖🗗🗘🗙🗚🗛🗜🗝🗞🗟🗠🗡🗢🗣🗤🗥🗦🗧🗨🗩🗪🗫🗬🗭🗮🗯🗰🗱🗲🗳🗴🗵🗶🗷🗸🗹🗺🗻🗼🗽🗾🗿😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓😔😕😖😗😘😙😚😛😜😝😞😟😠😡😢😣😤😥😦😧😨😩😪😫😬😭😮😯😰😱😲😳😴😵😶😷😸😹😺😻😼😽😾😿🙀🙁🙂🙃🙄🙅🙆🙇🙈🙉🙊🙋🙌🙍🙎🙏🙐🙑🙒🙓🙔🙕🙖🙗🙘🙙🙚🙛🙜🙝🙞🙟🙠🙡🙢🙣🙤🙥🙦🙧🙨🙩🙪🙫🙬🙭🙮🙯🙰🙱🙲🙳🙴🙵🙶🙷🙸🙹🙺🙻🙼🙽🙾🙿🚀🚁🚂🚃🚄🚅🚆🚇🚈🚉🚊🚋🚌🚍🚎🚏🚐🚑🚒🚓🚔🚕🚖🚗🚘🚙🚚🚛🚜🚝🚞🚟🚠🚡🚢🚣🚤🚥🚦🚧🚨🚩🚪🚫🚬🚭🚮🚯🚰🚱🚲🚳🚴🚵🚶🚷🚸🚹🚺🚻🚼🚽🚾🚿🛀🛁🛂🛃🛄🛅🛆🛇🛈🛉🛊🛋🛌🛍🛎🛏🛐🛑🛒🛓🛔🛕🛖🛗🛜🛝🛞🛟🛠🛡🛢🛣🛤🛥🛦🛧🛨🛩🛪🛫🛬🛰🛱🛲🛳🛴🛵🛶🛷🛸🛹🛺🛻🛼🜀🜁🜂🜃🜄🜅🜆🜇🜈🜉🜊🜋🜌🜍🜎🜏🜐🜑🜒🜓🜔🜕🜖🜗🜘🜙🜚🜛🜜🜝🜞🜟🜠🜡🜢🜣🜤🜥🜦🜧🜨🜩🜪🜫🜬🜭🜮🜯🜰🜱🜲🜳🜴🜵🜶🜷🜸🜹🜺🜻🜼🜽🜾🜿🝀🝁🝂🝃🝄🝅🝆🝇🝈🝉🝊🝋🝌🝍🝎🝏🝐🝑🝒🝓🝔🝕🝖🝗🝘🝙🝚🝛🝜🝝🝞🝟🝠🝡🝢🝣🝤🝥🝦🝧🝨🝩🝪🝫🝬🝭🝮🝯🝰🝱🝲🝳🝴🝵🝶🝻🝼🝽🝾🝿🞀🞁🞂🞃🞄🞅🞆🞇🞈🞉🞊🞋🞌🞍🞎🞏🞐🞑🞒🞓🞔🞕🞖🞗🞘🞙🞚🞛🞜🞝🞞🞟🞠🞡🞢🞣🞤🞥🞦🞧🞨🞩🞪🞫🞬🞭🞮🞯🞰🞱🞲🞳🞴🞵🞶🞷🞸🞹🞺🞻🞼🞽🞾🞿🟀🟁🟂🟃🟄🟅🟆🟇🟈🟉🟊🟋🟌🟍🟎🟏🟐🟑🟒🟓🟔🟕🟖🟗🟘🟙🟠🟡🟢🟣🟤🟥🟦🟧🟨🟩🟪🟫🟰🠀🠁🠂🠃🠄🠅🠆🠇🠈🠉🠊🠋🠐🠑🠒🠓🠔🠕🠖🠗🠘🠙🠚🠛🠜🠝🠞🠟🠠🠡🠢🠣🠤🠥🠦🠧🠨🠩🠪🠫🠬🠭🠮🠯🠰🠱🠲🠳🠴🠵🠶🠷🠸🠹🠺🠻🠼🠽🠾🠿🡀🡁🡂🡃🡄🡅🡆🡇🡐🡑🡒🡓🡔🡕🡖🡗🡘🡙🡠🡡🡢🡣🡤🡥🡦🡧🡨🡩🡪🡫🡬🡭🡮🡯🡰🡱🡲🡳🡴🡵🡶🡷🡸🡹🡺🡻🡼🡽🡾🡿🢀🢁🢂🢃🢄🢅🢆🢇🢐🢑🢒🢓🢔🢕🢖🢗🢘🢙🢚🢛🢜🢝🢞🢟🢠🢡🢢🢣🢤🢥🢦🢧🢨🢩🢪🢫🢬🢭🢰🢱🤀🤁🤂🤃🤄🤅🤆🤇🤈🤉🤊🤋🤌🤍🤎🤏🤐🤑🤒🤓🤔🤕🤖🤗🤘🤙🤚🤛🤜🤝🤞🤟🤠🤡🤢🤣🤤🤥🤦🤧🤨🤩🤪🤫🤬🤭🤮🤯🤰🤱🤲🤳🤴🤵🤶🤷🤸🤹🤺🤻🤼🤽🤾🤿🥀🥁🥂🥃🥄🥅🥆🥇🥈🥉🥊🥋🥌🥍🥎🥏🥐🥑🥒🥓🥔🥕🥖🥗🥘🥙🥚🥛🥜🥝🥞🥟🥠🥡🥢🥣🥤🥥🥦🥧🥨🥩🥪🥫🥬🥭🥮🥯🥰🥱🥲🥳🥴🥵🥶🥷🥸🥹🥺🥻🥼🥽🥾🥿🦀🦁🦂🦃🦄🦅🦆🦇🦈🦉🦊🦋🦌🦍🦎🦏🦐🦑🦒🦓🦔🦕🦖🦗🦘🦙🦚🦛🦜🦝🦞🦟🦠🦡🦢🦣🦤🦥🦦🦧🦨🦩🦪🦫🦬🦭🦮🦯🦰🦱🦲🦳🦴🦵🦶🦷🦸🦹🦺🦻🦼🦽🦾🦿🧀🧁🧂🧃🧄🧅🧆🧇🧈🧉🧊🧋🧌🧍🧎🧏🧐🧑🧒🧓🧔🧕🧖🧗🧘🧙🧚🧛🧜🧝🧞🧟🧠🧡🧢🧣🧤🧥🧦🧧🧨🧩🧪🧫🧬🧭🧮🧯🧰🧱🧲🧳🧴🧵🧶🧷🧸🧹🧺🧻🧼🧽🧾🧿🨀🨁🨂🨃🨄🨅🨆🨇🨈🨉🨊🨋🨌🨍🨎🨏🨐🨑🨒🨓🨔🨕🨖🨗🨘🨙🨚🨛🨜🨝🨞🨟🨠🨡🨢🨣🨤🨥🨦🨧🨨🨩🨪🨫🨬🨭🨮🨯🨰🨱🨲🨳🨴🨵🨶🨷🨸🨹🨺🨻🨼🨽🨾🨿🩀🩁🩂🩃🩄🩅🩆🩇🩈🩉🩊🩋🩌🩍🩎🩏🩐🩑🩒🩓🩠🩡🩢🩣🩤🩥🩦🩧🩨🩩🩪🩫🩬🩭🩰🩱🩲🩳🩴🩵🩶🩷🩸🩹🩺🩻🩼🪀🪁🪂🪃🪄🪅🪆🪇🪈🪐🪑🪒🪓🪔🪕🪖🪗🪘🪙🪚🪛🪜🪝🪞🪟🪠🪡🪢🪣🪤🪥🪦🪧🪨🪩🪪🪫🪬🪭🪮🪯🪰🪱🪲🪳🪴🪵🪶🪷🪸🪹🪺🪻🪼🪽🪿🫀🫁🫂🫃🫄🫅🫎🫏🫐🫑🫒🫓🫔🫕🫖🫗🫘🫙🫚🫛🫠🫡🫢🫣🫤🫥🫦🫧🫨🫰🫱🫲🫳🫴🫵🫶🫷🫸🬀🬁🬂🬃🬄🬅🬆🬇🬈🬉🬊🬋🬌🬍🬎🬏🬐🬑🬒🬓🬔🬕🬖🬗🬘🬙🬚🬛🬜🬝🬞🬟🬠🬡🬢🬣🬤🬥🬦🬧🬨🬩🬪🬫🬬🬭🬮🬯🬰🬱🬲🬳🬴🬵🬶🬷🬸🬹🬺🬻🬼🬽🬾🬿🭀🭁🭂🭃🭄🭅🭆🭇🭈🭉🭊🭋🭌🭍🭎🭏🭐🭑🭒🭓🭔🭕🭖🭗🭘🭙🭚🭛🭜🭝🭞🭟🭠🭡🭢🭣🭤🭥🭦🭧🭨🭩🭪🭫🭬🭭🭮🭯🭰🭱🭲🭳🭴🭵🭶🭷🭸🭹🭺🭻🭼🭽🭾🭿🮀🮁🮂🮃🮄🮅🮆🮇🮈🮉🮊🮋🮌🮍🮎🮏🮐🮑🮒🮔🮕🮖🮗🮘🮙🮚🮛🮜🮝🮞🮟🮠🮡🮢🮣🮤🮥🮦🮧🮨🮩🮪🮫🮬🮭🮮🮯🮰🮱🮲🮳🮴🮵🮶🮷🮸🮹🮺🮻🮼🮽🮾🮿🯀🯁🯂🯃🯄🯅🯆🯇🯈🯉🯊"""
  # https://www.compart.com/en/unicode/category
  # https://unicode.org/Public/UNIDATA/UnicodeData.txt
+ # NOTE: WAY too large to store as a string of each char
+ UNICODE_PUNCT_RANGES = [
+     "\\U00000021-\\U0000002f",
+     "\\U0000003a-\\U00000040",
+     "\\U0000005b-\\U00000060",
+     "\\U0000007b-\\U0000007e",
+     "\\U000000a1-\\U000000a9",
+     "\\U000000ab-\\U000000ac",
+     "\\U000000ae-\\U000000b1",
+     "\\U000000b4",
+     "\\U000000b6-\\U000000b8",
+     "\\U000000bb",
+     "\\U000000bf",
+     "\\U000000d7",
+     "\\U000000f7",
+     "\\U000002c2-\\U000002c5",
+     "\\U000002d2-\\U000002df",
+     "\\U000002e5-\\U000002eb",
+     "\\U000002ed",
+     "\\U000002ef-\\U000002ff",
+     "\\U00000375",
+     "\\U0000037e",
+     "\\U00000384-\\U00000385",
+     "\\U00000387",
+     "\\U000003f6",
+     "\\U00000482",
+     "\\U0000055a-\\U0000055f",
+     "\\U00000589-\\U0000058a",
+     "\\U0000058d-\\U0000058f",
+     "\\U000005be",
+     "\\U000005c0",
+     "\\U000005c3",
+     "\\U000005c6",
+     "\\U000005f3-\\U000005f4",
+     "\\U00000606-\\U0000060f",
+     "\\U0000061b",
+     "\\U0000061d-\\U0000061f",
+     "\\U0000066a-\\U0000066d",
+     "\\U000006d4",
+     "\\U000006de",
+     "\\U000006e9",
+     "\\U000006fd-\\U000006fe",
+     "\\U00000700-\\U0000070d",
+     "\\U000007f6-\\U000007f9",
+     "\\U000007fe-\\U000007ff",
+     "\\U00000830-\\U0000083e",
+     "\\U0000085e",
+     "\\U00000888",
+     "\\U00000964-\\U00000965",
+     "\\U00000970",
+     "\\U000009f2-\\U000009f3",
+     "\\U000009fa-\\U000009fb",
+     "\\U000009fd",
+     "\\U00000a76",
+     "\\U00000af0-\\U00000af1",
+     "\\U00000b70",
+     "\\U00000bf3-\\U00000bfa",
+     "\\U00000c77",
+     "\\U00000c7f",
+     "\\U00000c84",
+     "\\U00000d4f",
+     "\\U00000d79",
+     "\\U00000df4",
+     "\\U00000e3f",
+     "\\U00000e4f",
+     "\\U00000e5a-\\U00000e5b",
+     "\\U00000f01-\\U00000f17",
+     "\\U00000f1a-\\U00000f1f",
+     "\\U00000f34",
+     "\\U00000f36",
+     "\\U00000f38",
+     "\\U00000f3a-\\U00000f3d",
+     "\\U00000f85",
+     "\\U00000fbe-\\U00000fc5",
+     "\\U00000fc7-\\U00000fcc",
+     "\\U00000fce-\\U00000fda",
+     "\\U0000104a-\\U0000104f",
+     "\\U0000109e-\\U0000109f",
+     "\\U000010fb",
+     "\\U00001360-\\U00001368",
+     "\\U00001390-\\U00001399",
+     "\\U00001400",
+     "\\U0000166d-\\U0000166e",
+     "\\U0000169b-\\U0000169c",
+     "\\U000016eb-\\U000016ed",
+     "\\U00001735-\\U00001736",
+     "\\U000017d4-\\U000017d6",
+     "\\U000017d8-\\U000017db",
+     "\\U00001800-\\U0000180a",
+     "\\U00001940",
+     "\\U00001944-\\U00001945",
+     "\\U000019de-\\U000019ff",
+     "\\U00001a1e-\\U00001a1f",
+     "\\U00001aa0-\\U00001aa6",
+     "\\U00001aa8-\\U00001aad",
+     "\\U00001b5a-\\U00001b6a",
+     "\\U00001b74-\\U00001b7e",
+     "\\U00001bfc-\\U00001bff",
+     "\\U00001c3b-\\U00001c3f",
+     "\\U00001c7e-\\U00001c7f",
+     "\\U00001cc0-\\U00001cc7",
+     "\\U00001cd3",
+     "\\U00001fbd",
+     "\\U00001fbf-\\U00001fc1",
+     "\\U00001fcd-\\U00001fcf",
+     "\\U00001fdd-\\U00001fdf",
+     "\\U00001fed-\\U00001fef",
+     "\\U00001ffd-\\U00001ffe",
+     "\\U00002010-\\U00002027",
+     "\\U00002030-\\U0000205e",
+     "\\U0000207a-\\U0000207e",
+     "\\U0000208a-\\U0000208e",
+     "\\U000020a0-\\U000020c0",
+     "\\U00002100-\\U00002101",
+     "\\U00002103-\\U00002106",
+     "\\U00002108-\\U00002109",
+     "\\U00002114",
+     "\\U00002116-\\U00002118",
+     "\\U0000211e-\\U00002123",
+     "\\U00002125",
+     "\\U00002127",
+     "\\U00002129",
+     "\\U0000212e",
+     "\\U0000213a-\\U0000213b",
+     "\\U00002140-\\U00002144",
+     "\\U0000214a-\\U0000214d",
+     "\\U0000214f",
+     "\\U0000218a-\\U0000218b",
+     "\\U00002190-\\U00002426",
+     "\\U00002440-\\U0000244a",
+     "\\U0000249c-\\U000024b5",
+     "\\U00002500-\\U00002775",
+     "\\U00002794-\\U00002b73",
+     "\\U00002b76-\\U00002b95",
+     "\\U00002b97-\\U00002bff",
+     "\\U00002ce5-\\U00002cea",
+     "\\U00002cf9-\\U00002cfc",
+     "\\U00002cfe-\\U00002cff",
+     "\\U00002d70",
+     "\\U00002e00-\\U00002e2e",
+     "\\U00002e30-\\U00002e5d",
+     "\\U00002e80-\\U00002e99",
+     "\\U00002e9b-\\U00002ef3",
+     "\\U00002f00-\\U00002fd5",
+     "\\U00002ff0-\\U00002fff",
+     "\\U00003001-\\U00003004",
+     "\\U00003008-\\U00003020",
+     "\\U00003030",
+     "\\U00003036-\\U00003037",
+     "\\U0000303d-\\U0000303f",
+     "\\U0000309b-\\U0000309c",
+     "\\U000030a0",
+     "\\U000030fb",
+     "\\U00003190-\\U00003191",
+     "\\U00003196-\\U0000319f",
+     "\\U000031c0-\\U000031e3",
+     "\\U000031ef",
+     "\\U00003200-\\U0000321e",
+     "\\U0000322a-\\U00003247",
+     "\\U00003250",
+     "\\U00003260-\\U0000327f",
+     "\\U0000328a-\\U000032b0",
+     "\\U000032c0-\\U000033ff",
+     "\\U00004dc0-\\U00004dff",
+     "\\U0000a490-\\U0000a4c6",
+     "\\U0000a4fe-\\U0000a4ff",
+     "\\U0000a60d-\\U0000a60f",
+     "\\U0000a673",
+     "\\U0000a67e",
+     "\\U0000a6f2-\\U0000a6f7",
+     "\\U0000a700-\\U0000a716",
+     "\\U0000a720-\\U0000a721",
+     "\\U0000a789-\\U0000a78a",
+     "\\U0000a828-\\U0000a82b",
+     "\\U0000a836-\\U0000a839",
+     "\\U0000a874-\\U0000a877",
+     "\\U0000a8ce-\\U0000a8cf",
+     "\\U0000a8f8-\\U0000a8fa",
+     "\\U0000a8fc",
+     "\\U0000a92e-\\U0000a92f",
+     "\\U0000a95f",
+     "\\U0000a9c1-\\U0000a9cd",
+     "\\U0000a9de-\\U0000a9df",
+     "\\U0000aa5c-\\U0000aa5f",
+     "\\U0000aa77-\\U0000aa79",
+     "\\U0000aade-\\U0000aadf",
+     "\\U0000aaf0-\\U0000aaf1",
+     "\\U0000ab5b",
+     "\\U0000ab6a-\\U0000ab6b",
+     "\\U0000abeb",
+     "\\U0000fb29",
+     "\\U0000fbb2-\\U0000fbc2",
+     "\\U0000fd3e-\\U0000fd4f",
+     "\\U0000fdcf",
+     "\\U0000fdfc-\\U0000fdff",
+     "\\U0000fe10-\\U0000fe19",
+     "\\U0000fe30-\\U0000fe52",
+     "\\U0000fe54-\\U0000fe66",
+     "\\U0000fe68-\\U0000fe6b",
+     "\\U0000ff01-\\U0000ff0f",
+     "\\U0000ff1a-\\U0000ff20",
+     "\\U0000ff3b-\\U0000ff40",
+     "\\U0000ff5b-\\U0000ff65",
+     "\\U0000ffe0-\\U0000ffe6",
+     "\\U0000ffe8-\\U0000ffee",
+     "\\U0000fffc-\\U0000fffd",
+     "\\U00010100-\\U00010102",
+     "\\U00010137-\\U0001013f",
+     "\\U00010179-\\U00010189",
+     "\\U0001018c-\\U0001018e",
+     "\\U00010190-\\U0001019c",
+     "\\U000101a0",
+     "\\U000101d0-\\U000101fc",
+     "\\U0001039f",
+     "\\U000103d0",
+     "\\U0001056f",
+     "\\U00010857",
+     "\\U00010877-\\U00010878",
+     "\\U0001091f",
+     "\\U0001093f",
+     "\\U00010a50-\\U00010a58",
+     "\\U00010a7f",
+     "\\U00010ac8",
+     "\\U00010af0-\\U00010af6",
+     "\\U00010b39-\\U00010b3f",
+     "\\U00010b99-\\U00010b9c",
+     "\\U00010ead",
+     "\\U00010f55-\\U00010f59",
+     "\\U00010f86-\\U00010f89",
+     "\\U00011047-\\U0001104d",
+     "\\U000110bb-\\U000110bc",
+     "\\U000110be-\\U000110c1",
+     "\\U00011140-\\U00011143",
+     "\\U00011174-\\U00011175",
+     "\\U000111c5-\\U000111c8",
+     "\\U000111cd",
+     "\\U000111db",
+     "\\U000111dd-\\U000111df",
+     "\\U00011238-\\U0001123d",
+     "\\U000112a9",
+     "\\U0001144b-\\U0001144f",
+     "\\U0001145a-\\U0001145b",
+     "\\U0001145d",
+     "\\U000114c6",
+     "\\U000115c1-\\U000115d7",
+     "\\U00011641-\\U00011643",
+     "\\U00011660-\\U0001166c",
+     "\\U000116b9",
+     "\\U0001173c-\\U0001173f",
+     "\\U0001183b",
+     "\\U00011944-\\U00011946",
+     "\\U000119e2",
+     "\\U00011a3f-\\U00011a46",
+     "\\U00011a9a-\\U00011a9c",
+     "\\U00011a9e-\\U00011aa2",
+     "\\U00011b00-\\U00011b09",
+     "\\U00011c41-\\U00011c45",
+     "\\U00011c70-\\U00011c71",
+     "\\U00011ef7-\\U00011ef8",
+     "\\U00011f43-\\U00011f4f",
+     "\\U00011fd5-\\U00011ff1",
+     "\\U00011fff",
+     "\\U00012470-\\U00012474",
+     "\\U00012ff1-\\U00012ff2",
+     "\\U00016a6e-\\U00016a6f",
+     "\\U00016af5",
+     "\\U00016b37-\\U00016b3f",
+     "\\U00016b44-\\U00016b45",
+     "\\U00016e97-\\U00016e9a",
+     "\\U00016fe2",
+     "\\U0001bc9c",
+     "\\U0001bc9f",
+     "\\U0001cf50-\\U0001cfc3",
+     "\\U0001d000-\\U0001d0f5",
+     "\\U0001d100-\\U0001d126",
+     "\\U0001d129-\\U0001d164",
+     "\\U0001d16a-\\U0001d16c",
+     "\\U0001d183-\\U0001d184",
+     "\\U0001d18c-\\U0001d1a9",
+     "\\U0001d1ae-\\U0001d1ea",
+     "\\U0001d200-\\U0001d241",
+     "\\U0001d245",
+     "\\U0001d300-\\U0001d356",
+     "\\U0001d6c1",
+     "\\U0001d6db",
+     "\\U0001d6fb",
+     "\\U0001d715",
+     "\\U0001d735",
+     "\\U0001d74f",
+     "\\U0001d76f",
+     "\\U0001d789",
+     "\\U0001d7a9",
+     "\\U0001d7c3",
+     "\\U0001d800-\\U0001d9ff",
+     "\\U0001da37-\\U0001da3a",
+     "\\U0001da6d-\\U0001da74",
+     "\\U0001da76-\\U0001da83",
+     "\\U0001da85-\\U0001da8b",
+     "\\U0001e14f",
+     "\\U0001e2ff",
+     "\\U0001e95e-\\U0001e95f",
+     "\\U0001ecac",
+     "\\U0001ecb0",
+     "\\U0001ed2e",
+     "\\U0001eef0-\\U0001eef1",
+     "\\U0001f000-\\U0001f02b",
+     "\\U0001f030-\\U0001f093",
+     "\\U0001f0a0-\\U0001f0ae",
+     "\\U0001f0b1-\\U0001f0bf",
+     "\\U0001f0c1-\\U0001f0cf",
+     "\\U0001f0d1-\\U0001f0f5",
+     "\\U0001f10d-\\U0001f12f",
+     "\\U0001f14a-\\U0001f14f",
+     "\\U0001f16a-\\U0001f16f",
+     "\\U0001f18a-\\U0001f1ad",
+     "\\U0001f1e6-\\U0001f202",
+     "\\U0001f210-\\U0001f23b",
+     "\\U0001f240-\\U0001f248",
+     "\\U0001f250-\\U0001f251",
+     "\\U0001f260-\\U0001f265",
+     "\\U0001f300-\\U0001f6d7",
+     "\\U0001f6dc-\\U0001f6ec",
+     "\\U0001f6f0-\\U0001f6fc",
+     "\\U0001f700-\\U0001f776",
+     "\\U0001f77b-\\U0001f7d9",
+     "\\U0001f7e0-\\U0001f7eb",
+     "\\U0001f7f0",
+     "\\U0001f800-\\U0001f80b",
+     "\\U0001f810-\\U0001f847",
+     "\\U0001f850-\\U0001f859",
+     "\\U0001f860-\\U0001f887",
+     "\\U0001f890-\\U0001f8ad",
+     "\\U0001f8b0-\\U0001f8b1",
+     "\\U0001f900-\\U0001fa53",
+     "\\U0001fa60-\\U0001fa6d",
+     "\\U0001fa70-\\U0001fa7c",
+     "\\U0001fa80-\\U0001fa88",
+     "\\U0001fa90-\\U0001fabd",
+     "\\U0001fabf-\\U0001fac5",
+     "\\U0001face-\\U0001fadb",
+     "\\U0001fae0-\\U0001fae8",
+     "\\U0001faf0-\\U0001faf8",
+     "\\U0001fb00-\\U0001fb92",
+     "\\U0001fb94-\\U0001fbca",
+     "\\U000f1990-\\U000f199d",  # UCSUR punctuation
+ ]
+
+ UCSUR_PUNCT_RANGES = UNICODE_PUNCT_RANGES[-1]  # NOTE: THIS CAN CHANGE
+
+ UNICODE_PUNCT = find_unicode_chars(UNICODE_PUNCT_RANGES)
+ # this is a large string.

  # `\p{posix_punct}` character class
  POSIX_PUNCT = r"""-!"#$%&'()*+,./:;<=>?@[\]^_`{|}~"""
- ALL_PUNCT_RANGES = "".join(find_unicode_ranges(POSIX_PUNCT + UNICODE_PUNCT))
+ POSIX_PUNCT_RANGES = find_unicode_ranges(POSIX_PUNCT)
+
+ ALL_PUNCT = "".join(sorted(list(set(POSIX_PUNCT + UNICODE_PUNCT))))
+ ALL_PUNCT_RANGES = "".join(find_unicode_ranges(ALL_PUNCT))
+ # combined bc the result could be simpler
+
  SENTENCE_PUNCT = """.?!:;'"()[-]“”·…"""


@@ -35,6 +390,37 @@ ALLOWABLES = {
      "wxw",  # wile ala wile
  }

+ IGNORABLES = {
+     # o, e, n are not here bc they're not frequently problematic in english messages
+     "a",
+     "am",
+     "an",
+     "i",
+     "in",
+     "is",
+     "l",  # they'll
+     "m",  # i'm
+     "me",
+     "no",
+     "s",  # let's
+     "so",
+     "t",  # don't
+     "to",
+     "u",  # you
+     "we",
+     "un",  # un-
+     "use",
+     "some",
+     "like",
+ }
+
+ UCSUR_RANGES = [
+     "\\U000F1900-\\U000F1977",  # pu
+     "\\U000F1978-\\U000F1988",  # ku suli
+     "\\U000F19A0-\\U000F19A3",  # ku lili
+ ]
+ NIMI_UCSUR = find_unicode_chars(UCSUR_RANGES)
+
  with open(LINKU) as f:
      linku: Dict[str, Dict[str, str]] = json.loads(f.read())
  NIMI_PU: List[str] = [d["word"] for d in linku.values() if d["book"] == "pu"]
@@ -58,6 +444,9 @@ del linku
  del sandbox

  __all__ = [
+     "ALLOWABLES",
+     "ALL_PUNCT",
+     "ALL_PUNCT_RANGES",
      "ALPHABET",
      "CONSONANTS",
      "NIMI_LINKU",
@@ -65,8 +454,9 @@ __all__ = [
      "NIMI_LINKU_SANDBOX",
      "NIMI_PU",
      "NIMI_PU_SYNONYMS",
-     "VOWELS",
-     "UNICODE_PUNCT",
-     "ALLOWABLES",
      "POSIX_PUNCT",
+     "POSIX_PUNCT_RANGES",
+     "UNICODE_PUNCT",
+     "UNICODE_PUNCT_RANGES",
+     "VOWELS",
  ]
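
Note: because `find_unicode_chars` materializes `NIMI_UCSUR` as a plain string containing every codepoint in the three UCSUR blocks, membership tests reduce to string containment. An illustrative check:

    from sonatoki.constants import NIMI_UCSUR

    assert "\U000F1900" in NIMI_UCSUR  # first codepoint of the pu block
    assert "a" not in NIMI_UCSUR       # Latin letters are not UCSUR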
sonatoki/py.typed ADDED
Empty file (PEP 561 `py.typed` marker, telling type checkers that the package ships inline type annotations)
sonatoki/utils.py CHANGED
@@ -43,12 +43,37 @@ def find_unicode_ranges(chars: str) -> List[str]:
      return ranges


+ def find_unicode_chars(ranges: List[str]) -> str:
+     result: List[str] = []
+     for item in ranges:
+         if "-" in item:
+             start, end = item.split("-")
+             start = int(start.lstrip("\\U"), 16)
+             end = int(end.lstrip("\\U"), 16)
+             result.extend(chr(code_point) for code_point in range(start, end + 1))
+         else:
+             result.append(chr(int(item.lstrip("\\U"), 16)))
+     return "".join(result)
+
+
  if __name__ == "__main__":
      """
      Helper script to fetch UNICODE_PUNCT in constants.py
      """

-     PUNCT_CATEGORIES = {"Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps", "Sm", "Sk", "Sc", "So"}
+     PUNCT_CATEGORIES = {
+         "Pc",
+         "Pd",
+         "Pe",
+         "Pf",
+         "Pi",
+         "Po",
+         "Ps",
+         "Sm",
+         "Sk",
+         "Sc",
+         "So",
+     }
      # Connector, Dash, Close (end), Final, Initial, Other, Open (sOpen), Math, Modifier (kModifier), Currency, Other

      # NOTE: UnicodeData.txt lists character ranges if there would be many characters.
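
Note: `find_unicode_chars` is the inverse of `find_unicode_ranges`: it expands `\\U`-escaped range strings back into the literal characters they cover. A self-contained sketch on a toy input:

    from sonatoki.utils import find_unicode_chars

    # "A" through "C" from the range entry, plus the lone "Z" entry
    assert find_unicode_chars(["\\U00000041-\\U00000043", "\\U0000005a"]) == "ABCZ"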
{sonatoki-0.2.2.dist-info → sonatoki-0.3.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: sonatoki
- Version: 0.2.2
+ Version: 0.3.0
  Summary: ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?
  Author-Email: "jan Kekan San (@gregdan3)" <gregory.danielson3@gmail.com>
  License: AGPL-3.0-or-later
sonatoki-0.3.0.dist-info/RECORD ADDED
@@ -0,0 +1,18 @@
+ sonatoki-0.3.0.dist-info/METADATA,sha256=94NlsvWK1jI4a-wQNdbtwtl0AH7985Cw5aV7IvQbcqo,5160
+ sonatoki-0.3.0.dist-info/WHEEL,sha256=vnE8JVcI2Wz7GRKorsPArnBdnW2SWKWGow5gu5tHlRU,90
+ sonatoki-0.3.0.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
+ sonatoki/Cleaners.py,sha256=m0j1a1vs9Mdqp724r9Xfh1Y_tyP6GYCkihv8rH8m7lA,1871
+ sonatoki/Configs.py,sha256=qDLSI0c_FmTggtzNUiYk94P8GZqm5r0co5bdsoCZsa0,3120
+ sonatoki/Filters.py,sha256=j7UcESrGGrZxS0Ln4D-0ZTEzm94xs8zzpcb22PSF_Fo,7930
+ sonatoki/Preprocessors.py,sha256=aMXXuFBDlJudvzvukvCa7BixuROXXEb62un7I-TGOGs,4441
+ sonatoki/Scorers.py,sha256=W-1uYiqjsDejJzoe592ixs7wHazjJXPhuo-41zuJ26U,3643
+ sonatoki/Tokenizers.py,sha256=So5_Tu6J98MD3yVcwB_X3lw2uMG0TN6XHcTbQjFCu5Q,4254
+ sonatoki/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sonatoki/__main__.py,sha256=6xc-wIrrFo9wTyn4zRQNAmqwmJBtVvCMwV-CrM-hueA,82
+ sonatoki/constants.py,sha256=ocH3gJOh5SzTKxhVgGmy0VP8KDk-IQpodwzh2Ilr_G4,12349
+ sonatoki/ilo.py,sha256=yyLgNPI0Hmb4f1BzX6IRHr11FPChfL2xDR_9odlr8_8,3849
+ sonatoki/linku.json,sha256=B5KNdhyM5UEfMciROgh1ECHr3i-ASBeMvwrkzNJX47c,271013
+ sonatoki/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ sonatoki/sandbox.json,sha256=hx6LRsfvmmTtqXcXIyCsfSaGK3DZ-GCdbM8xhZQBHoA,77650
+ sonatoki/utils.py,sha256=9Dcjg2fUZygA2Z9MUr30Dq3gL2xViJC4hBvRhQDSx3Q,3210
+ sonatoki-0.3.0.dist-info/RECORD,,
sonatoki-0.2.2.dist-info/RECORD DELETED
@@ -1,17 +0,0 @@
- sonatoki-0.2.2.dist-info/METADATA,sha256=XhDkXgLI0iFR0ceadVnXNUBlA6DcXkfVmZHxycL1tNA,5160
- sonatoki-0.2.2.dist-info/WHEEL,sha256=vnE8JVcI2Wz7GRKorsPArnBdnW2SWKWGow5gu5tHlRU,90
- sonatoki-0.2.2.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
- sonatoki/Cleaners.py,sha256=AMonXBUk3w1vdRiDrpB9XJAdjYaMPoqRtdX5oLI6r38,1744
- sonatoki/Configs.py,sha256=5mucu-Zsnt2p7GMiaM7GXUeL1F1fBq9sycjm4V7xsrI,1929
- sonatoki/Filters.py,sha256=qUhPWxAnNvQV9hCPJNu5RKGpx-_hWFvmL5Ab2-j_peo,5342
- sonatoki/Preprocessors.py,sha256=aMXXuFBDlJudvzvukvCa7BixuROXXEb62un7I-TGOGs,4441
- sonatoki/Scorers.py,sha256=W-1uYiqjsDejJzoe592ixs7wHazjJXPhuo-41zuJ26U,3643
- sonatoki/Tokenizers.py,sha256=zJ_5h9dlDIiJlLc6inuiOodWYt52nD83wS0QwSZixiM,3326
- sonatoki/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- sonatoki/__main__.py,sha256=6xc-wIrrFo9wTyn4zRQNAmqwmJBtVvCMwV-CrM-hueA,82
- sonatoki/constants.py,sha256=pOa1wb7B8w8RN772FcO5AYqqQAWlhbuLWM3N_sYlkdU,31232
- sonatoki/ilo.py,sha256=yyLgNPI0Hmb4f1BzX6IRHr11FPChfL2xDR_9odlr8_8,3849
- sonatoki/linku.json,sha256=B5KNdhyM5UEfMciROgh1ECHr3i-ASBeMvwrkzNJX47c,271013
- sonatoki/sandbox.json,sha256=hx6LRsfvmmTtqXcXIyCsfSaGK3DZ-GCdbM8xhZQBHoA,77650
- sonatoki/utils.py,sha256=jDwjRg-QpRIBalF65vIQWsX8wFLsITStihwfqimY-5E,2670
- sonatoki-0.2.2.dist-info/RECORD,,