sonatoki 0.2.2__py3-none-any.whl → 0.3.1__py3-none-any.whl

sonatoki/Cleaners.py CHANGED
@@ -60,6 +60,13 @@ class ConsecutiveDuplicatesRe(RegexCleaner):
60
60
  replace = r"\1"
61
61
 
62
62
 
63
+ class Lowercase(Cleaner):
64
+ @classmethod
65
+ @override
66
+ def clean(cls, token: str) -> str:
67
+ return token.lower()
68
+
69
+
63
70
  __all__ = [
64
71
  "ConsecutiveDuplicates",
65
72
  ]
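The new `Lowercase` cleaner composes with `ConsecutiveDuplicates` when dictionaries are prepared (see `prep_dictionary` in `sonatoki/utils.py` below). A minimal sketch of the two cleaners applied in sequence; the example word is illustrative:

```python
from sonatoki.Cleaners import Lowercase, ConsecutiveDuplicates

# Each Cleaner exposes a classmethod clean(token) -> token.
word = "Mannna"
for cleaner in (Lowercase, ConsecutiveDuplicates):
    word = cleaner.clean(word)
print(word)  # expected "mana": lowercased, then runs of repeated letters collapsed
```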
sonatoki/Configs.py CHANGED
@@ -2,33 +2,39 @@
2
2
  from copy import deepcopy
3
3
  from typing import List, Type, TypedDict
4
4
 
5
- # PDM
6
- from typing_extensions import NotRequired
7
-
8
5
  # LOCAL
9
6
  from sonatoki.Filters import (
10
7
  Filter,
11
8
  NimiPu,
12
9
  Numeric,
10
+ OrFilter,
13
11
  Syllabic,
14
- NimiLinku,
15
- NimiPuAle,
12
+ NimiUCSUR,
16
13
  Alphabetic,
17
14
  ProperName,
18
15
  Phonotactic,
19
16
  Punctuation,
20
- NimiLinkuAle,
17
+ NimiLinkuCore,
18
+ NimiPuSynonyms,
19
+ OrMemberFilter,
20
+ NimiLinkuCommon,
21
+ NimiLinkuObscure,
22
+ NimiLinkuSandbox,
23
+ EnglishIgnorables,
24
+ NimiLinkuUncommon,
21
25
  )
22
26
  from sonatoki.Scorers import Number, Scorer, PassFail, SoftScaling, SoftPassFail
23
27
  from sonatoki.Cleaners import Cleaner, ConsecutiveDuplicates
24
28
  from sonatoki.Tokenizers import Tokenizer, WordTokenizer
25
29
  from sonatoki.Preprocessors import (
26
30
  URLs,
31
+ Reference,
27
32
  Preprocessor,
28
33
  DiscordEmotes,
29
34
  DiscordSpecial,
30
35
  DiscordChannels,
31
36
  DiscordMentions,
37
+ AngleBracketObject,
32
38
  )
33
39
 
34
40
 
@@ -42,6 +48,8 @@ class IloConfig(TypedDict):
42
48
  passing_score: Number
43
49
 
44
50
 
51
+ # TODO: branching configs?
52
+
45
53
  BaseConfig: IloConfig = {
46
54
  "preprocessors": [URLs],
47
55
  "cleaners": [ConsecutiveDuplicates],
@@ -53,24 +61,75 @@ BaseConfig: IloConfig = {
53
61
  }
54
62
 
55
63
 
56
- PrefConfig: IloConfig = deepcopy(BaseConfig)
57
- PrefConfig["scoring_filters"].extend([NimiLinku, Syllabic, ProperName, Alphabetic])
58
- PrefConfig["scorer"] = SoftScaling
64
+ PrefConfig: IloConfig = {
65
+ "preprocessors": [URLs, Reference],
66
+ "cleaners": [ConsecutiveDuplicates],
67
+ "ignoring_filters": [Numeric, Punctuation, EnglishIgnorables],
68
+ "scoring_filters": [
69
+ OrMemberFilter(NimiLinkuCore, NimiLinkuCommon, NimiUCSUR),
70
+ Syllabic,
71
+ ProperName,
72
+ Alphabetic,
73
+ ],
74
+ "scorer": SoftScaling,
75
+ "passing_score": 0.8,
76
+ "word_tokenizer": WordTokenizer,
77
+ }
59
78
 
79
+ CorpusConfig: IloConfig = {
80
+ "preprocessors": [URLs, AngleBracketObject, Reference],
81
+ "cleaners": [ConsecutiveDuplicates],
82
+ "ignoring_filters": [Numeric, Punctuation, EnglishIgnorables],
83
+ "scoring_filters": [
84
+ OrMemberFilter(
85
+ NimiLinkuCore,
86
+ NimiLinkuCommon,
87
+ NimiLinkuUncommon,
88
+ NimiLinkuObscure,
89
+ NimiLinkuSandbox,
90
+ NimiUCSUR,
91
+ ),
92
+ Syllabic,
93
+ ProperName,
94
+ Alphabetic,
95
+ ],
96
+ "scorer": SoftScaling,
97
+ "passing_score": 0.8,
98
+ "word_tokenizer": WordTokenizer,
99
+ }
60
100
 
61
- LazyConfig: IloConfig = deepcopy(BaseConfig)
62
- LazyConfig["scoring_filters"].extend([Alphabetic, ProperName])
63
- LazyConfig["scorer"] = SoftPassFail
64
101
 
65
- DiscordConfig: IloConfig = deepcopy(PrefConfig)
66
- DiscordConfig["preprocessors"].extend(
67
- [DiscordEmotes, DiscordMentions, DiscordChannels, DiscordSpecial]
68
- )
102
+ LazyConfig: IloConfig = {
103
+ "preprocessors": [URLs],
104
+ "cleaners": [ConsecutiveDuplicates],
105
+ "ignoring_filters": [Numeric, Punctuation],
106
+ "scoring_filters": [Alphabetic, NimiUCSUR, ProperName],
107
+ "scorer": SoftPassFail,
108
+ "passing_score": 0.8,
109
+ "word_tokenizer": WordTokenizer,
110
+ }
111
+
112
+ DiscordConfig: IloConfig = {
113
+ "preprocessors": [URLs, AngleBracketObject, Reference],
114
+ "cleaners": [ConsecutiveDuplicates],
115
+ "ignoring_filters": [Numeric, Punctuation, EnglishIgnorables],
116
+ "scoring_filters": [
117
+ OrMemberFilter(NimiLinkuCore, NimiLinkuCommon, NimiUCSUR),
118
+ Syllabic,
119
+ ProperName,
120
+ Alphabetic,
121
+ ],
122
+ "scorer": SoftScaling,
123
+ "passing_score": 0.8,
124
+ "word_tokenizer": WordTokenizer,
125
+ }
126
+
69
127
  TelegramConfig: IloConfig = deepcopy(PrefConfig)
70
128
  ForumConfig: IloConfig = deepcopy(PrefConfig)
71
129
 
72
130
  __all__ = [
73
131
  "BaseConfig",
132
+ "CorpusConfig",
74
133
  "DiscordConfig",
75
134
  "ForumConfig",
76
135
  "IloConfig",
sonatoki/Filters.py CHANGED
@@ -1,7 +1,7 @@
1
1
  # STL
2
2
  import re
3
3
  from abc import ABC, abstractmethod
4
- from typing import Set
4
+ from typing import Set, List, Type
5
5
  from functools import lru_cache as cache # cache comes in 3.9
6
6
 
7
7
  # PDM
@@ -9,19 +9,26 @@ import regex
9
9
  from typing_extensions import override
10
10
 
11
11
  # LOCAL
12
+ from sonatoki.utils import prep_dictionary
12
13
  from sonatoki.constants import (
13
14
  VOWELS,
14
15
  NIMI_PU,
15
16
  ALPHABET,
17
+ ALL_PUNCT,
16
18
  ALLOWABLES,
17
19
  CONSONANTS,
18
- NIMI_LINKU,
19
- POSIX_PUNCT,
20
- UNICODE_PUNCT,
21
- NIMI_LINKU_LILI,
20
+ IGNORABLES,
21
+ NIMI_UCSUR,
22
+ NIMI_KU_LILI,
23
+ NIMI_KU_SULI,
24
+ NIMI_LINKU_CORE,
22
25
  ALL_PUNCT_RANGES,
23
26
  NIMI_PU_SYNONYMS,
27
+ NIMI_LINKU_COMMON,
28
+ NIMI_LINKU_OBSCURE,
24
29
  NIMI_LINKU_SANDBOX,
30
+ UCSUR_PUNCT_RANGES,
31
+ NIMI_LINKU_UNCOMMON,
25
32
  )
26
33
 
27
34
  regex.DEFAULT_VERSION = regex.VERSION1
@@ -79,6 +86,10 @@ class Miscellaneous(MemberFilter):
79
86
  tokens = set(ALLOWABLES)
80
87
 
81
88
 
89
+ class EnglishIgnorables(MemberFilter):
90
+ tokens = set(IGNORABLES)
91
+
92
+
82
93
  class ProperName(Filter):
83
94
  """Determines if a given token is a valid name (also called a loan word).
84
95
  When Toki Pona is written with the Latin alphabet, names are generally
@@ -99,23 +110,43 @@ class ProperName(Filter):
99
110
 
100
111
 
101
112
  class NimiPu(MemberFilter):
102
- tokens = set(NIMI_PU)
113
+ tokens = prep_dictionary(NIMI_PU)
114
+
115
+
116
+ class NimiPuSynonyms(MemberFilter):
117
+ tokens = prep_dictionary(NIMI_PU_SYNONYMS)
118
+
119
+
120
+ class NimiKuSuli(MemberFilter):
121
+ tokens = prep_dictionary(NIMI_KU_SULI)
122
+
123
+
124
+ class NimiKuLili(MemberFilter):
125
+ tokens = prep_dictionary(NIMI_KU_LILI)
126
+
127
+
128
+ class NimiLinkuCore(MemberFilter):
129
+ tokens = prep_dictionary(NIMI_LINKU_CORE)
103
130
 
104
131
 
105
- class NimiPuAle(MemberFilter):
106
- tokens = set(NIMI_PU + NIMI_PU_SYNONYMS)
132
+ class NimiLinkuCommon(MemberFilter):
133
+ tokens = prep_dictionary(NIMI_LINKU_COMMON)
107
134
 
108
135
 
109
- class NimiLinku(MemberFilter):
110
- tokens = set(NIMI_LINKU)
136
+ class NimiLinkuUncommon(MemberFilter):
137
+ tokens = prep_dictionary(NIMI_LINKU_UNCOMMON)
111
138
 
112
139
 
113
- class NimiLinkuAle(MemberFilter):
114
- tokens = set(NIMI_LINKU + NIMI_LINKU_LILI)
140
+ class NimiLinkuObscure(MemberFilter):
141
+ tokens = prep_dictionary(NIMI_LINKU_OBSCURE)
115
142
 
116
143
 
117
144
  class NimiLinkuSandbox(MemberFilter):
118
- tokens = set(NIMI_LINKU + NIMI_LINKU_LILI + NIMI_LINKU_SANDBOX)
145
+ tokens = prep_dictionary(NIMI_LINKU_SANDBOX)
146
+
147
+
148
+ class NimiUCSUR(MemberFilter):
149
+ tokens = prep_dictionary(NIMI_UCSUR)
119
150
 
120
151
 
121
152
  class Phonotactic(RegexFilter):
@@ -156,6 +187,11 @@ class AlphabeticRe(RegexFilter):
156
187
  pattern = re.compile(rf"[{ALPHABET}]+", flags=re.IGNORECASE)
157
188
 
158
189
 
190
+ class TwoOrMoreAlphabetic(Filter):
191
+ # TODO: alphabetic implementation that ignores single characters
192
+ pass
193
+
194
+
159
195
  class Numeric(Filter):
160
196
  """Determine if a given token is entirely numeric.
161
197
  Covers all numeric symbols in Unicode.
@@ -175,12 +211,13 @@ class Numeric(Filter):
175
211
  class Punctuation(SubsetFilter):
176
212
  """Identify whether a token is entirely punctuation. Fastest implementation."""
177
213
 
178
- tokens = set(POSIX_PUNCT + UNICODE_PUNCT)
214
+ tokens = set(ALL_PUNCT)
179
215
 
180
216
 
181
217
  class PunctuationRe(RegexFilter):
182
218
  """Faster implementation of `PunctuationRe1`.
183
- Goes out of date compared to the `regex` library if UNICODE_PUNCT is not updated."""
219
+ Goes out of date compared to the `regex` library if UNICODE_PUNCT_RANGES is not updated.
220
+ """
184
221
 
185
222
  pattern = re.compile(rf"[{ALL_PUNCT_RANGES}]+")
186
223
 
@@ -188,17 +225,111 @@ class PunctuationRe(RegexFilter):
188
225
  class PunctuationRe1(Regex1Filter):
189
226
  """Reference implementation for identifying tokens made entirely of punctuation."""
190
227
 
191
- pattern = regex.compile(r"[\p{Punctuation}\p{posix_punct}]+")
228
+ pattern = regex.compile(
229
+ rf"[\p{{Punctuation}}\p{{posix_punct}}{UCSUR_PUNCT_RANGES}]+"
230
+ )
231
+
232
+
233
+ class OrFilter:
234
+ """Instantiate with more than one filter to compose them into one filter,
235
+ returning True when any individual filter matches or False otherwise.
236
+ Requires at least two filters.
237
+
238
+ OrFilter exists as a compromise between the need to score some filters equally,
239
+ while not adding custom behavior to scorers.
240
+ I could have allowed a position to have a list of filters instead of one filter,
241
+ but this would require cleaning the user's input, and nested handling of lists.
242
+ It also would not have been as powerful- I would need another param for the and/or switch,
243
+ or to not give users the choice.
244
+
245
+ Instead, the user is responsible for building an OrFilter out of their desired filters.
246
+ """
247
+
248
+ @staticmethod
249
+ def __generic_filter(*filters_: Type[Filter]) -> Type[Filter]:
250
+
251
+ class CombinedFilter(Filter):
252
+ filters: List[Type[Filter]] = list(filters_) # TODO: tuple better?
253
+
254
+ @classmethod
255
+ @override
256
+ @cache(maxsize=None)
257
+ def filter(cls, token: str) -> bool:
258
+ for f in cls.filters:
259
+ if f.filter(token):
260
+ return True
261
+ return False
262
+
263
+ return CombinedFilter
264
+
265
+ def __new__(cls, *filters: Type[Filter]) -> Type[Filter]:
266
+ if not len(filters) >= 2:
267
+ raise ValueError("Provide at least two Filters to OrFilter.")
268
+
269
+ subset_filters = [f for f in filters if issubclass(f, MemberFilter)]
270
+ if len(subset_filters) >= 2:
271
+ raise Warning(
272
+ "Prefer OrMemberFilter for combining two or more MemberFilters."
273
+ )
274
+
275
+ filter = cls.__generic_filter(*filters)
276
+
277
+ return filter
278
+
279
+
280
+ class OrMemberFilter:
281
+ @staticmethod
282
+ def __subset_filter(*filters: Type[MemberFilter]) -> Type[MemberFilter]:
283
+ all_token_sets: List[Set[str]] = [f.tokens for f in filters]
284
+ all_tokens: Set[str] = set().union(*all_token_sets)
285
+
286
+ class CombinedFilter(MemberFilter):
287
+ tokens = all_tokens
288
+
289
+ return CombinedFilter
290
+
291
+ def __new__(cls, *filters_: Type[MemberFilter]) -> Type[MemberFilter]:
292
+ if not len(filters_) >= 2:
293
+ raise ValueError("Provide two or more MemberFilters to OrMemberFilter.")
294
+ filter = cls.__subset_filter(*filters_)
295
+ return filter
296
+
297
+
298
+ class AndFilter(Filter):
299
+ """Instantiate with more than one filter to compose them into one filter,
300
+ returning False when any individual filter fails to match or True otherwise.
301
+ Requires at least two filters."""
302
+
303
+ def __new__(cls, *filters_: Type[Filter]) -> Type[Filter]:
304
+ if not len(filters_) >= 2:
305
+ raise ValueError("Must provide at least two Filters to AndFilter.")
306
+
307
+ class AnonymousAndFilter(Filter):
308
+ filters: List[Type[Filter]] = list(filters_) # TODO: tuple better?
309
+
310
+ @classmethod
311
+ @override
312
+ @cache(maxsize=None)
313
+ def filter(cls, token: str) -> bool:
314
+ for f in cls.filters:
315
+ if not f.filter(token):
316
+ return False
317
+ return True
318
+
319
+ return AnonymousAndFilter
192
320
 
193
321
 
194
322
  __all__ = [
195
323
  "Alphabetic",
196
- "NimiLinku",
197
- "NimiLinkuAle",
324
+ "AndFilter",
325
+ "EnglishIgnorables",
326
+ "NimiLinkuCore",
198
327
  "NimiLinkuSandbox",
199
328
  "NimiPu",
200
- "NimiPuAle",
329
+ "NimiPuSynonyms",
330
+ "NimiUCSUR",
201
331
  "Numeric",
332
+ "OrFilter",
202
333
  "Phonotactic",
203
334
  "ProperName",
204
335
  "Punctuation",
sonatoki/Tokenizers.py CHANGED
@@ -5,16 +5,12 @@ from typing import Set, List
5
5
 
6
6
  # PDM
7
7
  import regex
8
- from typing_extensions import override
8
+ from typing_extensions import override, deprecated
9
9
 
10
10
  # LOCAL
11
11
  from sonatoki.utils import regex_escape
12
- from sonatoki.constants import (
13
- POSIX_PUNCT,
14
- UNICODE_PUNCT,
15
- SENTENCE_PUNCT,
16
- ALL_PUNCT_RANGES,
17
- )
12
+ from sonatoki.Filters import NimiUCSUR # seriously this sucks
13
+ from sonatoki.constants import ALL_PUNCT, SENTENCE_PUNCT, ALL_PUNCT_RANGES
18
14
 
19
15
  regex.DEFAULT_VERSION = regex.VERSION1
20
16
 
@@ -50,7 +46,12 @@ class Regex1Tokenizer(Tokenizer):
50
46
 
51
47
 
52
48
  class WordTokenizer(SetTokenizer):
53
- delimiters = set(POSIX_PUNCT + UNICODE_PUNCT)
49
+ delimiters = set(ALL_PUNCT)
50
+
51
+ @classmethod
52
+ def __helper(cls, s: str, tokens: List[str], last_match: int, i: int):
53
+ match = s[last_match:i].split()
54
+ [tokens.append(t) for t in match if t]
54
55
 
55
56
  @classmethod
56
57
  @override
@@ -60,32 +61,47 @@ class WordTokenizer(SetTokenizer):
60
61
 
61
62
  tokens: List[str] = []
62
63
 
64
+ i = 0 # ensure i is bound
63
65
  last_match = 0
64
66
  last_membership = s[0] in cls.delimiters
65
67
  for i, char in enumerate(s):
66
68
  mem = char in cls.delimiters
67
- if mem == last_membership:
69
+ ucsur = NimiUCSUR.filter(char)  # a UCSUR char always marks a token boundary
70
+ changed = (mem != last_membership) or ucsur
71
+ # this keeps contiguous words together, but splits UCSUR
72
+ if not changed:
73
+ continue
74
+
75
+ if ucsur:
76
+ if i > last_match:
77
+ # Add the token before UCSUR character
78
+ cls.__helper(s, tokens, last_match, i)
79
+ # Add UCSUR character itself as a token
80
+ tokens.append(char)
81
+ last_match = i + 1
82
+ last_membership = mem
68
83
  continue
69
84
 
70
- match = s[last_match:i].split()
71
- # TODO: kinda sucks? what about unicode whitespace?
85
+ cls.__helper(s, tokens, last_match, i)
72
86
  last_match = i
73
87
  last_membership = mem
74
- [tokens.append(t) for t in match if t]
75
-
76
- match = s[last_match:].strip().split()
77
- if match:
78
- tokens.extend(match)
79
88
 
89
+ cls.__helper(s, tokens, last_match, i + 1)
80
90
  return tokens
81
91
 
82
92
 
93
+ @deprecated(
94
+ "WordTokenizerRe is a previous reference implementation. Its behavior has diverged from WordTokenizer and it may not be restored."
95
+ )
83
96
  class WordTokenizerRe(RegexTokenizer):
84
97
  pattern = re.compile(rf"""([{ALL_PUNCT_RANGES}]+|\s+)""")
85
98
 
86
99
 
100
+ @deprecated(
101
+ "WordTokenizerRe1 is a previous reference implementation. Its behavior has diverged from WordTokenizer and it may not be restored."
102
+ )
87
103
  class WordTokenizerRe1(Regex1Tokenizer):
88
- """Reference implementation for WorkTokenizer."""
104
+ """Reference implementation for WordTokenizer."""
89
105
 
90
106
  pattern = regex.compile(r"""([\p{posix_punct}\p{Punctuation}]+|\s+)""")
91
107
 
sonatoki/constants.py CHANGED
@@ -4,16 +4,371 @@ from typing import Dict, List
4
4
  from pathlib import Path
5
5
 
6
6
  # LOCAL
7
- from sonatoki.utils import find_unicode_ranges
7
+ from sonatoki.utils import find_unicode_chars, find_unicode_ranges
8
8
 
9
9
  # `\p{Punctuation}` character class
10
- UNICODE_PUNCT = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~¡¢£¤¥¦§¨©«¬®¯°±´¶·¸»¿×÷˂˃˄˅˒˓˔˕˖˗˘˙˚˛˜˝˞˟˥˦˧˨˩˪˫˭˯˰˱˲˳˴˵˶˷˸˹˺˻˼˽˾˿͵;΄΅·϶҂՚՛՜՝՞՟։֊֍֎֏־׀׃׆׳״؆؇؈؉؊؋،؍؎؏؛؝؞؟٪٫٬٭۔۞۩۽۾܀܁܂܃܄܅܆܇܈܉܊܋܌܍߶߷߸߹߾߿࠰࠱࠲࠳࠴࠵࠶࠷࠸࠹࠺࠻࠼࠽࠾࡞࢈।॥॰৲৳৺৻৽੶૰૱୰௳௴௵௶௷௸௹௺౷౿಄൏൹෴฿๏๚๛༁༂༃༄༅༆༇༈༉༊་༌།༎༏༐༑༒༓༔༕༖༗༚༛༜༝༞༟༴༶༸༺༻༼༽྅྾྿࿀࿁࿂࿃࿄࿅࿇࿈࿉࿊࿋࿌࿎࿏࿐࿑࿒࿓࿔࿕࿖࿗࿘࿙࿚၊။၌၍၎၏႞႟჻፠፡።፣፤፥፦፧፨᎐᎑᎒᎓᎔᎕᎖᎗᎘᎙᐀᙭᙮᚛᚜᛫᛬᛭᜵᜶។៕៖៘៙៚៛᠀᠁᠂᠃᠄᠅᠆᠇᠈᠉᠊᥀᥄᥅᧞᧟᧠᧡᧢᧣᧤᧥᧦᧧᧨᧩᧪᧫᧬᧭᧮᧯᧰᧱᧲᧳᧴᧵᧶᧷᧸᧹᧺᧻᧼᧽᧾᧿᨞᨟᪠᪡᪢᪣᪤᪥᪦᪨᪩᪪᪫᪬᪭᭚᭛᭜᭝᭞᭟᭠᭡᭢᭣᭤᭥᭦᭧᭨᭩᭪᭴᭵᭶᭷᭸᭹᭺᭻᭼᭽᭾᯼᯽᯾᯿᰻᰼᰽᰾᰿᱾᱿᳀᳁᳂᳃᳄᳅᳆᳇᳓᾽᾿῀῁῍῎῏῝῞῟῭΅`´῾‐‑‒–—―‖‗‘’‚‛“”„‟†‡•‣․‥…‧‰‱′″‴‵‶‷‸‹›※‼‽‾‿⁀⁁⁂⁃⁄⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁒⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞⁺⁻⁼⁽⁾₊₋₌₍₎₠₡₢₣₤₥₦₧₨₩₪₫€₭₮₯₰₱₲₳₴₵₶₷₸₹₺₻₼₽₾₿⃀℀℁℃℄℅℆℈℉℔№℗℘℞℟℠℡™℣℥℧℩℮℺℻⅀⅁⅂⅃⅄⅊⅋⅌⅍⅏↊↋←↑→↓↔↕↖↗↘↙↚↛↜↝↞↟↠↡↢↣↤↥↦↧↨↩↪↫↬↭↮↯↰↱↲↳↴↵↶↷↸↹↺↻↼↽↾↿⇀⇁⇂⇃⇄⇅⇆⇇⇈⇉⇊⇋⇌⇍⇎⇏⇐⇑⇒⇓⇔⇕⇖⇗⇘⇙⇚⇛⇜⇝⇞⇟⇠⇡⇢⇣⇤⇥⇦⇧⇨⇩⇪⇫⇬⇭⇮⇯⇰⇱⇲⇳⇴⇵⇶⇷⇸⇹⇺⇻⇼⇽⇾⇿∀∁∂∃∄∅∆∇∈∉∊∋∌∍∎∏∐∑−∓∔∕∖∗∘∙√∛∜∝∞∟∠∡∢∣∤∥∦∧∨∩∪∫∬∭∮∯∰∱∲∳∴∵∶∷∸∹∺∻∼∽∾∿≀≁≂≃≄≅≆≇≈≉≊≋≌≍≎≏≐≑≒≓≔≕≖≗≘≙≚≛≜≝≞≟≠≡≢≣≤≥≦≧≨≩≪≫≬≭≮≯≰≱≲≳≴≵≶≷≸≹≺≻≼≽≾≿⊀⊁⊂⊃⊄⊅⊆⊇⊈⊉⊊⊋⊌⊍⊎⊏⊐⊑⊒⊓⊔⊕⊖⊗⊘⊙⊚⊛⊜⊝⊞⊟⊠⊡⊢⊣⊤⊥⊦⊧⊨⊩⊪⊫⊬⊭⊮⊯⊰⊱⊲⊳⊴⊵⊶⊷⊸⊹⊺⊻⊼⊽⊾⊿⋀⋁⋂⋃⋄⋅⋆⋇⋈⋉⋊⋋⋌⋍⋎⋏⋐⋑⋒⋓⋔⋕⋖⋗⋘⋙⋚⋛⋜⋝⋞⋟⋠⋡⋢⋣⋤⋥⋦⋧⋨⋩⋪⋫⋬⋭⋮⋯⋰⋱⋲⋳⋴⋵⋶⋷⋸⋹⋺⋻⋼⋽⋾⋿⌀⌁⌂⌃⌄⌅⌆⌇⌈⌉⌊⌋⌌⌍⌎⌏⌐⌑⌒⌓⌔⌕⌖⌗⌘⌙⌚⌛⌜⌝⌞⌟⌠⌡⌢⌣⌤⌥⌦⌧⌨〈〉⌫⌬⌭⌮⌯⌰⌱⌲⌳⌴⌵⌶⌷⌸⌹⌺⌻⌼⌽⌾⌿⍀⍁⍂⍃⍄⍅⍆⍇⍈⍉⍊⍋⍌⍍⍎⍏⍐⍑⍒⍓⍔⍕⍖⍗⍘⍙⍚⍛⍜⍝⍞⍟⍠⍡⍢⍣⍤⍥⍦⍧⍨⍩⍪⍫⍬⍭⍮⍯⍰⍱⍲⍳⍴⍵⍶⍷⍸⍹⍺⍻⍼⍽⍾⍿⎀⎁⎂⎃⎄⎅⎆⎇⎈⎉⎊⎋⎌⎍⎎⎏⎐⎑⎒⎓⎔⎕⎖⎗⎘⎙⎚⎛⎜⎝⎞⎟⎠⎡⎢⎣⎤⎥⎦⎧⎨⎩⎪⎫⎬⎭⎮⎯⎰⎱⎲⎳⎴⎵⎶⎷⎸⎹⎺⎻⎼⎽⎾⎿⏀⏁⏂⏃⏄⏅⏆⏇⏈⏉⏊⏋⏌⏍⏎⏏⏐⏑⏒⏓⏔⏕⏖⏗⏘⏙⏚⏛⏜⏝⏞⏟⏠⏡⏢⏣⏤⏥⏦⏧⏨⏩⏪⏫⏬⏭⏮⏯⏰⏱⏲⏳⏴⏵⏶⏷⏸⏹⏺⏻⏼⏽⏾⏿␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟␠␡␢␣␤␥␦⑀⑁⑂⑃⑄⑅⑆⑇⑈⑉⑊⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵─━│┃┄┅┆┇┈┉┊┋┌┍┎┏┐┑┒┓└┕┖┗┘┙┚┛├┝┞┟┠┡┢┣┤┥┦┧┨┩┪┫┬┭┮┯┰┱┲┳┴┵┶┷┸┹┺┻┼┽┾┿╀╁╂╃╄╅╆╇╈╉╊╋╌╍╎╏═║╒╓╔╕╖╗╘╙╚╛╜╝╞╟╠╡╢╣╤╥╦╧╨╩╪╫╬╭╮╯╰╱╲╳╴╵╶╷╸╹╺╻╼╽╾╿▀▁▂▃▄▅▆▇█▉▊▋▌▍▎▏▐░▒▓▔▕▖▗▘▙▚▛▜▝▞▟■□▢▣▤▥▦▧▨▩▪▫▬▭▮▯▰▱▲△▴▵▶▷▸▹►▻▼▽▾▿◀◁◂◃◄◅◆◇◈◉◊○◌◍◎●◐◑◒◓◔◕◖◗◘◙◚◛◜◝◞◟◠◡◢◣◤◥◦◧◨◩◪◫◬◭◮◯◰◱◲◳◴◵◶◷◸◹◺◻◼◽◾◿☀☁☂☃☄★☆☇☈☉☊☋☌☍☎☏☐☑☒☓☔☕☖☗☘☙☚☛☜☝☞☟☠☡☢☣☤☥☦☧☨☩☪☫☬☭☮☯☰☱☲☳☴☵☶☷☸☹☺☻☼☽☾☿♀♁♂♃♄♅♆♇♈♉♊♋♌♍♎♏♐♑♒♓♔♕♖♗♘♙♚♛♜♝♞♟♠♡♢♣♤♥♦♧♨♩♪♫♬♭♮♯♰♱♲♳♴♵♶♷♸♹♺♻♼♽♾♿⚀⚁⚂⚃⚄⚅⚆⚇⚈⚉⚊⚋⚌⚍⚎⚏⚐⚑⚒⚓⚔⚕⚖⚗⚘⚙⚚⚛⚜⚝⚞⚟⚠⚡⚢⚣⚤⚥⚦⚧⚨⚩⚪⚫⚬⚭⚮⚯⚰⚱⚲⚳⚴⚵⚶⚷⚸⚹⚺⚻⚼⚽⚾⚿⛀⛁⛂⛃⛄⛅⛆⛇⛈⛉⛊⛋⛌⛍⛎⛏⛐⛑⛒⛓⛔⛕⛖⛗⛘⛙⛚⛛⛜⛝⛞⛟⛠⛡⛢⛣⛤⛥⛦⛧⛨⛩⛪⛫⛬⛭⛮⛯⛰⛱⛲⛳⛴⛵⛶⛷⛸⛹⛺⛻⛼⛽⛾⛿✀✁✂✃✄✅✆✇✈✉✊✋✌✍✎✏✐✑✒✓✔✕✖✗✘✙✚✛✜✝✞✟✠✡✢✣✤✥✦✧✨✩✪✫✬✭✮✯✰✱✲✳✴✵✶✷✸✹✺✻✼✽✾✿❀❁❂❃❄❅❆❇❈❉❊❋❌❍❎❏❐❑❒❓❔❕❖❗❘❙❚❛❜❝❞❟❠❡❢❣❤❥❦❧❨❩❪❫❬❭❮❯❰❱❲❳❴❵➔➕➖➗➘➙➚➛➜➝➞➟➠➡➢➣➤➥➦➧➨➩➪➫➬➭➮➯➰➱➲➳➴➵➶➷➸➹➺➻➼➽➾➿⟀⟁⟂⟃⟄⟅⟆⟇⟈⟉⟊⟋⟌⟍⟎⟏⟐⟑⟒⟓⟔⟕⟖⟗⟘⟙⟚⟛⟜⟝⟞⟟⟠⟡⟢⟣⟤⟥⟦⟧⟨⟩⟪⟫⟬⟭⟮⟯⟰⟱⟲⟳⟴⟵⟶⟷⟸⟹⟺⟻⟼⟽⟾⟿⠀⠁⠂⠃⠄⠅⠆⠇⠈⠉⠊⠋⠌⠍⠎⠏⠐⠑⠒⠓⠔⠕⠖⠗⠘⠙⠚⠛⠜⠝⠞⠟⠠⠡⠢⠣⠤⠥⠦⠧⠨⠩⠪⠫⠬⠭⠮⠯⠰⠱⠲⠳⠴⠵⠶⠷⠸⠹⠺⠻⠼⠽⠾⠿⡀⡁⡂⡃⡄⡅⡆⡇⡈⡉⡊⡋⡌⡍⡎⡏⡐⡑⡒⡓⡔⡕⡖⡗⡘⡙⡚⡛⡜⡝⡞⡟⡠⡡⡢⡣⡤⡥⡦⡧⡨⡩⡪⡫⡬⡭⡮⡯⡰⡱⡲⡳⡴⡵⡶⡷⡸⡹⡺⡻⡼⡽⡾⡿⢀⢁⢂⢃⢄⢅⢆⢇⢈⢉⢊⢋⢌⢍⢎⢏⢐⢑⢒⢓⢔⢕⢖⢗⢘⢙⢚⢛⢜⢝⢞⢟⢠⢡⢢⢣⢤⢥⢦⢧⢨⢩⢪⢫⢬⢭⢮⢯⢰⢱⢲⢳⢴⢵⢶⢷⢸⢹⢺⢻⢼⢽⢾⢿⣀⣁⣂⣃⣄⣅⣆⣇⣈⣉⣊⣋⣌⣍⣎⣏⣐⣑⣒⣓⣔⣕⣖⣗⣘⣙⣚⣛⣜⣝⣞⣟⣠⣡⣢⣣⣤⣥⣦⣧⣨⣩⣪⣫⣬⣭⣮⣯⣰⣱⣲⣳⣴⣵⣶⣷⣸⣹⣺⣻⣼⣽⣾⣿⤀⤁⤂⤃⤄⤅⤆⤇⤈⤉⤊⤋⤌⤍⤎⤏⤐⤑⤒⤓⤔⤕⤖⤗⤘⤙⤚⤛⤜⤝⤞⤟⤠⤡⤢⤣⤤⤥⤦⤧⤨⤩⤪⤫⤬⤭⤮⤯⤰⤱⤲⤳⤴⤵⤶⤷⤸⤹⤺⤻⤼⤽⤾⤿⥀⥁⥂⥃⥄⥅⥆⥇⥈⥉⥊⥋⥌⥍⥎⥏⥐⥑⥒⥓⥔⥕⥖⥗⥘⥙⥚⥛⥜⥝⥞⥟⥠⥡⥢⥣⥤⥥⥦⥧⥨⥩⥪⥫⥬⥭⥮⥯⥰⥱⥲⥳⥴⥵⥶⥷⥸⥹⥺⥻⥼⥽⥾⥿⦀⦁⦂⦃⦄⦅⦆⦇⦈⦉⦊⦋⦌⦍⦎⦏⦐⦑⦒⦓⦔⦕⦖⦗⦘⦙⦚⦛⦜⦝⦞⦟⦠⦡⦢⦣⦤⦥⦦⦧⦨⦩⦪⦫⦬⦭⦮⦯⦰⦱⦲⦳⦴⦵⦶⦷⦸⦹⦺⦻⦼⦽⦾⦿⧀⧁⧂⧃⧄⧅⧆⧇⧈⧉⧊⧋⧌⧍⧎⧏⧐⧑⧒⧓⧔⧕⧖⧗⧘⧙⧚⧛⧜⧝⧞⧟⧠⧡⧢⧣⧤⧥⧦⧧⧨⧩⧪⧫⧬⧭⧮⧯⧰⧱⧲⧳⧴⧵⧶⧷⧸⧹⧺⧻⧼⧽⧾⧿⨀⨁⨂⨃⨄⨅⨆⨇⨈⨉⨊⨋⨌⨍⨎⨏⨐⨑⨒⨓⨔⨕⨖⨗⨘⨙⨚⨛⨜⨝⨞⨟⨠⨡⨢⨣⨤⨥⨦⨧⨨⨩⨪⨫⨬⨭⨮⨯⨰⨱⨲⨳⨴⨵⨶⨷⨸⨹⨺⨻⨼⨽⨾⨿⩀⩁⩂⩃⩄⩅⩆⩇⩈⩉⩊⩋⩌⩍⩎⩏⩐⩑⩒⩓⩔⩕⩖⩗⩘⩙⩚⩛⩜⩝⩞⩟⩠⩡⩢⩣⩤⩥⩦⩧⩨⩩⩪⩫⩬⩭⩮⩯⩰⩱⩲⩳⩴⩵⩶⩷⩸⩹⩺⩻⩼⩽⩾⩿⪀⪁⪂⪃⪄⪅⪆⪇⪈⪉⪊⪋⪌⪍⪎⪏⪐⪑⪒⪓⪔⪕⪖⪗⪘⪙⪚⪛⪜⪝⪞⪟⪠⪡⪢⪣⪤⪥⪦⪧⪨⪩⪪⪫⪬⪭⪮⪯⪰⪱⪲⪳⪴⪵⪶⪷⪸⪹⪺⪻⪼⪽⪾⪿⫀⫁⫂⫃⫄⫅⫆⫇⫈⫉⫊⫋⫌⫍⫎⫏⫐⫑⫒⫓⫔⫕⫖⫗⫘⫙⫚⫛⫝̸⫝⫞⫟⫠⫡⫢⫣⫤⫥⫦⫧⫨⫩⫪⫫⫬⫭⫮⫯⫰⫱⫲⫳⫴⫵⫶⫷⫸⫹⫺⫻⫼⫽⫾⫿⬀⬁⬂⬃⬄⬅⬆⬇⬈⬉⬊⬋⬌⬍⬎⬏⬐⬑⬒⬓⬔⬕⬖⬗⬘⬙⬚⬛⬜⬝⬞⬟⬠⬡⬢⬣⬤⬥⬦⬧⬨⬩⬪⬫⬬⬭⬮⬯⬰⬱⬲⬳⬴⬵⬶⬷⬸⬹⬺⬻⬼⬽⬾⬿⭀⭁⭂⭃⭄⭅⭆⭇⭈⭉⭊⭋⭌⭍⭎⭏⭐⭑⭒⭓⭔⭕⭖⭗⭘⭙⭚⭛⭜⭝⭞⭟⭠⭡⭢⭣⭤⭥⭦⭧⭨⭩⭪⭫⭬⭭⭮⭯⭰⭱⭲⭳⭶⭷⭸⭹⭺⭻⭼⭽⭾⭿⮀⮁⮂⮃⮄⮅⮆⮇⮈⮉⮊⮋⮌⮍⮎⮏⮐⮑⮒⮓⮔⮕⮗⮘⮙⮚⮛⮜⮝⮞⮟⮠⮡⮢⮣⮤⮥⮦⮧⮨⮩⮪⮫⮬⮭⮮⮯⮰⮱⮲⮳⮴⮵⮶⮷⮸⮹⮺⮻⮼⮽⮾⮿⯀⯁⯂⯃⯄⯅⯆⯇⯈⯉⯊⯋⯌⯍⯎⯏⯐⯑⯒⯓⯔⯕⯖⯗⯘⯙⯚⯛⯜⯝⯞⯟⯠⯡⯢⯣⯤⯥⯦⯧⯨⯩⯪⯫⯬⯭⯮⯯⯰⯱⯲⯳⯴⯵⯶⯷⯸⯹⯺⯻⯼⯽⯾⯿⳥⳦⳧⳨⳩⳪⳹⳺⳻⳼⳾⳿⵰⸀⸁⸂⸃⸄⸅⸆⸇⸈⸉⸊⸋⸌⸍⸎⸏⸐⸑⸒⸓⸔⸕⸖⸗⸘⸙⸚⸛⸜⸝⸞⸟⸠⸡⸢⸣⸤⸥⸦⸧⸨⸩⸪⸫⸬⸭⸮⸰⸱⸲⸳⸴⸵⸶⸷⸸⸹⸺⸻⸼⸽⸾⸿⹀⹁⹂⹃⹄⹅⹆⹇⹈⹉⹊⹋⹌⹍⹎⹏⹐⹑⹒⹓⹔⹕⹖⹗⹘⹙⹚⹛⹜⹝⺀⺁⺂⺃⺄⺅⺆⺇⺈⺉⺊⺋⺌⺍⺎⺏⺐⺑⺒⺓⺔⺕⺖⺗⺘⺙⺛⺜⺝⺞⺟⺠⺡⺢⺣⺤⺥⺦⺧⺨⺩⺪⺫⺬⺭⺮⺯⺰⺱⺲⺳⺴⺵⺶⺷⺸⺹⺺⺻⺼⺽⺾⺿⻀⻁⻂⻃⻄⻅⻆⻇⻈⻉⻊⻋⻌⻍⻎⻏⻐⻑⻒⻓⻔⻕⻖⻗⻘⻙⻚⻛⻜⻝⻞⻟⻠⻡⻢⻣⻤⻥⻦⻧⻨⻩⻪⻫⻬⻭⻮⻯⻰⻱⻲⻳⼀⼁⼂⼃⼄⼅⼆⼇⼈⼉⼊⼋⼌⼍⼎⼏⼐⼑⼒⼓⼔⼕⼖⼗⼘⼙⼚⼛⼜⼝⼞⼟⼠⼡⼢⼣⼤⼥⼦⼧⼨⼩⼪⼫⼬⼭⼮⼯⼰⼱⼲⼳⼴⼵⼶⼷⼸⼹⼺⼻⼼⼽⼾⼿⽀⽁⽂⽃⽄⽅⽆⽇⽈⽉⽊⽋⽌⽍⽎⽏⽐⽑⽒⽓⽔⽕⽖⽗⽘⽙⽚⽛⽜⽝⽞⽟⽠⽡⽢⽣⽤⽥⽦⽧⽨⽩⽪⽫⽬⽭⽮⽯⽰⽱⽲⽳⽴⽵⽶⽷⽸⽹⽺⽻⽼⽽⽾⽿⾀⾁⾂⾃⾄⾅⾆⾇⾈⾉⾊⾋⾌⾍⾎⾏⾐⾑⾒⾓⾔⾕⾖⾗⾘⾙⾚⾛⾜⾝⾞⾟⾠⾡⾢⾣⾤⾥⾦⾧⾨⾩⾪⾫⾬⾭⾮⾯⾰⾱⾲⾳⾴⾵⾶⾷⾸⾹⾺⾻⾼⾽⾾⾿⿀⿁⿂⿃⿄⿅⿆⿇⿈⿉⿊⿋⿌⿍⿎⿏⿐⿑⿒⿓⿔⿕⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻⿼⿽⿾⿿、。〃〄〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜
〝〞〟〠〰〶〷〽〾〿゛゜゠・㆐㆑㆖㆗㆘㆙㆚㆛㆜㆝㆞㆟㇀㇁㇂㇃㇄㇅㇆㇇㇈㇉㇊㇋㇌㇍㇎㇏㇐㇑㇒㇓㇔㇕㇖㇗㇘㇙㇚㇛㇜㇝㇞㇟㇠㇡㇢㇣㇯㈀㈁㈂㈃㈄㈅㈆㈇㈈㈉㈊㈋㈌㈍㈎㈏㈐㈑㈒㈓㈔㈕㈖㈗㈘㈙㈚㈛㈜㈝㈞㈪㈫㈬㈭㈮㈯㈰㈱㈲㈳㈴㈵㈶㈷㈸㈹㈺㈻㈼㈽㈾㈿㉀㉁㉂㉃㉄㉅㉆㉇㉐㉠㉡㉢㉣㉤㉥㉦㉧㉨㉩㉪㉫㉬㉭㉮㉯㉰㉱㉲㉳㉴㉵㉶㉷㉸㉹㉺㉻㉼㉽㉾㉿㊊㊋㊌㊍㊎㊏㊐㊑㊒㊓㊔㊕㊖㊗㊘㊙㊚㊛㊜㊝㊞㊟㊠㊡㊢㊣㊤㊥㊦㊧㊨㊩㊪㊫㊬㊭㊮㊯㊰㋀㋁㋂㋃㋄㋅㋆㋇㋈㋉㋊㋋㋌㋍㋎㋏㋐㋑㋒㋓㋔㋕㋖㋗㋘㋙㋚㋛㋜㋝㋞㋟㋠㋡㋢㋣㋤㋥㋦㋧㋨㋩㋪㋫㋬㋭㋮㋯㋰㋱㋲㋳㋴㋵㋶㋷㋸㋹㋺㋻㋼㋽㋾㋿㌀㌁㌂㌃㌄㌅㌆㌇㌈㌉㌊㌋㌌㌍㌎㌏㌐㌑㌒㌓㌔㌕㌖㌗㌘㌙㌚㌛㌜㌝㌞㌟㌠㌡㌢㌣㌤㌥㌦㌧㌨㌩㌪㌫㌬㌭㌮㌯㌰㌱㌲㌳㌴㌵㌶㌷㌸㌹㌺㌻㌼㌽㌾㌿㍀㍁㍂㍃㍄㍅㍆㍇㍈㍉㍊㍋㍌㍍㍎㍏㍐㍑㍒㍓㍔㍕㍖㍗㍘㍙㍚㍛㍜㍝㍞㍟㍠㍡㍢㍣㍤㍥㍦㍧㍨㍩㍪㍫㍬㍭㍮㍯㍰㍱㍲㍳㍴㍵㍶㍷㍸㍹㍺㍻㍼㍽㍾㍿㎀㎁㎂㎃㎄㎅㎆㎇㎈㎉㎊㎋㎌㎍㎎㎏㎐㎑㎒㎓㎔㎕㎖㎗㎘㎙㎚㎛㎜㎝㎞㎟㎠㎡㎢㎣㎤㎥㎦㎧㎨㎩㎪㎫㎬㎭㎮㎯㎰㎱㎲㎳㎴㎵㎶㎷㎸㎹㎺㎻㎼㎽㎾㎿㏀㏁㏂㏃㏄㏅㏆㏇㏈㏉㏊㏋㏌㏍㏎㏏㏐㏑㏒㏓㏔㏕㏖㏗㏘㏙㏚㏛㏜㏝㏞㏟㏠㏡㏢㏣㏤㏥㏦㏧㏨㏩㏪㏫㏬㏭㏮㏯㏰㏱㏲㏳㏴㏵㏶㏷㏸㏹㏺㏻㏼㏽㏾㏿䷀䷁䷂䷃䷄䷅䷆䷇䷈䷉䷊䷋䷌䷍䷎䷏䷐䷑䷒䷓䷔䷕䷖䷗䷘䷙䷚䷛䷜䷝䷞䷟䷠䷡䷢䷣䷤䷥䷦䷧䷨䷩䷪䷫䷬䷭䷮䷯䷰䷱䷲䷳䷴䷵䷶䷷䷸䷹䷺䷻䷼䷽䷾䷿꒐꒑꒒꒓꒔꒕꒖꒗꒘꒙꒚꒛꒜꒝꒞꒟꒠꒡꒢꒣꒤꒥꒦꒧꒨꒩꒪꒫꒬꒭꒮꒯꒰꒱꒲꒳꒴꒵꒶꒷꒸꒹꒺꒻꒼꒽꒾꒿꓀꓁꓂꓃꓄꓅꓆꓾꓿꘍꘎꘏꙳꙾꛲꛳꛴꛵꛶꛷꜀꜁꜂꜃꜄꜅꜆꜇꜈꜉꜊꜋꜌꜍꜎꜏꜐꜑꜒꜓꜔꜕꜖꜠꜡꞉꞊꠨꠩꠪꠫꠶꠷꠸꠹꡴꡵꡶꡷꣎꣏꣸꣹꣺꣼꤮꤯꥟꧁꧂꧃꧄꧅꧆꧇꧈꧉꧊꧋꧌꧍꧞꧟꩜꩝꩞꩟꩷꩸꩹꫞꫟꫰꫱꭛꭪꭫꯫﬩﮲﮳﮴﮵﮶﮷﮸﮹﮺﮻﮼﮽﮾﮿﯀﯁﯂﴾﴿﵀﵁﵂﵃﵄﵅﵆﵇﵈﵉﵊﵋﵌﵍﵎﵏﷏﷼﷽﷾﷿︐︑︒︓︔︕︖︗︘︙︰︱︲︳︴︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄﹅﹆﹇﹈﹉﹊﹋﹌﹍﹎﹏﹐﹑﹒﹔﹕﹖﹗﹘﹙﹚﹛﹜﹝﹞﹟﹠﹡﹢﹣﹤﹥﹦﹨﹩﹪﹫!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~⦅⦆。「」、・¢£¬ ̄¦¥₩│←↑→↓■○�𐄀𐄁𐄂𐄷𐄸𐄹𐄺𐄻𐄼𐄽𐄾𐄿𐅹𐅺𐅻𐅼𐅽𐅾𐅿𐆀𐆁𐆂𐆃𐆄𐆅𐆆𐆇𐆈𐆉𐆌𐆍𐆎𐆐𐆑𐆒𐆓𐆔𐆕𐆖𐆗𐆘𐆙𐆚𐆛𐆜𐆠𐇐𐇑𐇒𐇓𐇔𐇕𐇖𐇗𐇘𐇙𐇚𐇛𐇜𐇝𐇞𐇟𐇠𐇡𐇢𐇣𐇤𐇥𐇦𐇧𐇨𐇩𐇪𐇫𐇬𐇭𐇮𐇯𐇰𐇱𐇲𐇳𐇴𐇵𐇶𐇷𐇸𐇹𐇺𐇻𐇼𐎟𐏐𐕯𐡗𐡷𐡸𐤟𐤿𐩐𐩑𐩒𐩓𐩔𐩕𐩖𐩗𐩘𐩿𐫈𐫰𐫱𐫲𐫳𐫴𐫵𐫶𐬹𐬺𐬻𐬼𐬽𐬾𐬿𐮙𐮚𐮛𐮜𐺭𐽕𐽖𐽗𐽘𐽙𐾆𐾇𐾈𐾉𑁇𑁈𑁉𑁊𑁋𑁌𑁍𑂻𑂼𑂾𑂿𑃀𑃁𑅀𑅁𑅂𑅃𑅴𑅵𑇅𑇆𑇇𑇈𑇍𑇛𑇝𑇞𑇟𑈸𑈹𑈺𑈻𑈼𑈽𑊩𑑋𑑌𑑍𑑎𑑏𑑚𑑛𑑝𑓆𑗁𑗂𑗃𑗄𑗅𑗆𑗇𑗈𑗉𑗊𑗋𑗌𑗍𑗎𑗏𑗐𑗑𑗒𑗓𑗔𑗕𑗖𑗗𑙁𑙂𑙃𑙠𑙡𑙢𑙣𑙤𑙥𑙦𑙧𑙨𑙩𑙪𑙫𑙬𑚹𑜼𑜽𑜾𑜿𑠻𑥄𑥅𑥆𑧢𑨿𑩀𑩁𑩂𑩃𑩄𑩅𑩆𑪚𑪛𑪜𑪞𑪟𑪠𑪡𑪢𑬀𑬁𑬂𑬃𑬄𑬅𑬆𑬇𑬈𑬉𑱁𑱂𑱃𑱄𑱅𑱰𑱱𑻷𑻸𑽃𑽄𑽅𑽆𑽇𑽈𑽉𑽊𑽋𑽌𑽍𑽎𑽏𑿕𑿖𑿗𑿘𑿙𑿚𑿛𑿜𑿝𑿞𑿟𑿠𑿡𑿢𑿣𑿤𑿥𑿦𑿧𑿨𑿩𑿪𑿫𑿬𑿭𑿮𑿯𑿰𑿱𑿿𒑰𒑱𒑲𒑳𒑴𒿱𒿲𖩮𖩯𖫵𖬷𖬸𖬹𖬺𖬻𖬼𖬽𖬾𖬿𖭄𖭅𖺗𖺘𖺙𖺚𖿢𛲜𛲟𜽐𜽑𜽒𜽓𜽔𜽕𜽖𜽗𜽘𜽙𜽚𜽛𜽜𜽝𜽞𜽟𜽠𜽡𜽢𜽣𜽤𜽥𜽦𜽧𜽨𜽩𜽪𜽫𜽬𜽭𜽮𜽯𜽰𜽱𜽲𜽳𜽴𜽵𜽶𜽷𜽸𜽹𜽺𜽻𜽼𜽽𜽾𜽿𜾀𜾁𜾂𜾃𜾄𜾅𜾆𜾇𜾈𜾉𜾊𜾋𜾌𜾍𜾎𜾏𜾐𜾑𜾒𜾓𜾔𜾕𜾖𜾗𜾘𜾙𜾚𜾛𜾜𜾝𜾞𜾟𜾠𜾡𜾢𜾣𜾤𜾥𜾦𜾧𜾨𜾩𜾪𜾫𜾬𜾭𜾮𜾯𜾰𜾱𜾲𜾳𜾴𜾵𜾶𜾷𜾸𜾹𜾺𜾻𜾼𜾽𜾾𜾿𜿀𜿁𜿂𜿃𝀀𝀁𝀂𝀃𝀄𝀅𝀆𝀇𝀈𝀉𝀊𝀋𝀌𝀍𝀎𝀏𝀐𝀑𝀒𝀓𝀔𝀕𝀖𝀗𝀘𝀙𝀚𝀛𝀜𝀝𝀞𝀟𝀠𝀡𝀢𝀣𝀤𝀥𝀦𝀧𝀨𝀩𝀪𝀫𝀬𝀭𝀮𝀯𝀰𝀱𝀲𝀳𝀴𝀵𝀶𝀷𝀸𝀹𝀺𝀻𝀼𝀽𝀾𝀿𝁀𝁁𝁂𝁃𝁄𝁅𝁆𝁇𝁈𝁉𝁊𝁋𝁌𝁍𝁎𝁏𝁐𝁑𝁒𝁓𝁔𝁕𝁖𝁗𝁘𝁙𝁚𝁛𝁜𝁝𝁞𝁟𝁠𝁡𝁢𝁣𝁤𝁥𝁦𝁧𝁨𝁩𝁪𝁫𝁬𝁭𝁮𝁯𝁰𝁱𝁲𝁳𝁴𝁵𝁶𝁷𝁸𝁹𝁺𝁻𝁼𝁽𝁾𝁿𝂀𝂁𝂂𝂃𝂄𝂅𝂆𝂇𝂈𝂉𝂊𝂋𝂌𝂍𝂎𝂏𝂐𝂑𝂒𝂓𝂔𝂕𝂖𝂗𝂘𝂙𝂚𝂛𝂜𝂝𝂞𝂟𝂠𝂡𝂢𝂣𝂤𝂥𝂦𝂧𝂨𝂩𝂪𝂫𝂬𝂭𝂮𝂯𝂰𝂱𝂲𝂳𝂴𝂵𝂶𝂷𝂸𝂹𝂺𝂻𝂼𝂽𝂾𝂿𝃀𝃁𝃂𝃃𝃄𝃅𝃆𝃇𝃈𝃉𝃊𝃋𝃌𝃍𝃎𝃏𝃐𝃑𝃒𝃓𝃔𝃕𝃖𝃗𝃘𝃙𝃚𝃛𝃜𝃝𝃞𝃟𝃠𝃡𝃢𝃣𝃤𝃥𝃦𝃧𝃨𝃩𝃪𝃫𝃬𝃭𝃮𝃯𝃰𝃱𝃲𝃳𝃴𝃵𝄀𝄁𝄂𝄃𝄄𝄅𝄆𝄇𝄈𝄉𝄊𝄋𝄌𝄍𝄎𝄏𝄐𝄑𝄒𝄓𝄔𝄕𝄖𝄗𝄘𝄙𝄚𝄛𝄜𝄝𝄞𝄟𝄠𝄡𝄢𝄣𝄤𝄥𝄦𝄩𝄪𝄫𝄬𝄭𝄮𝄯𝄰𝄱𝄲𝄳𝄴𝄵𝄶𝄷𝄸𝄹𝄺𝄻𝄼𝄽𝄾𝄿𝅀𝅁𝅂𝅃𝅄𝅅𝅆𝅇𝅈𝅉𝅊𝅋𝅌𝅍𝅎𝅏𝅐𝅑𝅒𝅓𝅔𝅕𝅖𝅗𝅘𝅙𝅚𝅛𝅜𝅝𝅗𝅥𝅘𝅥𝅘𝅥𝅮𝅘𝅥𝅯𝅘𝅥𝅰𝅘𝅥𝅱𝅘𝅥𝅲𝅪𝅫𝅬𝆃𝆄𝆌𝆍𝆎𝆏𝆐𝆑𝆒𝆓𝆔𝆕𝆖𝆗𝆘𝆙𝆚𝆛𝆜𝆝𝆞𝆟𝆠𝆡𝆢𝆣𝆤𝆥𝆦𝆧𝆨𝆩𝆮𝆯𝆰𝆱𝆲𝆳𝆴𝆵𝆶𝆷𝆸𝆹𝆺𝆹𝅥𝆺𝅥𝆹𝅥𝅮𝆺𝅥𝅮𝆹𝅥𝅯𝆺𝅥𝅯𝇁𝇂𝇃𝇄𝇅𝇆𝇇𝇈𝇉𝇊𝇋𝇌𝇍𝇎𝇏𝇐𝇑𝇒𝇓𝇔𝇕𝇖𝇗𝇘𝇙𝇚𝇛𝇜𝇝𝇞𝇟𝇠𝇡𝇢𝇣𝇤𝇥𝇦𝇧𝇨𝇩𝇪𝈀𝈁𝈂𝈃𝈄𝈅𝈆𝈇𝈈𝈉𝈊𝈋𝈌𝈍𝈎𝈏𝈐𝈑𝈒𝈓𝈔𝈕𝈖𝈗𝈘𝈙𝈚𝈛𝈜𝈝𝈞𝈟𝈠𝈡𝈢𝈣𝈤𝈥𝈦𝈧𝈨𝈩𝈪𝈫𝈬𝈭𝈮𝈯𝈰𝈱𝈲𝈳𝈴𝈵𝈶𝈷𝈸𝈹𝈺𝈻𝈼𝈽𝈾𝈿𝉀𝉁𝉅𝌀𝌁𝌂𝌃𝌄𝌅𝌆𝌇𝌈𝌉𝌊𝌋𝌌𝌍𝌎𝌏𝌐𝌑𝌒𝌓𝌔𝌕𝌖𝌗𝌘𝌙𝌚𝌛𝌜𝌝𝌞𝌟𝌠𝌡𝌢𝌣𝌤𝌥𝌦𝌧𝌨𝌩𝌪𝌫𝌬𝌭𝌮𝌯𝌰𝌱𝌲𝌳𝌴𝌵𝌶𝌷𝌸𝌹𝌺𝌻𝌼𝌽𝌾𝌿𝍀𝍁𝍂𝍃𝍄𝍅𝍆𝍇𝍈𝍉𝍊𝍋𝍌𝍍𝍎𝍏𝍐𝍑𝍒𝍓𝍔𝍕𝍖𝛁𝛛𝛻𝜕𝜵𝝏𝝯𝞉𝞩𝟃𝠀𝠁𝠂𝠃𝠄𝠅𝠆𝠇𝠈𝠉𝠊𝠋𝠌𝠍𝠎𝠏𝠐𝠑𝠒𝠓𝠔𝠕𝠖𝠗𝠘𝠙𝠚𝠛𝠜𝠝𝠞𝠟𝠠𝠡𝠢𝠣𝠤𝠥𝠦𝠧𝠨𝠩𝠪𝠫𝠬𝠭𝠮𝠯𝠰𝠱𝠲𝠳𝠴𝠵𝠶𝠷𝠸𝠹𝠺𝠻𝠼𝠽𝠾𝠿𝡀𝡁𝡂𝡃𝡄𝡅𝡆𝡇𝡈𝡉𝡊𝡋𝡌𝡍𝡎𝡏𝡐𝡑𝡒𝡓𝡔𝡕𝡖𝡗𝡘𝡙𝡚𝡛𝡜𝡝𝡞𝡟𝡠𝡡𝡢𝡣𝡤𝡥𝡦𝡧𝡨𝡩𝡪𝡫𝡬𝡭𝡮𝡯𝡰𝡱𝡲𝡳𝡴𝡵𝡶𝡷𝡸𝡹𝡺𝡻𝡼𝡽𝡾𝡿𝢀𝢁𝢂𝢃𝢄𝢅𝢆𝢇𝢈𝢉𝢊𝢋𝢌𝢍𝢎𝢏𝢐𝢑𝢒𝢓𝢔𝢕𝢖𝢗𝢘𝢙𝢚𝢛𝢜𝢝𝢞𝢟𝢠𝢡𝢢𝢣𝢤𝢥𝢦𝢧𝢨𝢩𝢪𝢫𝢬𝢭𝢮𝢯𝢰𝢱𝢲𝢳𝢴𝢵𝢶𝢷𝢸𝢹𝢺𝢻𝢼𝢽𝢾𝢿𝣀𝣁𝣂𝣃𝣄𝣅𝣆𝣇𝣈𝣉𝣊𝣋𝣌𝣍𝣎𝣏𝣐𝣑𝣒𝣓𝣔𝣕𝣖𝣗𝣘𝣙𝣚𝣛𝣜𝣝𝣞𝣟𝣠𝣡𝣢𝣣𝣤𝣥𝣦𝣧𝣨𝣩𝣪𝣫𝣬𝣭𝣮𝣯𝣰𝣱𝣲𝣳𝣴𝣵𝣶𝣷𝣸𝣹𝣺𝣻𝣼𝣽𝣾𝣿𝤀𝤁𝤂𝤃𝤄𝤅𝤆𝤇𝤈𝤉𝤊𝤋𝤌𝤍𝤎𝤏𝤐𝤑𝤒𝤓𝤔𝤕𝤖𝤗𝤘𝤙𝤚𝤛𝤜𝤝𝤞𝤟𝤠𝤡𝤢𝤣𝤤𝤥𝤦𝤧𝤨𝤩𝤪𝤫𝤬𝤭𝤮𝤯𝤰𝤱𝤲𝤳𝤴𝤵𝤶𝤷𝤸𝤹𝤺𝤻𝤼𝤽𝤾𝤿𝥀𝥁𝥂𝥃𝥄𝥅𝥆𝥇𝥈𝥉𝥊𝥋𝥌𝥍𝥎𝥏𝥐𝥑𝥒𝥓𝥔𝥕𝥖𝥗𝥘𝥙𝥚𝥛𝥜𝥝𝥞𝥟𝥠𝥡𝥢𝥣𝥤𝥥𝥦𝥧𝥨𝥩𝥪𝥫𝥬𝥭𝥮𝥯𝥰𝥱𝥲𝥳𝥴𝥵𝥶𝥷𝥸𝥹𝥺𝥻𝥼𝥽𝥾𝥿𝦀𝦁𝦂𝦃𝦄𝦅𝦆𝦇𝦈𝦉𝦊𝦋𝦌𝦍𝦎𝦏𝦐𝦑𝦒𝦓𝦔𝦕𝦖𝦗𝦘𝦙𝦚𝦛𝦜𝦝𝦞𝦟𝦠𝦡𝦢𝦣𝦤𝦥𝦦𝦧𝦨𝦩𝦪𝦫𝦬𝦭𝦮𝦯𝦰𝦱𝦲𝦳𝦴𝦵𝦶𝦷𝦸𝦹𝦺𝦻𝦼𝦽𝦾𝦿𝧀𝧁𝧂𝧃𝧄𝧅𝧆𝧇𝧈𝧉𝧊𝧋𝧌𝧍𝧎𝧏𝧐𝧑𝧒𝧓𝧔𝧕𝧖𝧗𝧘𝧙𝧚𝧛𝧜𝧝𝧞𝧟𝧠𝧡𝧢𝧣𝧤𝧥𝧦𝧧𝧨𝧩𝧪𝧫𝧬𝧭𝧮𝧯𝧰𝧱𝧲𝧳𝧴𝧵𝧶𝧷𝧸𝧹𝧺𝧻𝧼𝧽𝧾𝧿𝨷𝨸𝨹𝨺𝩭𝩮𝩯𝩰𝩱𝩲𝩳𝩴𝩶𝩷𝩸𝩹𝩺𝩻𝩼𝩽𝩾𝩿𝪀𝪁𝪂𝪃𝪅𝪆𝪇𝪈𝪉𝪊𝪋𞅏𞋿𞥞𞥟𞲬𞲰𞴮𞻰𞻱🀀🀁🀂🀃🀄🀅🀆🀇🀈🀉🀊🀋🀌🀍🀎🀏🀐🀑🀒🀓🀔🀕🀖🀗🀘🀙🀚🀛🀜🀝🀞🀟🀠🀡🀢🀣🀤🀥🀦🀧🀨🀩🀪🀫🀰🀱🀲🀳🀴🀵🀶🀷🀸🀹🀺🀻🀼🀽🀾🀿🁀🁁🁂🁃🁄🁅🁆🁇🁈🁉🁊🁋🁌🁍🁎🁏🁐🁑🁒🁓🁔🁕🁖🁗🁘🁙🁚🁛🁜🁝🁞🁟🁠🁡🁢🁣🁤🁥🁦🁧🁨🁩🁪🁫🁬🁭🁮🁯🁰🁱🁲🁳🁴🁵🁶🁷🁸🁹🁺🁻🁼🁽🁾🁿🂀🂁🂂🂃🂄🂅🂆🂇🂈🂉🂊🂋🂌🂍🂎🂏🂐🂑🂒🂓🂠🂡🂢🂣🂤🂥🂦🂧🂨🂩🂪🂫🂬🂭🂮🂱🂲🂳🂴🂵🂶🂷🂸🂹🂺🂻🂼🂽🂾🂿🃁🃂🃃🃄🃅🃆🃇🃈🃉🃊🃋🃌🃍🃎🃏🃑🃒🃓🃔🃕🃖🃗🃘🃙🃚🃛🃜🃝🃞🃟🃠🃡🃢🃣🃤🃥🃦🃧🃨🃩🃪🃫🃬🃭🃮🃯🃰🃱🃲🃳🃴🃵🄍🄎🄏🄐🄑🄒🄓🄔🄕🄖🄗🄘🄙🄚🄛🄜🄝🄞🄟🄠🄡🄢🄣🄤🄥🄦🄧🄨🄩🄪🄫🄬🄭🄮🄯🅊🅋🅌🅍🅎🅏🅪🅫🅬🅭🅮🅯🆊🆋🆌🆍🆎🆏🆐🆑🆒🆓🆔🆕🆖🆗🆘🆙🆚🆛🆜🆝🆞🆟🆠🆡🆢🆣🆤🆥🆦🆧🆨🆩🆪🆫🆬🆭🇦🇧🇨🇩🇪🇫🇬🇭🇮🇯🇰🇱🇲🇳🇴🇵🇶🇷🇸🇹🇺🇻🇼🇽🇾🇿🈀🈁🈂🈐🈑🈒🈓🈔🈕🈖🈗🈘🈙🈚🈛🈜🈝🈞🈟🈠🈡🈢🈣🈤🈥🈦🈧🈨🈩🈪🈫🈬🈭🈮🈯🈰🈱🈲🈳🈴🈵🈶🈷🈸🈹🈺🈻🉀🉁🉂🉃🉄🉅🉆🉇🉈🉐🉑🉠🉡🉢🉣🉤🉥🌀🌁🌂🌃🌄🌅🌆🌇🌈🌉🌊🌋🌌🌍🌎🌏🌐🌑🌒🌓🌔🌕🌖🌗🌘🌙🌚🌛🌜🌝🌞🌟🌠🌡🌢🌣🌤🌥🌦🌧🌨🌩🌪🌫🌬🌭🌮🌯🌰🌱🌲🌳🌴🌵🌶🌷🌸🌹🌺🌻🌼🌽🌾🌿🍀🍁🍂🍃🍄🍅🍆🍇🍈🍉🍊🍋🍌🍍🍎🍏🍐🍑🍒🍓🍔🍕🍖🍗🍘🍙🍚🍛🍜🍝🍞🍟🍠🍡🍢🍣🍤🍥🍦🍧🍨🍩🍪🍫🍬🍭🍮🍯🍰🍱🍲🍳🍴🍵🍶🍷🍸🍹🍺🍻🍼🍽🍾🍿🎀🎁🎂🎃🎄🎅🎆🎇🎈🎉🎊🎋🎌🎍🎎🎏🎐🎑🎒🎓🎔🎕🎖🎗🎘🎙🎚🎛🎜🎝🎞🎟🎠🎡🎢🎣🎤🎥🎦🎧🎨🎩🎪🎫🎬🎭🎮🎯🎰🎱🎲🎳🎴🎵🎶🎷🎸🎹🎺🎻🎼🎽🎾🎿🏀🏁🏂🏃🏄🏅🏆🏇🏈🏉🏊🏋🏌🏍🏎🏏🏐🏑🏒🏓🏔🏕🏖🏗🏘🏙🏚🏛🏜🏝🏞🏟🏠🏡🏢🏣🏤🏥🏦🏧🏨🏩🏪🏫🏬🏭🏮🏯🏰🏱🏲🏳🏴🏵🏶🏷🏸🏹🏺🏻🏼🏽🏾🏿🐀🐁🐂🐃🐄🐅🐆🐇🐈🐉🐊🐋🐌🐍🐎🐏🐐🐑🐒🐓🐔🐕🐖🐗🐘🐙🐚🐛🐜🐝🐞🐟🐠🐡🐢🐣🐤🐥🐦🐧🐨🐩🐪🐫🐬🐭🐮🐯🐰🐱🐲🐳🐴🐵🐶🐷🐸🐹🐺🐻🐼🐽🐾🐿👀👁👂👃👄👅👆👇👈👉👊👋👌👍👎👏👐👑👒👓👔👕👖👗👘👙👚👛👜👝👞👟👠👡👢👣👤👥👦👧👨👩👪👫👬👭👮👯👰👱👲👳👴👵👶👷👸👹👺👻👼👽👾👿💀💁💂💃💄💅💆💇💈💉💊💋💌💍💎💏💐💑💒💓💔💕💖💗💘💙💚💛💜💝💞💟💠💡💢💣💤💥💦💧💨💩💪💫💬💭💮💯💰💱💲💳💴💵💶💷💸💹💺💻💼💽💾💿📀📁📂📃📄📅📆📇📈📉📊📋📌📍📎📏📐📑📒📓📔📕📖📗📘📙📚📛📜📝📞📟📠📡📢📣📤📥📦📧📨📩📪📫📬📭📮📯📰📱📲📳📴📵📶📷📸📹📺📻📼📽📾📿🔀🔁🔂🔃🔄🔅🔆🔇🔈🔉🔊🔋🔌🔍🔎🔏🔐🔑🔒🔓🔔🔕🔖🔗🔘🔙🔚🔛🔜🔝🔞🔟🔠🔡🔢🔣🔤🔥🔦🔧🔨🔩🔪🔫🔬🔭🔮🔯🔰🔱🔲🔳🔴🔵🔶🔷🔸🔹🔺🔻🔼🔽🔾🔿🕀🕁🕂🕃🕄🕅🕆🕇🕈🕉🕊🕋🕌🕍🕎🕏🕐🕑🕒🕓🕔🕕🕖🕗🕘🕙🕚🕛🕜🕝🕞🕟🕠🕡🕢🕣🕤🕥🕦🕧🕨🕩🕪🕫🕬🕭🕮🕯🕰🕱🕲🕳🕴🕵🕶🕷
🕸🕹🕺🕻🕼🕽🕾🕿🖀🖁🖂🖃🖄🖅🖆🖇🖈🖉🖊🖋🖌🖍🖎🖏🖐🖑🖒🖓🖔🖕🖖🖗🖘🖙🖚🖛🖜🖝🖞🖟🖠🖡🖢🖣🖤🖥🖦🖧🖨🖩🖪🖫🖬🖭🖮🖯🖰🖱🖲🖳🖴🖵🖶🖷🖸🖹🖺🖻🖼🖽🖾🖿🗀🗁🗂🗃🗄🗅🗆🗇🗈🗉🗊🗋🗌🗍🗎🗏🗐🗑🗒🗓🗔🗕🗖🗗🗘🗙🗚🗛🗜🗝🗞🗟🗠🗡🗢🗣🗤🗥🗦🗧🗨🗩🗪🗫🗬🗭🗮🗯🗰🗱🗲🗳🗴🗵🗶🗷🗸🗹🗺🗻🗼🗽🗾🗿😀😁😂😃😄😅😆😇😈😉😊😋😌😍😎😏😐😑😒😓😔😕😖😗😘😙😚😛😜😝😞😟😠😡😢😣😤😥😦😧😨😩😪😫😬😭😮😯😰😱😲😳😴😵😶😷😸😹😺😻😼😽😾😿🙀🙁🙂🙃🙄🙅🙆🙇🙈🙉🙊🙋🙌🙍🙎🙏🙐🙑🙒🙓🙔🙕🙖🙗🙘🙙🙚🙛🙜🙝🙞🙟🙠🙡🙢🙣🙤🙥🙦🙧🙨🙩🙪🙫🙬🙭🙮🙯🙰🙱🙲🙳🙴🙵🙶🙷🙸🙹🙺🙻🙼🙽🙾🙿🚀🚁🚂🚃🚄🚅🚆🚇🚈🚉🚊🚋🚌🚍🚎🚏🚐🚑🚒🚓🚔🚕🚖🚗🚘🚙🚚🚛🚜🚝🚞🚟🚠🚡🚢🚣🚤🚥🚦🚧🚨🚩🚪🚫🚬🚭🚮🚯🚰🚱🚲🚳🚴🚵🚶🚷🚸🚹🚺🚻🚼🚽🚾🚿🛀🛁🛂🛃🛄🛅🛆🛇🛈🛉🛊🛋🛌🛍🛎🛏🛐🛑🛒🛓🛔🛕🛖🛗🛜🛝🛞🛟🛠🛡🛢🛣🛤🛥🛦🛧🛨🛩🛪🛫🛬🛰🛱🛲🛳🛴🛵🛶🛷🛸🛹🛺🛻🛼🜀🜁🜂🜃🜄🜅🜆🜇🜈🜉🜊🜋🜌🜍🜎🜏🜐🜑🜒🜓🜔🜕🜖🜗🜘🜙🜚🜛🜜🜝🜞🜟🜠🜡🜢🜣🜤🜥🜦🜧🜨🜩🜪🜫🜬🜭🜮🜯🜰🜱🜲🜳🜴🜵🜶🜷🜸🜹🜺🜻🜼🜽🜾🜿🝀🝁🝂🝃🝄🝅🝆🝇🝈🝉🝊🝋🝌🝍🝎🝏🝐🝑🝒🝓🝔🝕🝖🝗🝘🝙🝚🝛🝜🝝🝞🝟🝠🝡🝢🝣🝤🝥🝦🝧🝨🝩🝪🝫🝬🝭🝮🝯🝰🝱🝲🝳🝴🝵🝶🝻🝼🝽🝾🝿🞀🞁🞂🞃🞄🞅🞆🞇🞈🞉🞊🞋🞌🞍🞎🞏🞐🞑🞒🞓🞔🞕🞖🞗🞘🞙🞚🞛🞜🞝🞞🞟🞠🞡🞢🞣🞤🞥🞦🞧🞨🞩🞪🞫🞬🞭🞮🞯🞰🞱🞲🞳🞴🞵🞶🞷🞸🞹🞺🞻🞼🞽🞾🞿🟀🟁🟂🟃🟄🟅🟆🟇🟈🟉🟊🟋🟌🟍🟎🟏🟐🟑🟒🟓🟔🟕🟖🟗🟘🟙🟠🟡🟢🟣🟤🟥🟦🟧🟨🟩🟪🟫🟰🠀🠁🠂🠃🠄🠅🠆🠇🠈🠉🠊🠋🠐🠑🠒🠓🠔🠕🠖🠗🠘🠙🠚🠛🠜🠝🠞🠟🠠🠡🠢🠣🠤🠥🠦🠧🠨🠩🠪🠫🠬🠭🠮🠯🠰🠱🠲🠳🠴🠵🠶🠷🠸🠹🠺🠻🠼🠽🠾🠿🡀🡁🡂🡃🡄🡅🡆🡇🡐🡑🡒🡓🡔🡕🡖🡗🡘🡙🡠🡡🡢🡣🡤🡥🡦🡧🡨🡩🡪🡫🡬🡭🡮🡯🡰🡱🡲🡳🡴🡵🡶🡷🡸🡹🡺🡻🡼🡽🡾🡿🢀🢁🢂🢃🢄🢅🢆🢇🢐🢑🢒🢓🢔🢕🢖🢗🢘🢙🢚🢛🢜🢝🢞🢟🢠🢡🢢🢣🢤🢥🢦🢧🢨🢩🢪🢫🢬🢭🢰🢱🤀🤁🤂🤃🤄🤅🤆🤇🤈🤉🤊🤋🤌🤍🤎🤏🤐🤑🤒🤓🤔🤕🤖🤗🤘🤙🤚🤛🤜🤝🤞🤟🤠🤡🤢🤣🤤🤥🤦🤧🤨🤩🤪🤫🤬🤭🤮🤯🤰🤱🤲🤳🤴🤵🤶🤷🤸🤹🤺🤻🤼🤽🤾🤿🥀🥁🥂🥃🥄🥅🥆🥇🥈🥉🥊🥋🥌🥍🥎🥏🥐🥑🥒🥓🥔🥕🥖🥗🥘🥙🥚🥛🥜🥝🥞🥟🥠🥡🥢🥣🥤🥥🥦🥧🥨🥩🥪🥫🥬🥭🥮🥯🥰🥱🥲🥳🥴🥵🥶🥷🥸🥹🥺🥻🥼🥽🥾🥿🦀🦁🦂🦃🦄🦅🦆🦇🦈🦉🦊🦋🦌🦍🦎🦏🦐🦑🦒🦓🦔🦕🦖🦗🦘🦙🦚🦛🦜🦝🦞🦟🦠🦡🦢🦣🦤🦥🦦🦧🦨🦩🦪🦫🦬🦭🦮🦯🦰🦱🦲🦳🦴🦵🦶🦷🦸🦹🦺🦻🦼🦽🦾🦿🧀🧁🧂🧃🧄🧅🧆🧇🧈🧉🧊🧋🧌🧍🧎🧏🧐🧑🧒🧓🧔🧕🧖🧗🧘🧙🧚🧛🧜🧝🧞🧟🧠🧡🧢🧣🧤🧥🧦🧧🧨🧩🧪🧫🧬🧭🧮🧯🧰🧱🧲🧳🧴🧵🧶🧷🧸🧹🧺🧻🧼🧽🧾🧿🨀🨁🨂🨃🨄🨅🨆🨇🨈🨉🨊🨋🨌🨍🨎🨏🨐🨑🨒🨓🨔🨕🨖🨗🨘🨙🨚🨛🨜🨝🨞🨟🨠🨡🨢🨣🨤🨥🨦🨧🨨🨩🨪🨫🨬🨭🨮🨯🨰🨱🨲🨳🨴🨵🨶🨷🨸🨹🨺🨻🨼🨽🨾🨿🩀🩁🩂🩃🩄🩅🩆🩇🩈🩉🩊🩋🩌🩍🩎🩏🩐🩑🩒🩓🩠🩡🩢🩣🩤🩥🩦🩧🩨🩩🩪🩫🩬🩭🩰🩱🩲🩳🩴🩵🩶🩷🩸🩹🩺🩻🩼🪀🪁🪂🪃🪄🪅🪆🪇🪈🪐🪑🪒🪓🪔🪕🪖🪗🪘🪙🪚🪛🪜🪝🪞🪟🪠🪡🪢🪣🪤🪥🪦🪧🪨🪩🪪🪫🪬🪭🪮🪯🪰🪱🪲🪳🪴🪵🪶🪷🪸🪹🪺🪻🪼🪽🪿🫀🫁🫂🫃🫄🫅🫎🫏🫐🫑🫒🫓🫔🫕🫖🫗🫘🫙🫚🫛🫠🫡🫢🫣🫤🫥🫦🫧🫨🫰🫱🫲🫳🫴🫵🫶🫷🫸🬀🬁🬂🬃🬄🬅🬆🬇🬈🬉🬊🬋🬌🬍🬎🬏🬐🬑🬒🬓🬔🬕🬖🬗🬘🬙🬚🬛🬜🬝🬞🬟🬠🬡🬢🬣🬤🬥🬦🬧🬨🬩🬪🬫🬬🬭🬮🬯🬰🬱🬲🬳🬴🬵🬶🬷🬸🬹🬺🬻🬼🬽🬾🬿🭀🭁🭂🭃🭄🭅🭆🭇🭈🭉🭊🭋🭌🭍🭎🭏🭐🭑🭒🭓🭔🭕🭖🭗🭘🭙🭚🭛🭜🭝🭞🭟🭠🭡🭢🭣🭤🭥🭦🭧🭨🭩🭪🭫🭬🭭🭮🭯🭰🭱🭲🭳🭴🭵🭶🭷🭸🭹🭺🭻🭼🭽🭾🭿🮀🮁🮂🮃🮄🮅🮆🮇🮈🮉🮊🮋🮌🮍🮎🮏🮐🮑🮒🮔🮕🮖🮗🮘🮙🮚🮛🮜🮝🮞🮟🮠🮡🮢🮣🮤🮥🮦🮧🮨🮩🮪🮫🮬🮭🮮🮯🮰🮱🮲🮳🮴🮵🮶🮷🮸🮹🮺🮻🮼🮽🮾🮿🯀🯁🯂🯃🯄🯅🯆🯇🯈🯉🯊"""
11
10
  # https://www.compart.com/en/unicode/category
12
11
  # https://unicode.org/Public/UNIDATA/UnicodeData.txt
12
+ # NOTE: WAY too large to store as a string of each char
13
+ UNICODE_PUNCT_RANGES = [
14
+ "\\U00000021-\\U0000002f",
15
+ "\\U0000003a-\\U00000040",
16
+ "\\U0000005b-\\U00000060",
17
+ "\\U0000007b-\\U0000007e",
18
+ "\\U000000a1-\\U000000a9",
19
+ "\\U000000ab-\\U000000ac",
20
+ "\\U000000ae-\\U000000b1",
21
+ "\\U000000b4",
22
+ "\\U000000b6-\\U000000b8",
23
+ "\\U000000bb",
24
+ "\\U000000bf",
25
+ "\\U000000d7",
26
+ "\\U000000f7",
27
+ "\\U000002c2-\\U000002c5",
28
+ "\\U000002d2-\\U000002df",
29
+ "\\U000002e5-\\U000002eb",
30
+ "\\U000002ed",
31
+ "\\U000002ef-\\U000002ff",
32
+ "\\U00000375",
33
+ "\\U0000037e",
34
+ "\\U00000384-\\U00000385",
35
+ "\\U00000387",
36
+ "\\U000003f6",
37
+ "\\U00000482",
38
+ "\\U0000055a-\\U0000055f",
39
+ "\\U00000589-\\U0000058a",
40
+ "\\U0000058d-\\U0000058f",
41
+ "\\U000005be",
42
+ "\\U000005c0",
43
+ "\\U000005c3",
44
+ "\\U000005c6",
45
+ "\\U000005f3-\\U000005f4",
46
+ "\\U00000606-\\U0000060f",
47
+ "\\U0000061b",
48
+ "\\U0000061d-\\U0000061f",
49
+ "\\U0000066a-\\U0000066d",
50
+ "\\U000006d4",
51
+ "\\U000006de",
52
+ "\\U000006e9",
53
+ "\\U000006fd-\\U000006fe",
54
+ "\\U00000700-\\U0000070d",
55
+ "\\U000007f6-\\U000007f9",
56
+ "\\U000007fe-\\U000007ff",
57
+ "\\U00000830-\\U0000083e",
58
+ "\\U0000085e",
59
+ "\\U00000888",
60
+ "\\U00000964-\\U00000965",
61
+ "\\U00000970",
62
+ "\\U000009f2-\\U000009f3",
63
+ "\\U000009fa-\\U000009fb",
64
+ "\\U000009fd",
65
+ "\\U00000a76",
66
+ "\\U00000af0-\\U00000af1",
67
+ "\\U00000b70",
68
+ "\\U00000bf3-\\U00000bfa",
69
+ "\\U00000c77",
70
+ "\\U00000c7f",
71
+ "\\U00000c84",
72
+ "\\U00000d4f",
73
+ "\\U00000d79",
74
+ "\\U00000df4",
75
+ "\\U00000e3f",
76
+ "\\U00000e4f",
77
+ "\\U00000e5a-\\U00000e5b",
78
+ "\\U00000f01-\\U00000f17",
79
+ "\\U00000f1a-\\U00000f1f",
80
+ "\\U00000f34",
81
+ "\\U00000f36",
82
+ "\\U00000f38",
83
+ "\\U00000f3a-\\U00000f3d",
84
+ "\\U00000f85",
85
+ "\\U00000fbe-\\U00000fc5",
86
+ "\\U00000fc7-\\U00000fcc",
87
+ "\\U00000fce-\\U00000fda",
88
+ "\\U0000104a-\\U0000104f",
89
+ "\\U0000109e-\\U0000109f",
90
+ "\\U000010fb",
91
+ "\\U00001360-\\U00001368",
92
+ "\\U00001390-\\U00001399",
93
+ "\\U00001400",
94
+ "\\U0000166d-\\U0000166e",
95
+ "\\U0000169b-\\U0000169c",
96
+ "\\U000016eb-\\U000016ed",
97
+ "\\U00001735-\\U00001736",
98
+ "\\U000017d4-\\U000017d6",
99
+ "\\U000017d8-\\U000017db",
100
+ "\\U00001800-\\U0000180a",
101
+ "\\U00001940",
102
+ "\\U00001944-\\U00001945",
103
+ "\\U000019de-\\U000019ff",
104
+ "\\U00001a1e-\\U00001a1f",
105
+ "\\U00001aa0-\\U00001aa6",
106
+ "\\U00001aa8-\\U00001aad",
107
+ "\\U00001b5a-\\U00001b6a",
108
+ "\\U00001b74-\\U00001b7e",
109
+ "\\U00001bfc-\\U00001bff",
110
+ "\\U00001c3b-\\U00001c3f",
111
+ "\\U00001c7e-\\U00001c7f",
112
+ "\\U00001cc0-\\U00001cc7",
113
+ "\\U00001cd3",
114
+ "\\U00001fbd",
115
+ "\\U00001fbf-\\U00001fc1",
116
+ "\\U00001fcd-\\U00001fcf",
117
+ "\\U00001fdd-\\U00001fdf",
118
+ "\\U00001fed-\\U00001fef",
119
+ "\\U00001ffd-\\U00001ffe",
120
+ "\\U00002010-\\U00002027",
121
+ "\\U00002030-\\U0000205e",
122
+ "\\U0000207a-\\U0000207e",
123
+ "\\U0000208a-\\U0000208e",
124
+ "\\U000020a0-\\U000020c0",
125
+ "\\U00002100-\\U00002101",
126
+ "\\U00002103-\\U00002106",
127
+ "\\U00002108-\\U00002109",
128
+ "\\U00002114",
129
+ "\\U00002116-\\U00002118",
130
+ "\\U0000211e-\\U00002123",
131
+ "\\U00002125",
132
+ "\\U00002127",
133
+ "\\U00002129",
134
+ "\\U0000212e",
135
+ "\\U0000213a-\\U0000213b",
136
+ "\\U00002140-\\U00002144",
137
+ "\\U0000214a-\\U0000214d",
138
+ "\\U0000214f",
139
+ "\\U0000218a-\\U0000218b",
140
+ "\\U00002190-\\U00002426",
141
+ "\\U00002440-\\U0000244a",
142
+ "\\U0000249c-\\U000024b5",
143
+ "\\U00002500-\\U00002775",
144
+ "\\U00002794-\\U00002b73",
145
+ "\\U00002b76-\\U00002b95",
146
+ "\\U00002b97-\\U00002bff",
147
+ "\\U00002ce5-\\U00002cea",
148
+ "\\U00002cf9-\\U00002cfc",
149
+ "\\U00002cfe-\\U00002cff",
150
+ "\\U00002d70",
151
+ "\\U00002e00-\\U00002e2e",
152
+ "\\U00002e30-\\U00002e5d",
153
+ "\\U00002e80-\\U00002e99",
154
+ "\\U00002e9b-\\U00002ef3",
155
+ "\\U00002f00-\\U00002fd5",
156
+ "\\U00002ff0-\\U00002fff",
157
+ "\\U00003001-\\U00003004",
158
+ "\\U00003008-\\U00003020",
159
+ "\\U00003030",
160
+ "\\U00003036-\\U00003037",
161
+ "\\U0000303d-\\U0000303f",
162
+ "\\U0000309b-\\U0000309c",
163
+ "\\U000030a0",
164
+ "\\U000030fb",
165
+ "\\U00003190-\\U00003191",
166
+ "\\U00003196-\\U0000319f",
167
+ "\\U000031c0-\\U000031e3",
168
+ "\\U000031ef",
169
+ "\\U00003200-\\U0000321e",
170
+ "\\U0000322a-\\U00003247",
171
+ "\\U00003250",
172
+ "\\U00003260-\\U0000327f",
173
+ "\\U0000328a-\\U000032b0",
174
+ "\\U000032c0-\\U000033ff",
175
+ "\\U00004dc0-\\U00004dff",
176
+ "\\U0000a490-\\U0000a4c6",
177
+ "\\U0000a4fe-\\U0000a4ff",
178
+ "\\U0000a60d-\\U0000a60f",
179
+ "\\U0000a673",
180
+ "\\U0000a67e",
181
+ "\\U0000a6f2-\\U0000a6f7",
182
+ "\\U0000a700-\\U0000a716",
183
+ "\\U0000a720-\\U0000a721",
184
+ "\\U0000a789-\\U0000a78a",
185
+ "\\U0000a828-\\U0000a82b",
186
+ "\\U0000a836-\\U0000a839",
187
+ "\\U0000a874-\\U0000a877",
188
+ "\\U0000a8ce-\\U0000a8cf",
189
+ "\\U0000a8f8-\\U0000a8fa",
190
+ "\\U0000a8fc",
191
+ "\\U0000a92e-\\U0000a92f",
192
+ "\\U0000a95f",
193
+ "\\U0000a9c1-\\U0000a9cd",
194
+ "\\U0000a9de-\\U0000a9df",
195
+ "\\U0000aa5c-\\U0000aa5f",
196
+ "\\U0000aa77-\\U0000aa79",
197
+ "\\U0000aade-\\U0000aadf",
198
+ "\\U0000aaf0-\\U0000aaf1",
199
+ "\\U0000ab5b",
200
+ "\\U0000ab6a-\\U0000ab6b",
201
+ "\\U0000abeb",
202
+ "\\U0000fb29",
203
+ "\\U0000fbb2-\\U0000fbc2",
204
+ "\\U0000fd3e-\\U0000fd4f",
205
+ "\\U0000fdcf",
206
+ "\\U0000fdfc-\\U0000fdff",
207
+ "\\U0000fe10-\\U0000fe19",
208
+ "\\U0000fe30-\\U0000fe52",
209
+ "\\U0000fe54-\\U0000fe66",
210
+ "\\U0000fe68-\\U0000fe6b",
211
+ "\\U0000ff01-\\U0000ff0f",
212
+ "\\U0000ff1a-\\U0000ff20",
213
+ "\\U0000ff3b-\\U0000ff40",
214
+ "\\U0000ff5b-\\U0000ff65",
215
+ "\\U0000ffe0-\\U0000ffe6",
216
+ "\\U0000ffe8-\\U0000ffee",
217
+ "\\U0000fffc-\\U0000fffd",
218
+ "\\U00010100-\\U00010102",
219
+ "\\U00010137-\\U0001013f",
220
+ "\\U00010179-\\U00010189",
221
+ "\\U0001018c-\\U0001018e",
222
+ "\\U00010190-\\U0001019c",
223
+ "\\U000101a0",
224
+ "\\U000101d0-\\U000101fc",
225
+ "\\U0001039f",
226
+ "\\U000103d0",
227
+ "\\U0001056f",
228
+ "\\U00010857",
229
+ "\\U00010877-\\U00010878",
230
+ "\\U0001091f",
231
+ "\\U0001093f",
232
+ "\\U00010a50-\\U00010a58",
233
+ "\\U00010a7f",
234
+ "\\U00010ac8",
235
+ "\\U00010af0-\\U00010af6",
236
+ "\\U00010b39-\\U00010b3f",
237
+ "\\U00010b99-\\U00010b9c",
238
+ "\\U00010ead",
239
+ "\\U00010f55-\\U00010f59",
240
+ "\\U00010f86-\\U00010f89",
241
+ "\\U00011047-\\U0001104d",
242
+ "\\U000110bb-\\U000110bc",
243
+ "\\U000110be-\\U000110c1",
244
+ "\\U00011140-\\U00011143",
245
+ "\\U00011174-\\U00011175",
246
+ "\\U000111c5-\\U000111c8",
247
+ "\\U000111cd",
248
+ "\\U000111db",
249
+ "\\U000111dd-\\U000111df",
250
+ "\\U00011238-\\U0001123d",
251
+ "\\U000112a9",
252
+ "\\U0001144b-\\U0001144f",
253
+ "\\U0001145a-\\U0001145b",
254
+ "\\U0001145d",
255
+ "\\U000114c6",
256
+ "\\U000115c1-\\U000115d7",
257
+ "\\U00011641-\\U00011643",
258
+ "\\U00011660-\\U0001166c",
259
+ "\\U000116b9",
260
+ "\\U0001173c-\\U0001173f",
261
+ "\\U0001183b",
262
+ "\\U00011944-\\U00011946",
263
+ "\\U000119e2",
264
+ "\\U00011a3f-\\U00011a46",
265
+ "\\U00011a9a-\\U00011a9c",
266
+ "\\U00011a9e-\\U00011aa2",
267
+ "\\U00011b00-\\U00011b09",
268
+ "\\U00011c41-\\U00011c45",
269
+ "\\U00011c70-\\U00011c71",
270
+ "\\U00011ef7-\\U00011ef8",
271
+ "\\U00011f43-\\U00011f4f",
272
+ "\\U00011fd5-\\U00011ff1",
273
+ "\\U00011fff",
274
+ "\\U00012470-\\U00012474",
275
+ "\\U00012ff1-\\U00012ff2",
276
+ "\\U00016a6e-\\U00016a6f",
277
+ "\\U00016af5",
278
+ "\\U00016b37-\\U00016b3f",
279
+ "\\U00016b44-\\U00016b45",
280
+ "\\U00016e97-\\U00016e9a",
281
+ "\\U00016fe2",
282
+ "\\U0001bc9c",
283
+ "\\U0001bc9f",
284
+ "\\U0001cf50-\\U0001cfc3",
285
+ "\\U0001d000-\\U0001d0f5",
286
+ "\\U0001d100-\\U0001d126",
287
+ "\\U0001d129-\\U0001d164",
288
+ "\\U0001d16a-\\U0001d16c",
289
+ "\\U0001d183-\\U0001d184",
290
+ "\\U0001d18c-\\U0001d1a9",
291
+ "\\U0001d1ae-\\U0001d1ea",
292
+ "\\U0001d200-\\U0001d241",
293
+ "\\U0001d245",
294
+ "\\U0001d300-\\U0001d356",
295
+ "\\U0001d6c1",
296
+ "\\U0001d6db",
297
+ "\\U0001d6fb",
298
+ "\\U0001d715",
299
+ "\\U0001d735",
300
+ "\\U0001d74f",
301
+ "\\U0001d76f",
302
+ "\\U0001d789",
303
+ "\\U0001d7a9",
304
+ "\\U0001d7c3",
305
+ "\\U0001d800-\\U0001d9ff",
306
+ "\\U0001da37-\\U0001da3a",
307
+ "\\U0001da6d-\\U0001da74",
308
+ "\\U0001da76-\\U0001da83",
309
+ "\\U0001da85-\\U0001da8b",
310
+ "\\U0001e14f",
311
+ "\\U0001e2ff",
312
+ "\\U0001e95e-\\U0001e95f",
313
+ "\\U0001ecac",
314
+ "\\U0001ecb0",
315
+ "\\U0001ed2e",
316
+ "\\U0001eef0-\\U0001eef1",
317
+ "\\U0001f000-\\U0001f02b",
318
+ "\\U0001f030-\\U0001f093",
319
+ "\\U0001f0a0-\\U0001f0ae",
320
+ "\\U0001f0b1-\\U0001f0bf",
321
+ "\\U0001f0c1-\\U0001f0cf",
322
+ "\\U0001f0d1-\\U0001f0f5",
323
+ "\\U0001f10d-\\U0001f12f",
324
+ "\\U0001f14a-\\U0001f14f",
325
+ "\\U0001f16a-\\U0001f16f",
326
+ "\\U0001f18a-\\U0001f1ad",
327
+ "\\U0001f1e6-\\U0001f202",
328
+ "\\U0001f210-\\U0001f23b",
329
+ "\\U0001f240-\\U0001f248",
330
+ "\\U0001f250-\\U0001f251",
331
+ "\\U0001f260-\\U0001f265",
332
+ "\\U0001f300-\\U0001f6d7",
333
+ "\\U0001f6dc-\\U0001f6ec",
334
+ "\\U0001f6f0-\\U0001f6fc",
335
+ "\\U0001f700-\\U0001f776",
336
+ "\\U0001f77b-\\U0001f7d9",
337
+ "\\U0001f7e0-\\U0001f7eb",
338
+ "\\U0001f7f0",
339
+ "\\U0001f800-\\U0001f80b",
340
+ "\\U0001f810-\\U0001f847",
341
+ "\\U0001f850-\\U0001f859",
342
+ "\\U0001f860-\\U0001f887",
343
+ "\\U0001f890-\\U0001f8ad",
344
+ "\\U0001f8b0-\\U0001f8b1",
345
+ "\\U0001f900-\\U0001fa53",
346
+ "\\U0001fa60-\\U0001fa6d",
347
+ "\\U0001fa70-\\U0001fa7c",
348
+ "\\U0001fa80-\\U0001fa88",
349
+ "\\U0001fa90-\\U0001fabd",
350
+ "\\U0001fabf-\\U0001fac5",
351
+ "\\U0001face-\\U0001fadb",
352
+ "\\U0001fae0-\\U0001fae8",
353
+ "\\U0001faf0-\\U0001faf8",
354
+ "\\U0001fb00-\\U0001fb92",
355
+ "\\U0001fb94-\\U0001fbca",
356
+ "\\U000f1990-\\U000f199d", # UCSUR punctuation
357
+ ]
358
+
359
+ UCSUR_PUNCT_RANGES = UNICODE_PUNCT_RANGES[-1] # NOTE: THIS CAN CHANGE
360
+
361
+ UNICODE_PUNCT = find_unicode_chars(UNICODE_PUNCT_RANGES)
362
+ # this is a large string.
13
363
 
14
364
  # `\p{posix_punct}` character class
15
365
  POSIX_PUNCT = r"""-!"#$%&'()*+,./:;<=>?@[\]^_`{|}~"""
16
- ALL_PUNCT_RANGES = "".join(find_unicode_ranges(POSIX_PUNCT + UNICODE_PUNCT))
366
+ POSIX_PUNCT_RANGES = find_unicode_ranges(POSIX_PUNCT)
367
+
368
+ ALL_PUNCT = "".join(sorted(list(set(POSIX_PUNCT + UNICODE_PUNCT))))
369
+ ALL_PUNCT_RANGES = "".join(find_unicode_ranges(ALL_PUNCT))
370
+ # combined bc the result could be simpler
371
+
17
372
  SENTENCE_PUNCT = """.?!:;'"()[-]“”·…"""
18
373
 
19
374
 
@@ -35,38 +390,84 @@ ALLOWABLES = {
35
390
  "wxw", # wile ala wile
36
391
  }
37
392
 
393
+ IGNORABLES = {
394
+ # o, e, n are not here bc they're not frequently problematic in english messages
395
+ "a",
396
+ "am",
397
+ "an",
398
+ "i",
399
+ "in",
400
+ "is",
401
+ "l", # they'll
402
+ "m", # i'm
403
+ "me",
404
+ "no",
405
+ "s", # let's
406
+ "so",
407
+ "t", # don't
408
+ "to",
409
+ "u", # you
410
+ "we",
411
+ "un", # un-
412
+ "use",
413
+ "some",
414
+ "like",
415
+ }
416
+
417
+ UCSUR_RANGES = [
418
+ "\\U000F1900-\\U000F1977", # pu
419
+ "\\U000F1978-\\U000F1988", # ku suli
420
+ "\\U000F19A0-\\U000F19A3", # ku lili
421
+ ]
422
+ NIMI_UCSUR = find_unicode_chars(UCSUR_RANGES)
423
+
424
+
425
+ # NIMI_PU_UCSUR_RANGES = ["\\U000F1900-\\U000F1977"]
426
+ # NIMI_PU_ALE_UCSUR_RANGES = NIMI_PU_UCSUR_RANGES + ["\\U000F1978-\\U000F197A"]
427
+
428
+
429
+ def category_helper(data: Dict[str, Dict[str, str]], key: str, value: str) -> List[str]:
430
+ return [d["word"] for d in data.values() if d[key] == value]
431
+
432
+
38
433
  with open(LINKU) as f:
39
434
  linku: Dict[str, Dict[str, str]] = json.loads(f.read())
40
- NIMI_PU: List[str] = [d["word"] for d in linku.values() if d["book"] == "pu"]
435
+ NIMI_PU: List[str] = category_helper(linku, "book", "pu")
41
436
  NIMI_PU_SYNONYMS: List[str] = ["namako", "kin", "oko"]
42
- NIMI_LINKU: List[str] = [
43
- d["word"] for d in linku.values() if d["usage_category"] in ["core", "common"]
44
- ]
45
- NIMI_LINKU_LILI: List[str] = [
46
- d["word"]
47
- for d in linku.values()
48
- if d["usage_category"] not in ["core", "common"]
49
- ]
437
+
438
+ NIMI_KU_SULI = category_helper(linku, "book", "ku suli")
439
+ NIMI_KU_LILI = category_helper(linku, "book", "ku lili")
440
+
441
+ NIMI_LINKU_CORE = category_helper(linku, "usage_category", "core")
442
+ NIMI_LINKU_COMMON = category_helper(linku, "usage_category", "common")
443
+ NIMI_LINKU_UNCOMMON = category_helper(linku, "usage_category", "uncommon")
444
+ NIMI_LINKU_OBSCURE = category_helper(linku, "usage_category", "obscure")
50
445
 
51
446
  with open(SANDBOX) as f:
52
447
  sandbox: Dict[str, Dict[str, str]] = json.loads(f.read())
53
- NIMI_LINKU_SANDBOX: List[str] = NIMI_LINKU_LILI + [
54
- d["word"] for d in sandbox.values()
55
- ]
448
+ NIMI_LINKU_SANDBOX: List[str] = [d["word"] for d in sandbox.values()]
56
449
 
57
450
  del linku
58
451
  del sandbox
59
452
 
60
453
  __all__ = [
454
+ "ALLOWABLES",
455
+ "ALL_PUNCT",
456
+ "ALL_PUNCT_RANGES",
61
457
  "ALPHABET",
62
458
  "CONSONANTS",
63
- "NIMI_LINKU",
64
- "NIMI_LINKU_LILI",
459
+ "NIMI_KU_LILI",
460
+ "NIMI_KU_SULI",
461
+ "NIMI_LINKU_COMMON",
462
+ "NIMI_LINKU_CORE",
463
+ "NIMI_LINKU_OBSCURE",
65
464
  "NIMI_LINKU_SANDBOX",
465
+ "NIMI_LINKU_UNCOMMON",
66
466
  "NIMI_PU",
67
467
  "NIMI_PU_SYNONYMS",
68
- "VOWELS",
69
- "UNICODE_PUNCT",
70
- "ALLOWABLES",
71
468
  "POSIX_PUNCT",
469
+ "POSIX_PUNCT_RANGES",
470
+ "UNICODE_PUNCT",
471
+ "UNICODE_PUNCT_RANGES",
472
+ "VOWELS",
72
473
  ]
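`category_helper` is a small projection over the Linku data: it collects the `word` field of every entry whose `key` equals `value`. A toy sketch with hand-written data in the same shape; the values are illustrative, not taken from linku.json:

```python
from typing import Dict, List

def category_helper(data: Dict[str, Dict[str, str]], key: str, value: str) -> List[str]:
    return [d["word"] for d in data.values() if d[key] == value]

toy_linku = {
    "toki": {"word": "toki", "book": "pu", "usage_category": "core"},
    "misikeke": {"word": "misikeke", "book": "none", "usage_category": "common"},
}
print(category_helper(toy_linku, "book", "pu"))                # ['toki']
print(category_helper(toy_linku, "usage_category", "common"))  # ['misikeke']
```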
sonatoki/py.typed ADDED
File without changes
sonatoki/utils.py CHANGED
@@ -1,10 +1,23 @@
1
1
  # STL
2
2
  import re
3
- from typing import List
3
+ from typing import Set, List, Iterable
4
+
5
+ # LOCAL
6
+ from sonatoki.Cleaners import Lowercase, ConsecutiveDuplicates
4
7
 
5
8
  TO_ESCAPE = ["\\", "^", "[", "]", "-"]
6
9
 
7
10
 
11
+ def prep_dictionary(words: Iterable[str]) -> Set[str]:
12
+ out: Set[str] = set()
13
+ cleaners = [Lowercase, ConsecutiveDuplicates]
14
+ for word in words:
15
+ for c in cleaners:
16
+ word = c.clean(word)
17
+ out.add(word)
18
+ return out
19
+
20
+
8
21
  def regex_escape(s: str) -> str:
9
22
  """Escape all characters which must be escaped when embedded in a character class."""
10
23
  for c in TO_ESCAPE:
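`prep_dictionary` runs every dictionary entry through the same cleaners that incoming tokens receive, so `MemberFilter` lookups compare like with like. A brief sketch:

```python
from sonatoki.utils import prep_dictionary

# Entries are lowercased and runs of repeated letters are collapsed,
# mirroring what the Cleaners do to user input before filtering.
print(prep_dictionary(["Manna", "TOKI", "pona"]))
# expected: {'mana', 'toki', 'pona'}
```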
@@ -43,12 +56,37 @@ def find_unicode_ranges(chars: str) -> List[str]:
43
56
  return ranges
44
57
 
45
58
 
59
+ def find_unicode_chars(ranges: List[str]) -> str:
60
+ result: List[str] = []
61
+ for item in ranges:
62
+ if "-" in item:
63
+ start, end = item.split("-")
64
+ start = int(start.lstrip("\\U"), 16)
65
+ end = int(end.lstrip("\\U"), 16)
66
+ result.extend(chr(code_point) for code_point in range(start, end + 1))
67
+ else:
68
+ result.append(chr(int(item.lstrip("\\U"), 16)))
69
+ return "".join(result)
70
+
71
+
46
72
  if __name__ == "__main__":
47
73
  """
48
74
  Helper script to fetch UNICODE_PUNCT in constants.py
49
75
  """
50
76
 
51
- PUNCT_CATEGORIES = {"Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps", "Sm", "Sk", "Sc", "So"}
77
+ PUNCT_CATEGORIES = {
78
+ "Pc",
79
+ "Pd",
80
+ "Pe",
81
+ "Pf",
82
+ "Pi",
83
+ "Po",
84
+ "Ps",
85
+ "Sm",
86
+ "Sk",
87
+ "Sc",
88
+ "So",
89
+ }
52
90
  # Connector, Dash, Close (end), Final, Initial, Other, Open (sOpen), Math, Modifier (kModifier), Currency, Other
53
91
 
54
92
  # NOTE: UnicodeData.txt lists character ranges if there would be many characters.
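`find_unicode_ranges` and the new `find_unicode_chars` act as rough inverses: one compresses a string of characters into `\UXXXXXXXX` range strings, the other expands those ranges back into characters. A small round-trip sketch; the exact range formatting is assumed from the generated constants above:

```python
from sonatoki.utils import find_unicode_chars, find_unicode_ranges

ranges = find_unicode_ranges('!"#$%')  # five contiguous codepoints collapse to one range
print(ranges)                          # expected: ['\\U00000021-\\U00000025']
print(find_unicode_chars(ranges))      # expected: '!"#$%'
```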
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonatoki
3
- Version: 0.2.2
3
+ Version: 0.3.1
4
4
  Summary: ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?
5
5
  Author-Email: "jan Kekan San (@gregdan3)" <gregory.danielson3@gmail.com>
6
6
  License: AGPL-3.0-or-later
@@ -12,15 +12,22 @@ Description-Content-Type: text/markdown
12
12
 
13
13
  # sona toki
14
14
 
15
+ <div align="center">
16
+
17
+ ![Test workflow for this library](https://github.com/gregdan3/sona-toki/workflows/Tests/badge.svg)
18
+ [![Version number for this library](https://img.shields.io/pypi/v/sonatoki?logo=python&logoColor=%23cccccc)](https://pypi.org/project/sonatoki)
19
+
20
+ </div>
21
+
15
22
  ## What is **sona toki**?
16
23
 
17
- This library, "Language Knowledge," helps you identify whether a message is in Toki Pona. No grammar checking, yet, which means this more checks whether a given message has enough Toki Pona words.
24
+ This library, "Language Knowledge," helps you identify whether a message is in Toki Pona. It does so by determining whether a large enough number of words in a statement are "in Toki Pona". No grammar checking, yet.
18
25
 
19
- I wrote it with a variety of scraps and lessons learned from a prior project, [ilo pi toki pona taso, "toki-pona-only tool"](https://github.com/gregdan3/ilo-pi-toki-pona-taso). That tool will be rewritten to use this library shortly.
26
+ I wrote this library with a variety of scraps and lessons learned from a prior project, [ilo pi toki pona taso, "toki-pona-only tool"](https://github.com/gregdan3/ilo-pi-toki-pona-taso). That tool now uses this library to great success!
20
27
 
21
- If you've ever worked on a similar project, you know the question "is this message in [language]" is not a consistent one- the environment, time, preferences of the speaker, and much more, can all alter whether a given message is "in" any specific language, and this question applies to Toki Pona too.
28
+ If you've ever worked on a similar project, you know the question "is this message in [language]" is not a consistent one- the environment, time, preferences of the speaker, and much more, can all alter whether a given message is "in" any specific language. This complexity applies to Toki Pona too.
22
29
 
23
- This project "solves" that complex problem by offering a highly configurable parser, so you can tune it to your preferences and goals.
30
+ So, this project "solves" that complex problem by offering an opinionated tokenizer and a configurable parser, allowing you to tune its output to your preferences and goals. [Even silly ones.](https://sona.pona.la/wiki/isipin_epiku).
24
31
 
25
32
  ## Quick Start
26
33
 
@@ -53,12 +60,12 @@ Or if you'd prefer to configure on your own:
53
60
  from copy import deepcopy
54
61
  from sonatoki.ilo import Ilo
55
62
  from sonatoki.Configs import BaseConfig
56
- from sonatoki.Filters import NimiPuAle, Phonotactic, ProperName
63
+ from sonatoki.Filters import NimiLinkuCore, Phonotactic, ProperName
57
64
  from sonatoki.Scorers import SoftPassFail
58
65
 
59
66
  def main():
60
67
  config = deepcopy(BaseConfig)
61
- config["scoring_filters"].extend([NimiPuAle, Phonotactic, ProperName])
68
+ config["scoring_filters"].extend([NimiLinkuCore, Phonotactic, ProperName])
62
69
  config["scorer"] = SoftPassFail
63
70
 
64
71
  ilo = Ilo(**config)
@@ -88,24 +95,28 @@ After our proposal has been examined and a result given by the committee, I will
88
95
 
89
96
  ### What's the deal with the tokenizers?
90
97
 
91
- The Toki Pona tokenizer `word_tokenize_tok` is very specific in always separating writing characters from punctuation, and leaving contiguous punctuation as contiguous- this is a level of precision that NLTK's English tokenizer does not want for several reasons, such as that English words can have "punctuation" characters in them.
92
-
93
- Toki Pona doesn't have any mid-word symbols when rendered in the Latin alphabet, so a more aggressive tokenizer is highly desirable.
98
+ The Toki Pona tokenizer `sonatoki.Tokenizers.WordTokenizer` has the goal of tokenizing statements such that every token either represents a word candidate ("toki", "mumumu") or a complete non-candidate ("..!", "123").
99
+ NLTK's English tokenizer does not want this design, because English words can have "punctuation" characters in them.
100
+ But Toki Pona doesn't have any mid-word symbols when rendered in the Latin alphabet or in [Private Use Area Unicode characters](https://www.kreativekorp.com/ucsur/), so a more aggressive tokenizer is highly desirable.
94
101
 
95
- The other tokenizers are provided as a comparison case more than anything. I do not recommend their use.
102
+ The goal of splitting into word candidates and non-candidates is important, because any [encoding of Toki Pona's logographic script](https://www.kreativekorp.com/ucsur/charts/sitelen.html) will require each character be split into its own token, where the default behavior would be to leave consecutive non-punctuation together.
96
103
 
97
104
  ### Aren't there a lot of false positives?
98
105
 
99
- Yes. It's up to you to use this tool responsibly on input you've done your best to clean, and better, use stronger filters before weaker ones. For now though, here's a list of relevant false positives:
106
+ Yes, depending on the filter you choose and how you apply it.
107
+ It's up to you to use this tool responsibly on input you've done your best to clean, such as by using stronger filters before weaker ones.
108
+ For now though, here's a list of relevant false positives:
100
109
 
101
- - `ProperName` will errantly match text in languages without a capital/lowercase distinction, artificially inflating the scores.
102
- - `Alphabetic` will match a _lot_ of undesirable text- it essentially allows 14 letters of the English alphabet.
110
+ - `ProperName` will errantly match text in languages without a capital/lowercase distinction, artificially increasing scores.
111
+ - `Alphabetic` will match a _lot_ of undesirable text- it essentially allows 14 letters of the English alphabet. For example, "I'm well" would match as _three_ words: "i", "m", "well".
112
+ - `NimiPu` and other sets containing `a`, `mute`, `open`, and others will unavoidably match those words in English text too.
103
113
 
104
114
  ### Don't some of the cleaners/filters conflict?
105
115
 
106
- Yes. Some do so
116
+ Yes, though not terribly much.
107
117
 
108
118
  - `ConsecutiveDuplicates` may errantly change a word's validity. For example, "manna" is phonotactically invalid in Toki Pona, but would become "mana" which is valid.
109
- - `ConsecutiveDuplicates` will not work correctly with syllabaries (alphabets, but representing a pair of consonant and vowel).
119
+ - `ConsecutiveDuplicates` will not work correctly with syllabaries, though this should not change the validity of the analyzed word unless you attempt to dictionary match these words.
120
+ - If you build your own `MemberFilter` with words that have capital letters or consecutive duplicates, they will never match unless you use `prep_dictionary`.
110
121
 
111
- You'll notice a _lot_ of these are troubles regarding the application of latin alphabet filters to non-latin text. Working on it!
122
+ You'll notice these are mostly caused by applying Latin alphabet filters to non-Latin text. Working on it!
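As the note above says, a hand-rolled `MemberFilter` whose entries contain capital letters or doubled letters will never match unless the dictionary goes through `prep_dictionary` first. A hedged sketch of the pattern the built-in Nimi* filters use; the word list here is hypothetical:

```python
from sonatoki.Filters import MemberFilter
from sonatoki.utils import prep_dictionary

class NimiMute(MemberFilter):  # hypothetical custom dictionary filter
    tokens = prep_dictionary(["Kijetesantakalu", "yupekosi", "wuwojiti"])

print(NimiMute.filter("kijetesantakalu"))  # True: the entry was lowercased at prep time
```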
@@ -0,0 +1,18 @@
1
+ sonatoki-0.3.1.dist-info/METADATA,sha256=nWomuM-AeE98VwnWen7qffNclw8emxAf-oFtXwba8wI,6341
2
+ sonatoki-0.3.1.dist-info/WHEEL,sha256=vnE8JVcI2Wz7GRKorsPArnBdnW2SWKWGow5gu5tHlRU,90
3
+ sonatoki-0.3.1.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
4
+ sonatoki/Cleaners.py,sha256=m0j1a1vs9Mdqp724r9Xfh1Y_tyP6GYCkihv8rH8m7lA,1871
5
+ sonatoki/Configs.py,sha256=NS1_esoDNna8LyH_9bPMkxbo2sMSilYhG1PwYLdq6L8,3402
6
+ sonatoki/Filters.py,sha256=-j5xSZ8URjqalQVGMBabMvJ5ofZWazfN7YPfXkM_4uQ,9429
7
+ sonatoki/Preprocessors.py,sha256=aMXXuFBDlJudvzvukvCa7BixuROXXEb62un7I-TGOGs,4441
8
+ sonatoki/Scorers.py,sha256=W-1uYiqjsDejJzoe592ixs7wHazjJXPhuo-41zuJ26U,3643
9
+ sonatoki/Tokenizers.py,sha256=So5_Tu6J98MD3yVcwB_X3lw2uMG0TN6XHcTbQjFCu5Q,4254
10
+ sonatoki/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ sonatoki/__main__.py,sha256=6xc-wIrrFo9wTyn4zRQNAmqwmJBtVvCMwV-CrM-hueA,82
12
+ sonatoki/constants.py,sha256=XTFmEcnLBXwdYXjTq_EuW9e_TWLtnNLz2vFCf8m-sz0,12844
13
+ sonatoki/ilo.py,sha256=yyLgNPI0Hmb4f1BzX6IRHr11FPChfL2xDR_9odlr8_8,3849
14
+ sonatoki/linku.json,sha256=B5KNdhyM5UEfMciROgh1ECHr3i-ASBeMvwrkzNJX47c,271013
15
+ sonatoki/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ sonatoki/sandbox.json,sha256=hx6LRsfvmmTtqXcXIyCsfSaGK3DZ-GCdbM8xhZQBHoA,77650
17
+ sonatoki/utils.py,sha256=OMaRyoNvKGKYQCBDjQyaCI58-wMpQ0wrrNjTJKsEZ9Y,3550
18
+ sonatoki-0.3.1.dist-info/RECORD,,
@@ -1,17 +0,0 @@
1
- sonatoki-0.2.2.dist-info/METADATA,sha256=XhDkXgLI0iFR0ceadVnXNUBlA6DcXkfVmZHxycL1tNA,5160
2
- sonatoki-0.2.2.dist-info/WHEEL,sha256=vnE8JVcI2Wz7GRKorsPArnBdnW2SWKWGow5gu5tHlRU,90
3
- sonatoki-0.2.2.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
4
- sonatoki/Cleaners.py,sha256=AMonXBUk3w1vdRiDrpB9XJAdjYaMPoqRtdX5oLI6r38,1744
5
- sonatoki/Configs.py,sha256=5mucu-Zsnt2p7GMiaM7GXUeL1F1fBq9sycjm4V7xsrI,1929
6
- sonatoki/Filters.py,sha256=qUhPWxAnNvQV9hCPJNu5RKGpx-_hWFvmL5Ab2-j_peo,5342
7
- sonatoki/Preprocessors.py,sha256=aMXXuFBDlJudvzvukvCa7BixuROXXEb62un7I-TGOGs,4441
8
- sonatoki/Scorers.py,sha256=W-1uYiqjsDejJzoe592ixs7wHazjJXPhuo-41zuJ26U,3643
9
- sonatoki/Tokenizers.py,sha256=zJ_5h9dlDIiJlLc6inuiOodWYt52nD83wS0QwSZixiM,3326
10
- sonatoki/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- sonatoki/__main__.py,sha256=6xc-wIrrFo9wTyn4zRQNAmqwmJBtVvCMwV-CrM-hueA,82
12
- sonatoki/constants.py,sha256=pOa1wb7B8w8RN772FcO5AYqqQAWlhbuLWM3N_sYlkdU,31232
13
- sonatoki/ilo.py,sha256=yyLgNPI0Hmb4f1BzX6IRHr11FPChfL2xDR_9odlr8_8,3849
14
- sonatoki/linku.json,sha256=B5KNdhyM5UEfMciROgh1ECHr3i-ASBeMvwrkzNJX47c,271013
15
- sonatoki/sandbox.json,sha256=hx6LRsfvmmTtqXcXIyCsfSaGK3DZ-GCdbM8xhZQBHoA,77650
16
- sonatoki/utils.py,sha256=jDwjRg-QpRIBalF65vIQWsX8wFLsITStihwfqimY-5E,2670
17
- sonatoki-0.2.2.dist-info/RECORD,,