sonatoki 0.4.0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sonatoki/syllabic.txt ADDED
@@ -0,0 +1,297 @@
1
+ alamo
2
+ alan
3
+ alike
4
+ alone
5
+ ama
6
+ amen
7
+ ami
8
+ amin
9
+ amino
10
+ amo
11
+ amuse
12
+ ana
13
+ ane
14
+ ani
15
+ anise
16
+ anon
17
+ antelope
18
+ antena
19
+ anti
20
+ ape
21
+ apetite
22
+ apolo
23
+ asasin
24
+ asasinate
25
+ asimilate
26
+ asinine
27
+ asume
28
+ ate
29
+ awake
30
+ awaken
31
+ awe
32
+ awesome
33
+ awoke
34
+ eliminate
35
+ elite
36
+ elope
37
+ enema
38
+ eta
39
+ ewe
40
+ iluminate
41
+ imense
42
+ imitate
43
+ imune
44
+ inmate
45
+ insane
46
+ insulin
47
+ intake
48
+ intense
49
+ intimate
50
+ into
51
+ isolate
52
+ jake
53
+ jane
54
+ japan
55
+ jese
56
+ jetison
57
+ jin
58
+ joke
59
+ juke
60
+ kale
61
+ kane
62
+ kapa
63
+ keno
64
+ kilo
65
+ kimono
66
+ kite
67
+ kiten
68
+ kiwi
69
+ lake
70
+ lama
71
+ lame
72
+ lane
73
+ late
74
+ latino
75
+ lemon
76
+ leno
77
+ lese
78
+ lesen
79
+ leson
80
+ like
81
+ likewise
82
+ lima
83
+ lime
84
+ limo
85
+ lin
86
+ line
87
+ linen
88
+ lite
89
+ lone
90
+ lonesome
91
+ lose
92
+ losen
93
+ lote
94
+ loto
95
+ lowe
96
+ lulu
97
+ luna
98
+ make
99
+ male
100
+ man
101
+ mana
102
+ manate
103
+ manila
104
+ manipulate
105
+ mano
106
+ masa
107
+ mason
108
+ mate
109
+ matine
110
+ melon
111
+ memento
112
+ memo
113
+ men
114
+ mensa
115
+ menu
116
+ mesa
117
+ meta
118
+ mike
119
+ mile
120
+ milo
121
+ mime
122
+ mina
123
+ mine
124
+ mini
125
+ minute
126
+ misile
127
+ misuse
128
+ mite
129
+ miten
130
+ mojo
131
+ mola
132
+ mole
133
+ moma
134
+ momento
135
+ mon
136
+ mono
137
+ monson
138
+ monte
139
+ mope
140
+ mose
141
+ mote
142
+ moto
143
+ mule
144
+ mumu
145
+ muse
146
+ mutilate
147
+ muton
148
+ name
149
+ namesake
150
+ nan
151
+ nana
152
+ nine
153
+ nineten
154
+ ninja
155
+ nite
156
+ nome
157
+ nominate
158
+ nomine
159
+ non
160
+ none
161
+ nonsense
162
+ nope
163
+ nose
164
+ note
165
+ nuke
166
+ nun
167
+ ole
168
+ omelete
169
+ omen
170
+ one
171
+ onto
172
+ opose
173
+ oposite
174
+ ose
175
+ oto
176
+ otoman
177
+ pajama
178
+ pale
179
+ palete
180
+ palomino
181
+ panama
182
+ pane
183
+ papa
184
+ pate
185
+ paten
186
+ pele
187
+ pen
188
+ pene
189
+ peninsula
190
+ petite
191
+ pike
192
+ pile
193
+ pin
194
+ pina
195
+ pinata
196
+ pine
197
+ pinto
198
+ pipe
199
+ pipeline
200
+ poke
201
+ pole
202
+ polen
203
+ polite
204
+ polo
205
+ polute
206
+ ponton
207
+ popa
208
+ pope
209
+ pose
210
+ potato
211
+ puke
212
+ pun
213
+ sake
214
+ saki
215
+ salami
216
+ sale
217
+ salina
218
+ saline
219
+ salon
220
+ salute
221
+ same
222
+ sane
223
+ santo
224
+ satelite
225
+ satin
226
+ semen
227
+ semi
228
+ sen
229
+ senate
230
+ senile
231
+ sense
232
+ sepuku
233
+ sesame
234
+ simulate
235
+ sine
236
+ site
237
+ sole
238
+ solo
239
+ some
240
+ sometime
241
+ son
242
+ sonata
243
+ sulen
244
+ sumo
245
+ sumon
246
+ sun
247
+ sunken
248
+ suntan
249
+ supose
250
+ take
251
+ taken
252
+ takin
253
+ tale
254
+ tame
255
+ tape
256
+ tate
257
+ tato
258
+ ten
259
+ tense
260
+ tiki
261
+ tile
262
+ time
263
+ timeline
264
+ tin
265
+ titan
266
+ toke
267
+ token
268
+ tomato
269
+ tome
270
+ ton
271
+ tone
272
+ tote
273
+ tule
274
+ tuna
275
+ tune
276
+ tuti
277
+ tutu
278
+ unite
279
+ unlike
280
+ unsen
281
+ unto
282
+ unwise
283
+ upon
284
+ use
285
+ wake
286
+ waken
287
+ wala
288
+ wanton
289
+ win
290
+ wine
291
+ wipe
292
+ wise
293
+ woke
294
+ woken
295
+ woman
296
+ women
297
+ won
sonatoki/utils.py CHANGED
@@ -1,5 +1,4 @@
1
1
  # STL
2
- import re
3
2
  import itertools
4
3
  from typing import Set, List, TypeVar, Iterable
5
4
 
@@ -87,58 +86,3 @@ def overlapping_ntuples(iterable: Iterable[T], n: int) -> Iterable[T]:
87
86
 
88
87
  # ends when any iter is empty; all groups will be same size
89
88
  return zip(*teed)
90
-
91
-
92
- if __name__ == "__main__":
93
- """Helper script to fetch UNICODE_PUNCT in constants.py."""
94
-
95
- PUNCT_CATEGORIES = {
96
- "Pc",
97
- "Pd",
98
- "Pe",
99
- "Pf",
100
- "Pi",
101
- "Po",
102
- "Ps",
103
- "Sm",
104
- "Sk",
105
- "Sc",
106
- "So",
107
- }
108
- # Connector, Dash, Close (end), Final, Initial, Other, Open (sOpen), Math, Modifier (kModifier), Currency, Other
109
-
110
- # NOTE: UnicodeData.txt lists character ranges if there would be many characters.
111
- # (e.g. CJK Ideograph, First at 4E00 and CJK Ideograph, Last at 9FFF).
112
- # This does not apply to any currently defined punctuation category.
113
-
114
- EXCEPTION_RANGES = re.compile(r"""[Ⓐ-ⓩ🄰-🅉🅐-🅩🅰-🆉]+""")
115
- # These groups are in Symbol other (So) but are not part of `\p{Punctuation}`
116
- # NOTE: There are many characters which look like writing characters but are not. Examples:
117
- # - kangxi radicals from ⺀ to ⿕ which are for demonstration
118
- # - circled katakana from ㋐ to ㋾ which... shouldn't be in \p{Punctuation} but oh well
119
-
120
- def is_punctuation(data: List[str]):
121
- return data[2] in PUNCT_CATEGORIES
122
-
123
- def get_character(data: List[str]):
124
- return chr(int(data[0], 16))
125
-
126
- def is_exception(c: str):
127
- return not not re.fullmatch(EXCEPTION_RANGES, c)
128
-
129
- # http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
130
- unicode_punctuation = ""
131
- with open("UnicodeData.txt", "r") as f:
132
- for line in f:
133
- data = line.split(";")
134
- if not is_punctuation(data):
135
- continue
136
-
137
- char = get_character(data)
138
- if is_exception(char):
139
- continue
140
-
141
- unicode_punctuation += char
142
-
143
- with open("UnicodePunctuation.txt", "w") as f:
144
- _ = f.write(unicode_punctuation)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonatoki
3
- Version: 0.4.0
3
+ Version: 0.5.1
4
4
  Summary: ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?
5
5
  Author-Email: "jan Kekan San (@gregdan3)" <gregory.danielson3@gmail.com>
6
6
  License: AGPL-3.0-or-later
@@ -8,6 +8,7 @@ Requires-Python: >=3.8
8
8
  Requires-Dist: unidecode>=1.3.6
9
9
  Requires-Dist: regex>=2023.12.25
10
10
  Requires-Dist: typing-extensions>=4.11.0
11
+ Requires-Dist: emoji>=2.12.1
11
12
  Description-Content-Type: text/markdown
12
13
 
13
14
  # sona toki
@@ -0,0 +1,20 @@
1
+ sonatoki-0.5.1.dist-info/METADATA,sha256=gj5B_q10R5l-w0jEuzFY2035qzp9tpmBQ-sZ0q73zXE,6370
2
+ sonatoki-0.5.1.dist-info/WHEEL,sha256=SOP-4bEE0jbVaCHQGVvF08uWxk5rcSsfEybvoQVHlD8,90
3
+ sonatoki-0.5.1.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
4
+ sonatoki/Cleaners.py,sha256=x2dT3MpDUfbrHA0EP2D3n1sTiKFFi5jw9ha-1dX973o,1958
5
+ sonatoki/Configs.py,sha256=HHaSAA7hus7aY6Xy-3fNlbzMwk3wJO0HrjTssg8P78M,4291
6
+ sonatoki/Filters.py,sha256=nVSmw5M4sEYA_8KI1fI53rMHkd9KO6yWbKfdxxExxN8,11700
7
+ sonatoki/Preprocessors.py,sha256=zuu-6SLqFgk88vfSnYlyZjZrzoZQ56U_1SFXoxThQDQ,5628
8
+ sonatoki/Scorers.py,sha256=LRQLgXKTU2VqhkMHFPVxyVt83DXf85_zrpDGk4ThU24,3811
9
+ sonatoki/Tokenizers.py,sha256=qFaA1-v-wjKMihtEJMeZpi3m4cSkJQgWhGhL-w0VgPE,4236
10
+ sonatoki/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ sonatoki/__main__.py,sha256=6n4kUF80APl6a0jV46h_ncHNuQbrLpZ_nAmiNAakiag,5673
12
+ sonatoki/alphabetic.txt,sha256=duyqAKilD2vLIr75RShCIAnktNJcGeEoQIk18V6czmg,11702
13
+ sonatoki/constants.py,sha256=a3OjhtH2Jp6RDot1NE-PrQfm2VzfM850b-qipFLnjS4,18868
14
+ sonatoki/ilo.py,sha256=PWZa202Q4h7IjnLxmfgT93iAPJL7dqJbA97L9kQDPiA,5658
15
+ sonatoki/linku.json,sha256=FLsaESG01rQ88OU8HvwOUl_P9qtGykJ1X-1xoMVDkKA,295077
16
+ sonatoki/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ sonatoki/sandbox.json,sha256=3BpCEjw-kB4z7DJAJ2UrE1YuFIe3knat8qi1iYuAIq4,83555
18
+ sonatoki/syllabic.txt,sha256=HnqY4TrZ3tPcHah3TsvG9F9gjMrnAGdJ8hHJNHyyUPc,1712
19
+ sonatoki/utils.py,sha256=sT5xLMEj0aLpy8GP92HKblJU1Wt1m8NUlMgCFWB32xQ,2265
20
+ sonatoki-0.5.1.dist-info/RECORD,,
@@ -1,18 +0,0 @@
1
- sonatoki-0.4.0.dist-info/METADATA,sha256=Z89tIHyGG9RRAgcr_3E4XW2IMX9NyT9mawcCeMQfXPU,6341
2
- sonatoki-0.4.0.dist-info/WHEEL,sha256=SOP-4bEE0jbVaCHQGVvF08uWxk5rcSsfEybvoQVHlD8,90
3
- sonatoki-0.4.0.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
4
- sonatoki/Cleaners.py,sha256=x2dT3MpDUfbrHA0EP2D3n1sTiKFFi5jw9ha-1dX973o,1958
5
- sonatoki/Configs.py,sha256=tOeJSlYXMBHbRPBxERGWGT5AjvCxNb3ZGu8GA4BYve4,4034
6
- sonatoki/Filters.py,sha256=mpJBl-YPMF-Yl6mKFXf0D6DwkPR6H424RlvrkSeh4Dc,10714
7
- sonatoki/Preprocessors.py,sha256=nvAzxpWP9WwT6gOCKcuiz5F8xYDdKIt9bOVUvy9o-G0,4459
8
- sonatoki/Scorers.py,sha256=LRQLgXKTU2VqhkMHFPVxyVt83DXf85_zrpDGk4ThU24,3811
9
- sonatoki/Tokenizers.py,sha256=So5_Tu6J98MD3yVcwB_X3lw2uMG0TN6XHcTbQjFCu5Q,4254
10
- sonatoki/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- sonatoki/__main__.py,sha256=6xc-wIrrFo9wTyn4zRQNAmqwmJBtVvCMwV-CrM-hueA,82
12
- sonatoki/constants.py,sha256=wH3iR32-Ic7vSkrMjAZIvmIysTtkJ-KBVU5zv3Oamqs,12656
13
- sonatoki/ilo.py,sha256=7KwTZgczzU2gbhC69yZbxtpTHy_fGtg_MnG_bDpiSxM,5639
14
- sonatoki/linku.json,sha256=fm4-dks5s9x1bs7q82GNngAedVCWilMPCQ_o-j35QL0,270950
15
- sonatoki/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- sonatoki/sandbox.json,sha256=zPtZgJ_CpJa-2Den0gTNlk52f-YEwFVcjMarQXeeu5U,77563
17
- sonatoki/utils.py,sha256=L984aXxvzfJaZ6GSWRKs7LweOGZYTLK11CdAhpLQr0g,4067
18
- sonatoki-0.4.0.dist-info/RECORD,,