sonatoki 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sonatoki/Configs.py CHANGED
@@ -112,16 +112,30 @@ __corpus_tokens_dict: Set[str] = cast(
112
112
  ].tokens, # pyright: ignore[reportAttributeAccessIssue]
113
113
  )
114
114
  __corpus_tokens_dict -= {
115
- "an",
116
- "i",
117
- "me",
118
- "ne",
119
- "se",
120
- "take",
121
- "ten",
122
- "to",
123
- "u",
124
- "we",
115
+ # Sandbox words are removed from the CorpusConfig if they appear more frequently in English than Toki Pona by a factor of at least 3.
116
+ # In this case, all of these appear more often in English by a factor of at least 10.
117
+ "aka", # also known as
118
+ "an", # article
119
+ "api", # API
120
+ "i", # 1st person
121
+ "kana", # japanese script
122
+ "me", # 1st person
123
+ "ne", # "no" in several languages
124
+ "nu", # "new", now in dutch
125
+ "se", # spanish particle, "see"
126
+ "take", # acquire, perhaps forcefully or without permission
127
+ "ten", # 10
128
+ "to", # to, too
129
+ "u", # no u
130
+ "we", # 1st person plural
131
+ "wi", # wii and discussions of syllables
132
+ "sole", # singular, of shoe
133
+ # unexplored candidates for removal
134
+ # "omen", # ominous
135
+ # "papa", # father
136
+ # "lo", # "lo" and "loo"
137
+ # "ewe", # sheep
138
+ # "pa", # father- eh?
125
139
  }
126
140
  """Mimics the previous implementation of ilo pi toki pona taso."""
127
141
  LazyConfig: IloConfig = {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonatoki
3
- Version: 0.5.2
3
+ Version: 0.5.3
4
4
  Summary: ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?
5
5
  Author-Email: "jan Kekan San (@gregdan3)" <gregory.danielson3@gmail.com>
6
6
  License: AGPL-3.0-or-later
@@ -1,8 +1,8 @@
1
- sonatoki-0.5.2.dist-info/METADATA,sha256=LAV29H1iJPORzfPG4HgwobwoEBZ-9KjI2vGrlgDyO2U,6517
2
- sonatoki-0.5.2.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
3
- sonatoki-0.5.2.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
1
+ sonatoki-0.5.3.dist-info/METADATA,sha256=mC-i9FszUcyFA8peFVjRvj5QxCoVFjfHf60UWZNxquA,6517
2
+ sonatoki-0.5.3.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
3
+ sonatoki-0.5.3.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
4
4
  sonatoki/Cleaners.py,sha256=x2dT3MpDUfbrHA0EP2D3n1sTiKFFi5jw9ha-1dX973o,1958
5
- sonatoki/Configs.py,sha256=038pDARsmxDLbjnyd9fNO8wjdIJbGw_kOUqN4lYOfKU,4690
5
+ sonatoki/Configs.py,sha256=yprG3LEMyy6KKJWEEeJ7nEIC3-qtqA7p4CTHYv4a4vU,5469
6
6
  sonatoki/Filters.py,sha256=nVSmw5M4sEYA_8KI1fI53rMHkd9KO6yWbKfdxxExxN8,11700
7
7
  sonatoki/Preprocessors.py,sha256=nN6xL6mvVAnWZjSNW8CaeLm8x4kK3dCoB-1WYqi0ANU,5763
8
8
  sonatoki/Scorers.py,sha256=LRQLgXKTU2VqhkMHFPVxyVt83DXf85_zrpDGk4ThU24,3811
@@ -17,4 +17,4 @@ sonatoki/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  sonatoki/sandbox.json,sha256=44csrQDaVtV-n8OyewabX1J9MmUFCsPct5C8E5Xuc58,140197
18
18
  sonatoki/syllabic.txt,sha256=HnqY4TrZ3tPcHah3TsvG9F9gjMrnAGdJ8hHJNHyyUPc,1712
19
19
  sonatoki/utils.py,sha256=sT5xLMEj0aLpy8GP92HKblJU1Wt1m8NUlMgCFWB32xQ,2265
20
- sonatoki-0.5.2.dist-info/RECORD,,
20
+ sonatoki-0.5.3.dist-info/RECORD,,