sonatoki 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sonatoki/Filters.py CHANGED
@@ -16,11 +16,11 @@ from sonatoki.constants import (
16
16
  ALLOWABLES,
17
17
  CONSONANTS,
18
18
  NIMI_LINKU,
19
- NIMI_PU_ALE,
20
19
  POSIX_PUNCT,
21
20
  UNICODE_PUNCT,
22
- NIMI_LINKU_ALE,
21
+ NIMI_LINKU_LILI,
23
22
  ALL_PUNCT_RANGES,
23
+ NIMI_PU_SYNONYMS,
24
24
  NIMI_LINKU_SANDBOX,
25
25
  )
26
26
 
@@ -103,7 +103,7 @@ class NimiPu(MemberFilter):
103
103
 
104
104
 
105
105
  class NimiPuAle(MemberFilter):
106
- tokens = set(NIMI_PU_ALE)
106
+ tokens = set(NIMI_PU + NIMI_PU_SYNONYMS)
107
107
 
108
108
 
109
109
  class NimiLinku(MemberFilter):
@@ -111,11 +111,11 @@ class NimiLinku(MemberFilter):
111
111
 
112
112
 
113
113
  class NimiLinkuAle(MemberFilter):
114
- tokens = set(NIMI_LINKU_ALE)
114
+ tokens = set(NIMI_LINKU + NIMI_LINKU_LILI)
115
115
 
116
116
 
117
117
  class NimiLinkuSandbox(MemberFilter):
118
- tokens = set(NIMI_LINKU_SANDBOX)
118
+ tokens = set(NIMI_LINKU + NIMI_LINKU_LILI + NIMI_LINKU_SANDBOX)
119
119
 
120
120
 
121
121
  class Phonotactic(RegexFilter):
sonatoki/constants.py CHANGED
@@ -38,15 +38,21 @@ ALLOWABLES = {
38
38
  with open(LINKU) as f:
39
39
  linku: Dict[str, Dict[str, str]] = json.loads(f.read())
40
40
  NIMI_PU: List[str] = [d["word"] for d in linku.values() if d["book"] == "pu"]
41
- NIMI_PU_ALE: List[str] = NIMI_PU + ["namako", "kin", "oko"]
41
+ NIMI_PU_SYNONYMS: List[str] = ["namako", "kin", "oko"]
42
42
  NIMI_LINKU: List[str] = [
43
43
  d["word"] for d in linku.values() if d["usage_category"] in ["core", "common"]
44
44
  ]
45
- NIMI_LINKU_ALE: List[str] = [d["word"] for d in linku.values()]
45
+ NIMI_LINKU_LILI: List[str] = [
46
+ d["word"]
47
+ for d in linku.values()
48
+ if d["usage_category"] not in ["core", "common"]
49
+ ]
46
50
 
47
51
  with open(SANDBOX) as f:
48
52
  sandbox: Dict[str, Dict[str, str]] = json.loads(f.read())
49
- NIMI_LINKU_SANDBOX: List[str] = [d["word"] for d in sandbox.values()]
53
+ NIMI_LINKU_SANDBOX: List[str] = NIMI_LINKU_LILI + [
54
+ d["word"] for d in sandbox.values()
55
+ ]
50
56
 
51
57
  del linku
52
58
  del sandbox
@@ -55,10 +61,10 @@ __all__ = [
55
61
  "ALPHABET",
56
62
  "CONSONANTS",
57
63
  "NIMI_LINKU",
58
- "NIMI_LINKU_ALE",
64
+ "NIMI_LINKU_LILI",
59
65
  "NIMI_LINKU_SANDBOX",
60
66
  "NIMI_PU",
61
- "NIMI_PU_ALE",
67
+ "NIMI_PU_SYNONYMS",
62
68
  "VOWELS",
63
69
  "UNICODE_PUNCT",
64
70
  "ALLOWABLES",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonatoki
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?
5
5
  Author-Email: "jan Kekan San (@gregdan3)" <gregory.danielson3@gmail.com>
6
6
  License: AGPL-3.0-or-later
@@ -1,17 +1,17 @@
1
- sonatoki-0.2.1.dist-info/METADATA,sha256=M1bUkEl_vzE48jEoyvwOF3_DdFJsxIP8CDPHDf_yErM,5160
2
- sonatoki-0.2.1.dist-info/WHEEL,sha256=vnE8JVcI2Wz7GRKorsPArnBdnW2SWKWGow5gu5tHlRU,90
3
- sonatoki-0.2.1.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
1
+ sonatoki-0.2.2.dist-info/METADATA,sha256=XhDkXgLI0iFR0ceadVnXNUBlA6DcXkfVmZHxycL1tNA,5160
2
+ sonatoki-0.2.2.dist-info/WHEEL,sha256=vnE8JVcI2Wz7GRKorsPArnBdnW2SWKWGow5gu5tHlRU,90
3
+ sonatoki-0.2.2.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
4
4
  sonatoki/Cleaners.py,sha256=AMonXBUk3w1vdRiDrpB9XJAdjYaMPoqRtdX5oLI6r38,1744
5
5
  sonatoki/Configs.py,sha256=5mucu-Zsnt2p7GMiaM7GXUeL1F1fBq9sycjm4V7xsrI,1929
6
- sonatoki/Filters.py,sha256=hZfVVv2e4ig_5hM2hdCsdNi21CFFK_AT53oO4N4H6FU,5276
6
+ sonatoki/Filters.py,sha256=qUhPWxAnNvQV9hCPJNu5RKGpx-_hWFvmL5Ab2-j_peo,5342
7
7
  sonatoki/Preprocessors.py,sha256=aMXXuFBDlJudvzvukvCa7BixuROXXEb62un7I-TGOGs,4441
8
8
  sonatoki/Scorers.py,sha256=W-1uYiqjsDejJzoe592ixs7wHazjJXPhuo-41zuJ26U,3643
9
9
  sonatoki/Tokenizers.py,sha256=zJ_5h9dlDIiJlLc6inuiOodWYt52nD83wS0QwSZixiM,3326
10
10
  sonatoki/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  sonatoki/__main__.py,sha256=6xc-wIrrFo9wTyn4zRQNAmqwmJBtVvCMwV-CrM-hueA,82
12
- sonatoki/constants.py,sha256=uuxrKQsxFY92waq7YesckbNw-Rtad7_dr0TcDr30yHk,31119
12
+ sonatoki/constants.py,sha256=pOa1wb7B8w8RN772FcO5AYqqQAWlhbuLWM3N_sYlkdU,31232
13
13
  sonatoki/ilo.py,sha256=yyLgNPI0Hmb4f1BzX6IRHr11FPChfL2xDR_9odlr8_8,3849
14
14
  sonatoki/linku.json,sha256=B5KNdhyM5UEfMciROgh1ECHr3i-ASBeMvwrkzNJX47c,271013
15
15
  sonatoki/sandbox.json,sha256=hx6LRsfvmmTtqXcXIyCsfSaGK3DZ-GCdbM8xhZQBHoA,77650
16
16
  sonatoki/utils.py,sha256=jDwjRg-QpRIBalF65vIQWsX8wFLsITStihwfqimY-5E,2670
17
- sonatoki-0.2.1.dist-info/RECORD,,
17
+ sonatoki-0.2.2.dist-info/RECORD,,