sonatoki 0.2.1__tar.gz → 0.2.2__tar.gz

This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (27)
  1. {sonatoki-0.2.1 → sonatoki-0.2.2}/PKG-INFO +1 -1
  2. {sonatoki-0.2.1 → sonatoki-0.2.2}/pyproject.toml +1 -1
  3. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/Filters.py +5 -5
  4. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/constants.py +11 -5
  5. {sonatoki-0.2.1 → sonatoki-0.2.2}/tests/test_utils.py +2 -2
  6. {sonatoki-0.2.1 → sonatoki-0.2.2}/LICENSE +0 -0
  7. {sonatoki-0.2.1 → sonatoki-0.2.2}/README.md +0 -0
  8. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/Cleaners.py +0 -0
  9. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/Configs.py +0 -0
  10. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/Preprocessors.py +0 -0
  11. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/Scorers.py +0 -0
  12. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/Tokenizers.py +0 -0
  13. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/__init__.py +0 -0
  14. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/__main__.py +0 -0
  15. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/ilo.py +0 -0
  16. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/linku.json +0 -0
  17. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/sandbox.json +0 -0
  18. {sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/utils.py +0 -0
  19. {sonatoki-0.2.1 → sonatoki-0.2.2}/tests/__init__.py +0 -0
  20. {sonatoki-0.2.1 → sonatoki-0.2.2}/tests/test_cleaners.py +0 -0
  21. {sonatoki-0.2.1 → sonatoki-0.2.2}/tests/test_filters.py +0 -0
  22. {sonatoki-0.2.1 → sonatoki-0.2.2}/tests/test_ilo.py +0 -0
  23. {sonatoki-0.2.1 → sonatoki-0.2.2}/tests/test_preprocessors.py +0 -0
  24. {sonatoki-0.2.1 → sonatoki-0.2.2}/tests/test_scorers.py +0 -0
  25. {sonatoki-0.2.1 → sonatoki-0.2.2}/tests/test_tokenize.py +0 -0
  26. {sonatoki-0.2.1 → sonatoki-0.2.2}/tests/tokenize_cases/tokenize_sentences_tok.yml +0 -0
  27. {sonatoki-0.2.1 → sonatoki-0.2.2}/tests/tokenize_cases/tokenize_words_tok.yml +0 -0
{sonatoki-0.2.1 → sonatoki-0.2.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sonatoki
-Version: 0.2.1
+Version: 0.2.2
 Summary: ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?
 Author-Email: "jan Kekan San (@gregdan3)" <gregory.danielson3@gmail.com>
 License: AGPL-3.0-or-later
{sonatoki-0.2.1 → sonatoki-0.2.2}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "sonatoki"
-version = "0.2.1"
+version = "0.2.2"
 description = "ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?"
 authors = [
     { name = "jan Kekan San (@gregdan3)", email = "gregory.danielson3@gmail.com" },
{sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/Filters.py
@@ -16,11 +16,11 @@ from sonatoki.constants import (
     ALLOWABLES,
     CONSONANTS,
     NIMI_LINKU,
-    NIMI_PU_ALE,
     POSIX_PUNCT,
     UNICODE_PUNCT,
-    NIMI_LINKU_ALE,
+    NIMI_LINKU_LILI,
     ALL_PUNCT_RANGES,
+    NIMI_PU_SYNONYMS,
     NIMI_LINKU_SANDBOX,
 )
 
@@ -103,7 +103,7 @@ class NimiPu(MemberFilter):
 
 
 class NimiPuAle(MemberFilter):
-    tokens = set(NIMI_PU_ALE)
+    tokens = set(NIMI_PU + NIMI_PU_SYNONYMS)
 
 
 class NimiLinku(MemberFilter):
@@ -111,11 +111,11 @@ class NimiLinku(MemberFilter):
 
 
 class NimiLinkuAle(MemberFilter):
-    tokens = set(NIMI_LINKU_ALE)
+    tokens = set(NIMI_LINKU + NIMI_LINKU_LILI)
 
 
 class NimiLinkuSandbox(MemberFilter):
-    tokens = set(NIMI_LINKU_SANDBOX)
+    tokens = set(NIMI_LINKU + NIMI_LINKU_LILI + NIMI_LINKU_SANDBOX)
 
 
 class Phonotactic(RegexFilter):
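
In effect, the NimiPuAle, NimiLinkuAle and NimiLinkuSandbox filters now build their token sets by combining the smaller word lists instead of importing pre-merged aggregates. A minimal sanity sketch, relying only on the tokens class attribute that the diff itself assigns (no other filter API is assumed):

# Sketch only: uses nothing beyond the `tokens` sets defined above.
from sonatoki.Filters import NimiPuAle, NimiLinkuAle, NimiLinkuSandbox

assert "kin" in NimiPuAle.tokens                       # included via NIMI_PU_SYNONYMS
assert NimiLinkuAle.tokens <= NimiLinkuSandbox.tokens  # sandbox filter is a superset
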
{sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/constants.py
@@ -38,15 +38,21 @@ ALLOWABLES = {
 with open(LINKU) as f:
     linku: Dict[str, Dict[str, str]] = json.loads(f.read())
     NIMI_PU: List[str] = [d["word"] for d in linku.values() if d["book"] == "pu"]
-    NIMI_PU_ALE: List[str] = NIMI_PU + ["namako", "kin", "oko"]
+    NIMI_PU_SYNONYMS: List[str] = ["namako", "kin", "oko"]
     NIMI_LINKU: List[str] = [
         d["word"] for d in linku.values() if d["usage_category"] in ["core", "common"]
     ]
-    NIMI_LINKU_ALE: List[str] = [d["word"] for d in linku.values()]
+    NIMI_LINKU_LILI: List[str] = [
+        d["word"]
+        for d in linku.values()
+        if d["usage_category"] not in ["core", "common"]
+    ]
 
 with open(SANDBOX) as f:
     sandbox: Dict[str, Dict[str, str]] = json.loads(f.read())
-    NIMI_LINKU_SANDBOX: List[str] = [d["word"] for d in sandbox.values()]
+    NIMI_LINKU_SANDBOX: List[str] = NIMI_LINKU_LILI + [
+        d["word"] for d in sandbox.values()
+    ]
 
 del linku
 del sandbox
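
The aggregate constants NIMI_PU_ALE and NIMI_LINKU_ALE are removed rather than renamed, but they can be rebuilt from the new pieces. A small sketch of the relationships implied by the definitions above (only the constants shown in this diff are assumed):

# Sketch only: reconstructs the removed aggregates from the new constants.
from sonatoki.constants import (
    NIMI_PU,
    NIMI_PU_SYNONYMS,
    NIMI_LINKU,
    NIMI_LINKU_LILI,
    NIMI_LINKU_SANDBOX,
)

old_nimi_pu_ale = set(NIMI_PU) | set(NIMI_PU_SYNONYMS)       # former NIMI_PU_ALE
old_nimi_linku_ale = set(NIMI_LINKU) | set(NIMI_LINKU_LILI)  # former NIMI_LINKU_ALE

# NIMI_LINKU and NIMI_LINKU_LILI split linku.json by usage_category, so they are
# disjoint, and NIMI_LINKU_SANDBOX now contains NIMI_LINKU_LILI by construction.
assert not set(NIMI_LINKU) & set(NIMI_LINKU_LILI)
assert set(NIMI_LINKU_LILI) <= set(NIMI_LINKU_SANDBOX)
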
{sonatoki-0.2.1 → sonatoki-0.2.2}/src/sonatoki/constants.py
@@ -55,10 +61,10 @@ __all__ = [
     "ALPHABET",
     "CONSONANTS",
     "NIMI_LINKU",
-    "NIMI_LINKU_ALE",
+    "NIMI_LINKU_LILI",
     "NIMI_LINKU_SANDBOX",
     "NIMI_PU",
-    "NIMI_PU_ALE",
+    "NIMI_PU_SYNONYMS",
     "VOWELS",
     "UNICODE_PUNCT",
     "ALLOWABLES",
{sonatoki-0.2.1 → sonatoki-0.2.2}/tests/test_utils.py
@@ -6,12 +6,12 @@ import hypothesis.strategies as st
 
 # LOCAL
 from sonatoki.Filters import Syllabic, Phonotactic, AlphabeticRe
-from sonatoki.constants import NIMI_LINKU_ALE
+from sonatoki.constants import NIMI_LINKU, NIMI_LINKU_LILI
 
 PROPER_NAME_RE = r"[A-Z][a-z]*"
 
 token_strategy = (
-    st.sampled_from(NIMI_LINKU_ALE)
+    st.sampled_from(NIMI_LINKU + NIMI_LINKU_LILI)
     | st.from_regex(Phonotactic.pattern.pattern, fullmatch=True)
     | st.from_regex(Syllabic.pattern.pattern, fullmatch=True)
     | st.from_regex(PROPER_NAME_RE, fullmatch=True)
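
The test change is the same substitution: sampling from NIMI_LINKU + NIMI_LINKU_LILI reproduces the word pool that NIMI_LINKU_ALE used to provide. A hedged example of using the concatenated list as a standalone Hypothesis strategy (hypothetical test, not part of the package's suite):

# Sketch only: st.sampled_from and @given are standard Hypothesis APIs.
from hypothesis import given
import hypothesis.strategies as st

from sonatoki.constants import NIMI_LINKU, NIMI_LINKU_LILI

@given(st.sampled_from(NIMI_LINKU + NIMI_LINKU_LILI))
def test_sampled_word_is_str(word):
    # Every dictionary word drawn from either list is a string.
    assert isinstance(word, str)
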