sonatoki 0.11.2__tar.gz → 0.11.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {sonatoki-0.11.2 → sonatoki-0.11.3}/PKG-INFO +1 -1
  2. {sonatoki-0.11.2 → sonatoki-0.11.3}/pyproject.toml +1 -1
  3. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/constants.py +1 -1
  4. {sonatoki-0.11.2 → sonatoki-0.11.3}/tests/tokenize_cases/tokenize_sentences_tok.yml +5 -0
  5. {sonatoki-0.11.2 → sonatoki-0.11.3}/tests/tokenize_cases/tokenize_words_tok.yml +10 -7
  6. {sonatoki-0.11.2 → sonatoki-0.11.3}/LICENSE +0 -0
  7. {sonatoki-0.11.2 → sonatoki-0.11.3}/README.md +0 -0
  8. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/Cleaners.py +0 -0
  9. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/Configs.py +0 -0
  10. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/Filters.py +0 -0
  11. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/Preprocessors.py +0 -0
  12. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/Scorers.py +0 -0
  13. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/Tokenizers.py +0 -0
  14. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/__init__.py +0 -0
  15. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/__main__.py +0 -0
  16. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/alphabetic.txt +0 -0
  17. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/ilo.py +0 -0
  18. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/linku.json +0 -0
  19. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/py.typed +0 -0
  20. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/sandbox.json +0 -0
  21. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/syllabic.txt +0 -0
  22. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/types.py +0 -0
  23. {sonatoki-0.11.2 → sonatoki-0.11.3}/src/sonatoki/utils.py +0 -0
  24. {sonatoki-0.11.2 → sonatoki-0.11.3}/tests/__init__.py +0 -0
  25. {sonatoki-0.11.2 → sonatoki-0.11.3}/tests/test_cleaners.py +0 -0
  26. {sonatoki-0.11.2 → sonatoki-0.11.3}/tests/test_filters.py +0 -0
  27. {sonatoki-0.11.2 → sonatoki-0.11.3}/tests/test_ilo.py +0 -0
  28. {sonatoki-0.11.2 → sonatoki-0.11.3}/tests/test_preprocessors.py +0 -0
  29. {sonatoki-0.11.2 → sonatoki-0.11.3}/tests/test_properties.py +0 -0
  30. {sonatoki-0.11.2 → sonatoki-0.11.3}/tests/test_scorers.py +0 -0
  31. {sonatoki-0.11.2 → sonatoki-0.11.3}/tests/test_tokenize.py +0 -0
  32. {sonatoki-0.11.2 → sonatoki-0.11.3}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonatoki
3
- Version: 0.11.2
3
+ Version: 0.11.3
4
4
  Summary: ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?
5
5
  Author-Email: "jan Kekan San (@gregdan3)" <gregory.danielson3@gmail.com>
6
6
  License: AGPL-3.0-or-later
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sonatoki"
3
- version = "0.11.2"
3
+ version = "0.11.3"
4
4
  description = "ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?"
5
5
  authors = [
6
6
  { name = "jan Kekan San (@gregdan3)", email = "gregory.danielson3@gmail.com" },
@@ -538,7 +538,7 @@ QUOTATIVE_PUNCT = """"«»‹›“”‟„⹂「」『』"""
538
538
  UCSUR_SENTENCE_PUNCT = """󱦜󱦝"""
539
539
  ALL_SENTENCE_PUNCT = BASIC_SENTENCE_PUNCT + UCSUR_SENTENCE_PUNCT
540
540
 
541
- INTRA_WORD_PUNCT = """-'’."""
541
+ INTRA_WORD_PUNCT = """-'’._"""
542
542
 
543
543
 
544
544
  LINKU = Path(__file__).resolve().parent / Path("linku.json")
@@ -104,6 +104,11 @@
104
104
  - "monsi"
105
105
  - "-"
106
106
  - "ma"
107
+ - name: "intraword punctuation 4"
108
+ input: "look at this variable: leaf_node_right"
109
+ output:
110
+ - "look at this variable:"
111
+ - "leaf_node_right"
107
112
  - name: "multiline with fake intraword"
108
113
  input: >
109
114
  toki!
@@ -187,13 +187,7 @@
187
187
  - "e"
188
188
  - "sitelen"
189
189
  - "[_"
190
- - "ike"
191
- - "_"
192
- - "nanpa"
193
- - "_"
194
- - "lete"
195
- - "_"
196
- - "ike"
190
+ - "ike_nanpa_lete_ike"
197
191
  - "]."
198
192
  - "ni"
199
193
  - "li"
@@ -345,6 +339,15 @@
345
339
  input: "whom's't'd've'n't"
346
340
  output:
347
341
  - "whom's't'd've'n't"
342
+ - name: "underscore"
343
+ input: "look at this variable: leaf_node_right"
344
+ output:
345
+ - "look"
346
+ - "at"
347
+ - "this"
348
+ - "variable"
349
+ - ":"
350
+ - "leaf_node_right"
348
351
  - name: "just periods"
349
352
  input: "..."
350
353
  output:
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes