sonatoki 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sonatoki/Preprocessors.py CHANGED
@@ -149,7 +149,10 @@ class Codeblock(RegexPreprocessor):
149
149
  Subset of what would be removed by Backticks, but may be preferable.
150
150
  """
151
151
 
152
- pattern = re.compile(r"```\n(?:(?!```).*?)?```", flags=re.DOTALL)
152
+ pattern = re.compile(
153
+ r"```.+?```",
154
+ flags=re.DOTALL,
155
+ )
153
156
 
154
157
 
155
158
  class Spoilers(RegexPreprocessor):
sonatoki/constants.py CHANGED
@@ -503,8 +503,9 @@ ALL_PUNCT = "".join(sorted(list(set(POSIX_PUNCT + UNICODE_PUNCT))))
503
503
  ALL_PUNCT_RANGES_STR = "".join(find_unicode_ranges(ALL_PUNCT))
504
504
  # combined bc the result could be simpler
505
505
 
506
- SENTENCE_PUNCT = """.?!:;()[-]·•…"""
507
- # NOTE: quotes were previously included, but in TP they are *not* reliably sentence boundaries
506
+ SENTENCE_PUNCT = """.?!:;"()[-]«»‹›“”‟„⹂‽·•…「」『』"""
507
+ # single quotes are word boundaries if not intra-word, but double quotes are sentence
508
+ # boundaries
508
509
 
509
510
  INTRA_WORD_PUNCT = """-'’"""
510
511
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonatoki
3
- Version: 0.8.0
3
+ Version: 0.8.2
4
4
  Summary: ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?
5
5
  Author-Email: "jan Kekan San (@gregdan3)" <gregory.danielson3@gmail.com>
6
6
  License: AGPL-3.0-or-later
@@ -1,16 +1,16 @@
1
- sonatoki-0.8.0.dist-info/METADATA,sha256=52aTVq7ljGFzYm1Pdh9tKaRN3IVfXruRJZbwIiAPi9w,6517
2
- sonatoki-0.8.0.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
3
- sonatoki-0.8.0.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
1
+ sonatoki-0.8.2.dist-info/METADATA,sha256=-B-LR4O8O16t7Ond150qs5Il9j08wWnRa76q3sjjszA,6517
2
+ sonatoki-0.8.2.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
3
+ sonatoki-0.8.2.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
4
4
  sonatoki/Cleaners.py,sha256=x2dT3MpDUfbrHA0EP2D3n1sTiKFFi5jw9ha-1dX973o,1958
5
5
  sonatoki/Configs.py,sha256=h6-igZbhbYoYA0gJLrd3YCa5annTqacsAMGB1dX3v9A,4758
6
6
  sonatoki/Filters.py,sha256=rBEJrY_R6koFpoYl4yfo_9UR-i21HbvlUF0ORg1g0WE,13411
7
- sonatoki/Preprocessors.py,sha256=nY0_cmF4aEmGZxXc7ZEvhvf2BZO6GnrMUC8IqDwu47A,6034
7
+ sonatoki/Preprocessors.py,sha256=5xKBifsaHMm_fg8nQq4IdyLBGKe8SuWXg67-O5tl1qM,6043
8
8
  sonatoki/Scorers.py,sha256=aCU3p9rD4QOy-uu851FGGw-ARqUCG_l4V_z5rtRL420,5236
9
9
  sonatoki/Tokenizers.py,sha256=8lpC70bzXOpHyhVr5bmqpYKmdmQvJdf7X5-Icc9RRCw,5040
10
10
  sonatoki/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  sonatoki/__main__.py,sha256=6n4kUF80APl6a0jV46h_ncHNuQbrLpZ_nAmiNAakiag,5673
12
12
  sonatoki/alphabetic.txt,sha256=duyqAKilD2vLIr75RShCIAnktNJcGeEoQIk18V6czmg,11702
13
- sonatoki/constants.py,sha256=BxE_MME2XZUZLg9ZezPirUO2sxw4JkujsrKoENeYORc,19313
13
+ sonatoki/constants.py,sha256=y1ZyuvCC1geTWBEnHiOHPWBxisf5McMnJn4LzGd7RFI,19358
14
14
  sonatoki/ilo.py,sha256=Dsn0yagkwjqpAQoCj6mkZ6NqWeanRF2lxNDNoqjWGLo,5993
15
15
  sonatoki/linku.json,sha256=d72Dvht-a4gBmdqLLI8mElvo83zSpbxDmxJj05hOudM,295413
16
16
  sonatoki/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -18,4 +18,4 @@ sonatoki/sandbox.json,sha256=44csrQDaVtV-n8OyewabX1J9MmUFCsPct5C8E5Xuc58,140197
18
18
  sonatoki/syllabic.txt,sha256=HnqY4TrZ3tPcHah3TsvG9F9gjMrnAGdJ8hHJNHyyUPc,1712
19
19
  sonatoki/types.py,sha256=zoVJeaDLOPstREiHtoD9pv-AOCsJq2C4_GG3nTYd114,1267
20
20
  sonatoki/utils.py,sha256=sT5xLMEj0aLpy8GP92HKblJU1Wt1m8NUlMgCFWB32xQ,2265
21
- sonatoki-0.8.0.dist-info/RECORD,,
21
+ sonatoki-0.8.2.dist-info/RECORD,,