sonatoki 0.7.0__tar.gz → 0.8.1__tar.gz
This diff compares two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- {sonatoki-0.7.0 → sonatoki-0.8.1}/PKG-INFO +1 -1
- {sonatoki-0.7.0 → sonatoki-0.8.1}/pyproject.toml +1 -1
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/Configs.py +2 -2
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/Filters.py +25 -2
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/Preprocessors.py +4 -1
- {sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_cleaners.py +2 -2
- {sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_filters.py +17 -17
- {sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_ilo.py +4 -1
- {sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_preprocessors.py +29 -23
- {sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_scorers.py +2 -2
- {sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_utils.py +3 -3
- {sonatoki-0.7.0 → sonatoki-0.8.1}/LICENSE +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/README.md +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/Cleaners.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/Scorers.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/Tokenizers.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/__init__.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/__main__.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/alphabetic.txt +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/constants.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/ilo.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/linku.json +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/py.typed +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/sandbox.json +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/syllabic.txt +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/types.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/utils.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/tests/__init__.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_properties.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_tokenize.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/tests/tokenize_cases/tokenize_sentences_tok.yml +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.1}/tests/tokenize_cases/tokenize_words_tok.yml +0 -0

{sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/Configs.py

@@ -11,12 +11,12 @@ from sonatoki.Filters import (
     And,
     Not,
     Filter,
+    PuName,
     Numeric,
     NimiUCSUR,
     Alphabetic,
     NimiKuLili,
     NimiKuSuli,
-    ProperName,
     Punctuation,
     LongSyllabic,
     Miscellaneous,
@@ -131,7 +131,7 @@ LazyConfig: IloConfig = {
     "preprocessors": [Emoji, Codeblock, URLs, AngleBracketObject, Reference],
     "cleaners": [ConsecutiveDuplicates],
     "ignoring_filters": [Numeric, Punctuation],
-    "scoring_filters": [Alphabetic, NimiUCSUR, ProperName, Miscellaneous],
+    "scoring_filters": [Alphabetic, NimiUCSUR, PuName, Miscellaneous],
     "scorer": SoftPassFail,
     "passing_score": 0.8,
     "word_tokenizer": WordTokenizerRe, # mimics old tokenizer
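
Behaviorally, LazyConfig is unchanged by this edit: PuName is the filter previously exported as ProperName, while a new, weaker ProperName is introduced alongside it (see the Filters.py hunks below). A minimal sketch of consuming the config, following the Ilo constructor pattern from the project README:

    from sonatoki.ilo import Ilo
    from sonatoki.Configs import LazyConfig

    # LazyConfig still applies the strict pu-style capitalization rule to
    # names; only the filter's name changed (ProperName -> PuName).
    ilo = Ilo(**LazyConfig)
    ilo.is_toki_pona("mi kepeken ilo Firefox")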

{sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/Filters.py

@@ -141,8 +141,27 @@ class FalsePosAlphabetic(MemberFilter):


 class ProperName(Filter):
-    """
-
+    """Determine if a given token is a valid name based on a reasonable weakening of
+    the rules given in Toki Pona: The Language of Good. A token matches if it has a capital
+    letter at its start and is **not** fully capitalized.
+
+    This corrects an issue with PuName, where scripts lacking a case distinction are
+    errantly counted"""
+
+    @classmethod
+    @override
+    @cache(maxsize=None)
+    def filter(cls, token: str) -> bool:
+        first_capitalized = token[0].isupper()
+        all_caps = token.isupper()
+
+        return first_capitalized and not all_caps
+
+
+class PuName(Filter):
+    """Determine if a given token is a valid name (also called a loan word) based on
+    the rules given in Toki Pona: The Language of Good.
+    When Toki Pona is written with the Latin alphabet, names are
     capitalized at their start. This filter identifies those tokens.

     Note that this alone cannot determine if a token is a valid name,
@@ -156,6 +175,9 @@ class ProperName(Filter):
     @override
     @cache(maxsize=None)
     def filter(cls, token: str) -> bool:
+        # first_capitalized = token[0].isupper()
+        # rest_capitalized = token[1:] == token[1:].upper()
+        # return first_capitalized and not rest_capitalized
         return token == token.capitalize()
         # TODO: If the token is in a script which doesn't have a case distinction,
         # this will errantly match.
@@ -445,6 +467,7 @@ __all__ = [
     "Or",
     "Phonotactic",
     "ProperName",
+    "PuName",
     "Punctuation",
     "Syllabic",
 ]
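
Net effect of the Filters.py change: the strict rule from pu keeps its behavior under the new name PuName, and ProperName is reimplemented as the weakened rule. Expected results, worked out from the two filter bodies shown above (a sketch, not captured test output):

    from sonatoki.Filters import ProperName, PuName

    # PuName: token must equal token.capitalize() exactly.
    PuName.filter("Kekan")    # True: single leading capital
    PuName.filter("SQLite")   # False: capitals after the first letter
    PuName.filter("SQL")      # False: fully capitalized

    # ProperName: leading capital, and not all-caps.
    ProperName.filter("Kekan")    # True
    ProperName.filter("SQLite")   # True: mixed case now accepted
    ProperName.filter("SQL")      # False: all-caps still rejected

This is why names like "MySQL" and "SQLite" can join NAME_MATCHES in test_ilo.py below.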

{sonatoki-0.7.0 → sonatoki-0.8.1}/src/sonatoki/Preprocessors.py

@@ -149,7 +149,10 @@ class Codeblock(RegexPreprocessor):
     Subset of what would be removed by Backticks, but may be preferable.
     """

-    pattern = re.compile(
+    pattern = re.compile(
+        r"```.+?```",
+        flags=re.DOTALL,
+    )


 class Spoilers(RegexPreprocessor):
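
The rewritten pattern leans on re.DOTALL so the lazy `.+?` can cross newlines, which also lets a single match consume an opening fence carrying a language tag (exercised by the new test_preprocessors examples further down). A standalone sketch of just the regex, assuming RegexPreprocessor.process substitutes matches with the empty string (the tests below assert exactly that via `.process(s).strip() == ""`):

    import re

    pattern = re.compile(r"```.+?```", flags=re.DOTALL)

    text = "```sql\nSELECT 1;\n``` toki pona text"
    print(pattern.sub("", text).strip())  # -> "toki pona text"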

{sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_cleaners.py

@@ -12,7 +12,7 @@ from sonatoki.Cleaners import Lowercase, ConsecutiveDuplicates, ConsecutiveDuplicatesRe
 from .test_utils import PROPER_NAME_RE


-@given(st.from_regex(ConsecutiveDuplicatesRe.pattern
+@given(st.from_regex(ConsecutiveDuplicatesRe.pattern))
 @example("tooooki a")
 @example("muuuuuu")
 @example("nnn")
@@ -25,7 +25,7 @@ def test_ConsecutiveDuplicatesRe(s: str):
     assert a.lower() != b.lower(), (s, res)


-@given(st.from_regex(ConsecutiveDuplicatesRe.pattern
+@given(st.from_regex(ConsecutiveDuplicatesRe.pattern))
 @example("Aaa")
 @example("aAa")
 @example("aaA")

{sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_filters.py

@@ -11,12 +11,12 @@ from sonatoki.Filters import (
     And,
     Not,
     NimiPu,
+    PuName,
     Numeric,
     Syllabic,
     Alphabetic,
     NimiKuLili,
     NimiKuSuli,
-    ProperName,
     Phonotactic,
     Punctuation,
     AlphabeticRe,
@@ -90,7 +90,7 @@ def test_NimiLinkuSandbox(s: str):
     assert res, repr(s)


-@given(st.from_regex(Phonotactic.pattern
+@given(st.from_regex(Phonotactic.pattern, fullmatch=True))
 @example("kijetesantakalu")
 @example("n")
 def test_Phonotactic(s: str):
@@ -98,28 +98,28 @@ def test_Phonotactic(s: str):
     assert res, repr(s)


-@given(st.from_regex(Phonotactic.pattern
+@given(st.from_regex(Phonotactic.pattern, fullmatch=True))
 def test_LongPhonotactic(s: str):
     len_ok = len(s) >= LongPhonotactic.length
     res = LongPhonotactic.filter(s)
     assert res == len_ok, repr(s) # will match given fullmatch


-@given(st.from_regex(Syllabic.pattern
+@given(st.from_regex(Syllabic.pattern, fullmatch=True))
 @example("wuwojitiwunwonjintinmanna")
 def test_Syllabic(s: str):
     res = Syllabic.filter(s)
     assert res, repr(s)


-@given(st.from_regex(Syllabic.pattern
+@given(st.from_regex(Syllabic.pattern, fullmatch=True))
 def test_LongSyllabic(s: str):
     len_ok = len(s) >= LongSyllabic.length
     res = LongSyllabic.filter(s)
     assert res == len_ok


-@given(st.from_regex(AlphabeticRe.pattern
+@given(st.from_regex(AlphabeticRe.pattern, fullmatch=True))
 @example("muems")
 @example("mpptp")
 @example("tptpt")
@@ -129,14 +129,14 @@ def test_Alphabetic(s: str):
     assert res_fn == res_re, repr(s)


-@given(st.from_regex(AlphabeticRe.pattern
+@given(st.from_regex(AlphabeticRe.pattern, fullmatch=True))
 def test_LongAlphabetic(s: str):
     len_ok = len(s) >= LongAlphabetic.length
     res = LongAlphabetic.filter(s)
     assert res == len_ok


-@given(st.from_regex(AlphabeticRe.pattern
+@given(st.from_regex(AlphabeticRe.pattern, fullmatch=True))
 def test_AlphabeticRe(s: str):
     res_re = AlphabeticRe.filter(s)
     assert res_re, repr(s)
@@ -144,11 +144,11 @@ def test_AlphabeticRe(s: str):

 @given(st.from_regex(PROPER_NAME_RE, fullmatch=True))
 def test_ProperName(s: str):
-    res = ProperName.filter(s)
+    res = PuName.filter(s)
     assert res, repr(s)


-@given(st.from_regex(PunctuationRe.pattern
+@given(st.from_regex(PunctuationRe.pattern, fullmatch=True))
 @example("[]")
 @example(r"\\")
 @example(r"\"")
@@ -161,14 +161,14 @@ def test_PunctuationRe1(s: str):
     assert res, repr(s)


-@given(st.from_regex(PunctuationRe.pattern
+@given(st.from_regex(PunctuationRe.pattern, fullmatch=True))
 def test_PunctuationRe(s: str):
     res_re = PunctuationRe.filter(s)
     res_re1 = PunctuationRe1.filter(s)
     assert res_re == res_re1, repr(s)


-@given(st.from_regex(PunctuationRe.pattern
+@given(st.from_regex(PunctuationRe.pattern, fullmatch=True))
 @example("\U000f1990") # UCSUR char
 def test_Punctuation(s: str):
     res_fn = Punctuation.filter(s)
@@ -185,7 +185,7 @@ def test_Numeric(s: str):


 @given(
-    st.from_regex(PunctuationRe.pattern
+    st.from_regex(PunctuationRe.pattern, fullmatch=True)
     | st.from_regex(r"\d+", fullmatch=True),
 )
 def test_OrFilter(s: str):
@@ -247,7 +247,7 @@ def test_OrFilter_IsipinEpiku(s: str):
 @given(st.sampled_from(list(words_by_tag("book", "pu"))))
 def test_AndFilter(s: str):
     s = s.capitalize()
-    f = And(ProperName, NimiPu)
+    f = And(PuName, NimiPu)
     assert f.filter(s)


@@ -259,8 +259,8 @@ def test_NotFilter(s: str):

 @given(
     st.sampled_from(list(FALSE_POS_SYLLABIC))
-    | st.from_regex(Syllabic.pattern
-    | st.from_regex(AlphabeticRe.pattern
+    | st.from_regex(Syllabic.pattern, fullmatch=True)
+    | st.from_regex(AlphabeticRe.pattern, fullmatch=True)
 )
 def test_AndNotFilter(s: str):
     AndNotFilter = And(Syllabic, Not(FalsePosSyllabic))
@@ -309,7 +309,7 @@ def test_AddTokensToMemberFilterNegative(s: str):
             | words_by_tag("usage_category", "sandbox")
         ),
     )
-    | st.from_regex(Syllabic.pattern
+    | st.from_regex(Syllabic.pattern, fullmatch=True)
 )
 def test_SubTokensFromMemberFilter(s: str):
     NimiAlaFilter = NimiLinkuCore(sub=NimiPu.tokens)
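
The recurring edit across these tests is adding fullmatch=True to st.from_regex. By default, hypothesis only guarantees that the pattern matches somewhere in the generated string and may pad either end with arbitrary characters; for whole-token filters, that padding shows up as junk and causes spurious failures. An illustration with a throwaway pattern (standard hypothesis API):

    import hypothesis.strategies as st

    # Default: examples merely *contain* a match, e.g. "?mute\n" for r"[a-z]+".
    anywhere = st.from_regex(r"[a-z]+")

    # fullmatch=True: the entire example matches the pattern.
    whole = st.from_regex(r"[a-z]+", fullmatch=True)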

{sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_ilo.py

@@ -94,6 +94,9 @@ NAME_MATCHES = [
     "toki Kanse li lon",
     "toki Lojban li nasa e lawa mi",
     "ilo Firefox",
+    "ilo FaceBook li nasa",
+    "mi kepeken ilo MySQL",
+    "poki li nasin SQLite",
     "mi musi Space Station 13",
     "jan Tepo en jan Salo en jan Lakuse en pipi Kewapi en soweli Eweke en mi li musi",
 ]
@@ -135,7 +138,6 @@ EXCESSIVE_SYLLABICS = [
 ]

 EXCESSIVE_ALPHABETICS = [
-    "21st", # candidate for xfails?
     "wen i tok usin onli notes in toki pona i look silli. ",
     "I wait, I sulk, as a tool I make stoops to ineptness.",
     "aaa i non-saw usa's most multiple element-set. it's as asinine as in `e`-less speak",
@@ -161,6 +163,7 @@ EXCESSIVE_ENGLISH = [
     "i'm online all the time",
     "How to Cut a Kiwi",
     "a e i o u",
+    "21st", # previous false positive; fixed by ProperName change
 ]

 NON_MATCHES = [
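
The relocated "21st" case falls straight out of the filter split: str.capitalize leaves a leading digit untouched, so the strict PuName rule cannot reject it, while the weakened ProperName checks for an actual uppercase first character:

    # PuName: "21st" == "21st".capitalize() is True, a false positive.
    assert "21st" == "21st".capitalize()
    # ProperName: "21st"[0].isupper() is False, so the token is rejected.
    assert not "21st"[0].isupper()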

{sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_preprocessors.py

@@ -22,7 +22,7 @@ from sonatoki.Preprocessors import (
 )


-@given(st.from_regex(URLs.pattern
+@given(st.from_regex(URLs.pattern, fullmatch=True))
 @example("https://google.com")
 @example("https://mun.la")
 @example("https://discord.gg/")
@@ -32,7 +32,7 @@ def test_URLs(s: str):
     assert URLs.process(s).strip() == ""


-@given(st.from_regex(Spoilers.pattern
+@given(st.from_regex(Spoilers.pattern, fullmatch=True))
 @example("|| | ||")
 @example("|| content\n\n\ncontent ||")
 @example("||\n||")
@@ -42,14 +42,14 @@ def test_Spoilers(s: str):
     assert res == "", (repr(s), repr(res))


-@given(st.from_regex(Backticks.pattern
+@given(st.from_regex(Backticks.pattern, fullmatch=True))
 @example("` ` ` `")
 def test_Backticks(s: str):
     res = Backticks.process(s).strip()
     assert res == "", (repr(s), repr(res))


-@given(st.from_regex(Codeblock.pattern
+@given(st.from_regex(Codeblock.pattern, fullmatch=True))
 @example(
     """```
 ```"""
@@ -63,12 +63,18 @@ blocky message
 second blocky message
 ```"""
 )
+@example(
+    """```oisandm123-_mu
+arbitrary content
+```"""
+)
+@example("""```mu```""")
 def test_Codeblock(s: str):
     res = Codeblock.process(s).strip()
     assert res == "", (repr(s), repr(res))


-@given(st.from_regex(ArrowQuote.pattern
+@given(st.from_regex(ArrowQuote.pattern, fullmatch=True))
 @example("> base")
 @example("> newline\n> newline")
 def test_ArrowQuote(s: str):
@@ -76,7 +82,7 @@ def test_ArrowQuote(s: str):
     assert res == "", (repr(s), repr(res))


-@given(st.from_regex(DoubleQuotes.pattern
+@given(st.from_regex(DoubleQuotes.pattern, fullmatch=True))
 @example('" "" "')
 @example('" "\n" "')
 @example('" \n "')
@@ -85,7 +91,7 @@ def test_DoubleQuotes(s: str):
     assert res == "", (repr(s), repr(res))


-@given(st.from_regex(SingleQuotes.pattern
+@given(st.from_regex(SingleQuotes.pattern, fullmatch=True))
 @example("' '' '")
 @example("' '\n' '")
 @example("' \n '")
@@ -94,7 +100,7 @@ def test_SingleQuotes(s: str):
     assert res == "", (repr(s), repr(res))


-@given(st.from_regex(DiscordEmotes.pattern
+@given(st.from_regex(DiscordEmotes.pattern, fullmatch=True))
 @example("<a:example:123123>")
 @example("<:example:123123>")
 def test_DiscordEmotes(s: str):
@@ -102,7 +108,7 @@ def test_DiscordEmotes(s: str):
     assert res == "", (repr(s), repr(res))


-@given(st.from_regex(DiscordMentions.pattern
+@given(st.from_regex(DiscordMentions.pattern, fullmatch=True))
 @example("<@497549183847497739>")
 @example("<@!457890000>")
 @example("<@&18398198981985>")
@@ -111,7 +117,7 @@ def test_DiscordMentions(s: str):
     assert res == "", (repr(s), repr(res))


-@given(st.from_regex(DiscordChannels.pattern
+@given(st.from_regex(DiscordChannels.pattern, fullmatch=True))
 @example("<#19858915>")
 @example("<#18591912589812985>")
 def test_DiscordChannels(s: str):
@@ -119,7 +125,7 @@ def test_DiscordChannels(s: str):
     assert res == "", (repr(s), repr(res))


-@given(st.from_regex(DiscordSpecial.pattern
+@given(st.from_regex(DiscordSpecial.pattern, fullmatch=True))
 @example("<id:guide>")
 @example("<id:browse>")
 def test_DiscordSpecial(s: str):
@@ -128,11 +134,11 @@ def test_DiscordSpecial(s: str):


 @given(
-    st.from_regex(DiscordEmotes.pattern
-    | st.from_regex(DiscordMentions.pattern
-    | st.from_regex(DiscordChannels.pattern
-    | st.from_regex(DiscordSpecial.pattern
-    | st.from_regex(AngleBracketObject.pattern
+    st.from_regex(DiscordEmotes.pattern, fullmatch=True)
+    | st.from_regex(DiscordMentions.pattern, fullmatch=True)
+    | st.from_regex(DiscordChannels.pattern, fullmatch=True)
+    | st.from_regex(DiscordSpecial.pattern, fullmatch=True)
+    | st.from_regex(AngleBracketObject.pattern, fullmatch=True)
 )
 @example("<https://example.com>")
 @example("<#123124125125>")
@@ -142,11 +148,11 @@ def test_AngleBracketObject(s: str):


 @given(
-    st.from_regex(SingleQuotes.pattern
-    | st.from_regex(DoubleQuotes.pattern
-    | st.from_regex(Backticks.pattern
-    | st.from_regex(ArrowQuote.pattern
-    | st.from_regex(AllQuotes.pattern
+    st.from_regex(SingleQuotes.pattern, fullmatch=True)
+    | st.from_regex(DoubleQuotes.pattern, fullmatch=True)
+    | st.from_regex(Backticks.pattern, fullmatch=True)
+    | st.from_regex(ArrowQuote.pattern, fullmatch=True)
+    | st.from_regex(AllQuotes.pattern, fullmatch=True)
 )
 @example("> bruh")
 @example("`bruh`")
@@ -155,7 +161,7 @@ def test_AllQuotes(s: str):
     assert res == "", (repr(s), repr(res))


-@given(st.from_regex(Reference.pattern
+@given(st.from_regex(Reference.pattern, fullmatch=True))
 @example("[[Brainstorm]]")
 @example("[[Phatic Phrases]]")
 @example("[[Yahoo!]]")
@@ -164,7 +170,7 @@ def test_Reference(s: str):
     assert res == "", (repr(s), repr(res))


-@given(st.from_regex(ColonEmotes.pattern
+@given(st.from_regex(ColonEmotes.pattern, fullmatch=True))
 @example(":owe::owe:")
 @example(":suffering:")
 @example(":presid65despair:")

{sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_scorers.py

@@ -10,10 +10,10 @@ from hypothesis import given, example
 from sonatoki.Filters import (
     Filter,
     NimiPu,
+    PuName,
     Numeric,
     Syllabic,
     Alphabetic,
-    ProperName,
     Phonotactic,
     NimiLinkuCore,
     PunctuationRe,
@@ -31,7 +31,7 @@ FILTERS = [
     NimiLinkuCore,
     NimiLinkuCommon,
     Alphabetic,
-    ProperName,
+    PuName,
     Phonotactic,
     PunctuationRe,
 ]

{sonatoki-0.7.0 → sonatoki-0.8.1}/tests/test_utils.py

@@ -9,10 +9,10 @@ PROPER_NAME_RE = r"[A-Z][a-z]*"

 token_strategy = (
     st.sampled_from(list(words_by_usage(60)))
-    | st.from_regex(Phonotactic.pattern
-    | st.from_regex(Syllabic.pattern
+    | st.from_regex(Phonotactic.pattern, fullmatch=True)
+    | st.from_regex(Syllabic.pattern, fullmatch=True)
     | st.from_regex(PROPER_NAME_RE, fullmatch=True)
-    | st.from_regex(AlphabeticRe.pattern
+    | st.from_regex(AlphabeticRe.pattern, fullmatch=True)
 )
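
token_strategy composes its sources with `|`, which on hypothesis strategies means union (st.one_of): every generated token is drawn from exactly one branch. A reduced sketch with placeholder branches:

    import hypothesis.strategies as st

    # `a | b` is equivalent to st.one_of(a, b); each draw picks one branch.
    token_strategy = (
        st.sampled_from(["toki", "pona"])
        | st.from_regex(r"[A-Z][a-z]*", fullmatch=True)
    )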

All remaining files (+0 -0 in the listing above) are unchanged between 0.7.0 and 0.8.1.