sonatoki 0.8.3__py3-none-any.whl → 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sonatoki/Preprocessors.py +15 -0
- {sonatoki-0.8.3.dist-info → sonatoki-0.8.4.dist-info}/METADATA +1 -1
- {sonatoki-0.8.3.dist-info → sonatoki-0.8.4.dist-info}/RECORD +5 -5
- {sonatoki-0.8.3.dist-info → sonatoki-0.8.4.dist-info}/WHEEL +0 -0
- {sonatoki-0.8.3.dist-info → sonatoki-0.8.4.dist-info}/licenses/LICENSE +0 -0
sonatoki/Preprocessors.py
CHANGED
@@ -83,6 +83,19 @@ class MarkdownURLs(RegexPreprocessor):
|
|
83
83
|
replace = r"\1"
|
84
84
|
|
85
85
|
|
86
|
+
class Emails(RegexPreprocessor):
|
87
|
+
"""Attempt to remove emails, for a particularly strong definition of
|
88
|
+
"email".
|
89
|
+
|
90
|
+
https://www.regular-expressions.info/email.html
|
91
|
+
"""
|
92
|
+
|
93
|
+
pattern = re.compile(
|
94
|
+
r"\b[a-zA-Z0-9._%+-]{2,}@[a-zA-Z0-9.-]{2,}\.[a-zA-Z]{2,24}\b",
|
95
|
+
flags=re.IGNORECASE,
|
96
|
+
)
|
97
|
+
|
98
|
+
|
86
99
|
class Reference(RegexPreprocessor):
|
87
100
|
"""Remove text contained in double brackets.
|
88
101
|
|
@@ -228,6 +241,7 @@ RECOMMENDED_PREPROCESSORS: List[Type[Preprocessor]] = [
|
|
228
241
|
Reference,
|
229
242
|
MarkdownURLs,
|
230
243
|
URLs,
|
244
|
+
Emails,
|
231
245
|
Emoji,
|
232
246
|
]
|
233
247
|
|
@@ -242,6 +256,7 @@ __all__ = [
|
|
242
256
|
"DiscordMentions",
|
243
257
|
"DiscordSpecial",
|
244
258
|
"DoubleQuotes",
|
259
|
+
"Emails",
|
245
260
|
"Emoji",
|
246
261
|
"MarkdownURLs",
|
247
262
|
"RECOMMENDED_PREPROCESSORS",
|
@@ -1,10 +1,10 @@
|
|
1
|
-
sonatoki-0.8.
|
2
|
-
sonatoki-0.8.3.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
|
3
|
-
sonatoki-0.8.3.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
|
1
|
+
sonatoki-0.8.4.dist-info/METADATA,sha256=Nui-Em5-CwsiOt5mkyhF5bb6WM9VQ6sp9UlENnH5Udw,6893
|
2
|
+
sonatoki-0.8.4.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
|
3
|
+
sonatoki-0.8.4.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
|
4
4
|
sonatoki/Cleaners.py,sha256=x2dT3MpDUfbrHA0EP2D3n1sTiKFFi5jw9ha-1dX973o,1958
|
5
5
|
sonatoki/Configs.py,sha256=-R-rTPUJfuSintpvC4UnOF1B9B93-Ooh_jmkZwhKvtk,4669
|
6
6
|
sonatoki/Filters.py,sha256=rBEJrY_R6koFpoYl4yfo_9UR-i21HbvlUF0ORg1g0WE,13411
|
7
|
-
sonatoki/Preprocessors.py,sha256=
|
7
|
+
sonatoki/Preprocessors.py,sha256=RmzkvPVo6Kdx1rZ5HeR9cTtx6oxpp2iLKrOMCUEqIrM,7107
|
8
8
|
sonatoki/Scorers.py,sha256=aCU3p9rD4QOy-uu851FGGw-ARqUCG_l4V_z5rtRL420,5236
|
9
9
|
sonatoki/Tokenizers.py,sha256=8lpC70bzXOpHyhVr5bmqpYKmdmQvJdf7X5-Icc9RRCw,5040
|
10
10
|
sonatoki/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -18,4 +18,4 @@ sonatoki/sandbox.json,sha256=44csrQDaVtV-n8OyewabX1J9MmUFCsPct5C8E5Xuc58,140197
|
|
18
18
|
sonatoki/syllabic.txt,sha256=HnqY4TrZ3tPcHah3TsvG9F9gjMrnAGdJ8hHJNHyyUPc,1712
|
19
19
|
sonatoki/types.py,sha256=zoVJeaDLOPstREiHtoD9pv-AOCsJq2C4_GG3nTYd114,1267
|
20
20
|
sonatoki/utils.py,sha256=sT5xLMEj0aLpy8GP92HKblJU1Wt1m8NUlMgCFWB32xQ,2265
|
21
|
-
sonatoki-0.8.3.dist-info/RECORD,,
|
21
|
+
sonatoki-0.8.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|