sonatoki 0.8.3__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sonatoki/Preprocessors.py CHANGED
@@ -83,6 +83,19 @@ class MarkdownURLs(RegexPreprocessor):
83
83
  replace = r"\1"
84
84
 
85
85
 
86
+ class Emails(RegexPreprocessor):
87
+ """Attempt to remove emails, for a particularly strong definition of
88
+ "email".
89
+
90
+ https://www.regular-expressions.info/email.html
91
+ """
92
+
93
+ pattern = re.compile(
94
+ r"\b[a-zA-Z0-9._%+-]{2,}@[a-zA-Z0-9.-]{2,}\.[a-zA-Z]{2,24}\b",
95
+ flags=re.IGNORECASE,
96
+ )
97
+
98
+
86
99
  class Reference(RegexPreprocessor):
87
100
  """Remove text contained in double brackets.
88
101
 
@@ -228,6 +241,7 @@ RECOMMENDED_PREPROCESSORS: List[Type[Preprocessor]] = [
228
241
  Reference,
229
242
  MarkdownURLs,
230
243
  URLs,
244
+ Emails,
231
245
  Emoji,
232
246
  ]
233
247
 
@@ -242,6 +256,7 @@ __all__ = [
242
256
  "DiscordMentions",
243
257
  "DiscordSpecial",
244
258
  "DoubleQuotes",
259
+ "Emails",
245
260
  "Emoji",
246
261
  "MarkdownURLs",
247
262
  "RECOMMENDED_PREPROCESSORS",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonatoki
3
- Version: 0.8.3
3
+ Version: 0.8.4
4
4
  Summary: ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?
5
5
  Author-Email: "jan Kekan San (@gregdan3)" <gregory.danielson3@gmail.com>
6
6
  License: AGPL-3.0-or-later
@@ -1,10 +1,10 @@
1
- sonatoki-0.8.3.dist-info/METADATA,sha256=W35cZGS_DWg3Q-mTRfFCVKPWWJOe50U3Uy4dzyIxRaw,6893
2
- sonatoki-0.8.3.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
3
- sonatoki-0.8.3.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
1
+ sonatoki-0.8.4.dist-info/METADATA,sha256=Nui-Em5-CwsiOt5mkyhF5bb6WM9VQ6sp9UlENnH5Udw,6893
2
+ sonatoki-0.8.4.dist-info/WHEEL,sha256=rSwsxJWe3vzyR5HCwjWXQruDgschpei4h_giTm0dJVE,90
3
+ sonatoki-0.8.4.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
4
4
  sonatoki/Cleaners.py,sha256=x2dT3MpDUfbrHA0EP2D3n1sTiKFFi5jw9ha-1dX973o,1958
5
5
  sonatoki/Configs.py,sha256=-R-rTPUJfuSintpvC4UnOF1B9B93-Ooh_jmkZwhKvtk,4669
6
6
  sonatoki/Filters.py,sha256=rBEJrY_R6koFpoYl4yfo_9UR-i21HbvlUF0ORg1g0WE,13411
7
- sonatoki/Preprocessors.py,sha256=AcvYKr7oT9eumsOiXPM8EBo9TagEaFIYIMC8L2YwPVk,6766
7
+ sonatoki/Preprocessors.py,sha256=RmzkvPVo6Kdx1rZ5HeR9cTtx6oxpp2iLKrOMCUEqIrM,7107
8
8
  sonatoki/Scorers.py,sha256=aCU3p9rD4QOy-uu851FGGw-ARqUCG_l4V_z5rtRL420,5236
9
9
  sonatoki/Tokenizers.py,sha256=8lpC70bzXOpHyhVr5bmqpYKmdmQvJdf7X5-Icc9RRCw,5040
10
10
  sonatoki/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -18,4 +18,4 @@ sonatoki/sandbox.json,sha256=44csrQDaVtV-n8OyewabX1J9MmUFCsPct5C8E5Xuc58,140197
18
18
  sonatoki/syllabic.txt,sha256=HnqY4TrZ3tPcHah3TsvG9F9gjMrnAGdJ8hHJNHyyUPc,1712
19
19
  sonatoki/types.py,sha256=zoVJeaDLOPstREiHtoD9pv-AOCsJq2C4_GG3nTYd114,1267
20
20
  sonatoki/utils.py,sha256=sT5xLMEj0aLpy8GP92HKblJU1Wt1m8NUlMgCFWB32xQ,2265
21
- sonatoki-0.8.3.dist-info/RECORD,,
21
+ sonatoki-0.8.4.dist-info/RECORD,,