sonatoki 0.3.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff compares the contents of two package versions as published to their public registry. It is provided for informational purposes only.
sonatoki/constants.py CHANGED
@@ -1,6 +1,6 @@
  # STL
  import json
- from typing import Set, Dict, List
+ from typing import Set, Dict
  from pathlib import Path

  # LOCAL
@@ -15,9 +15,9 @@ UNICODE_PUNCT_RANGES = [
      "\\U0000003a-\\U00000040",
      "\\U0000005b-\\U00000060",
      "\\U0000007b-\\U0000007e",
-     "\\U000000a1-\\U000000a9",
+     "\\U000000a1-\\U000000a8",
      "\\U000000ab-\\U000000ac",
-     "\\U000000ae-\\U000000b1",
+     "\\U000000af-\\U000000b1",
      "\\U000000b4",
      "\\U000000b6-\\U000000b8",
      "\\U000000bb",
@@ -118,7 +118,9 @@ UNICODE_PUNCT_RANGES = [
      "\\U00001fed-\\U00001fef",
      "\\U00001ffd-\\U00001ffe",
      "\\U00002010-\\U00002027",
-     "\\U00002030-\\U0000205e",
+     "\\U00002030-\\U0000203b",
+     "\\U0000203d-\\U00002048",
+     "\\U0000204a-\\U0000205e",
      "\\U0000207a-\\U0000207e",
      "\\U0000208a-\\U0000208e",
      "\\U000020a0-\\U000020c0",
@@ -127,7 +129,8 @@ UNICODE_PUNCT_RANGES = [
      "\\U00002108-\\U00002109",
      "\\U00002114",
      "\\U00002116-\\U00002118",
-     "\\U0000211e-\\U00002123",
+     "\\U0000211e-\\U00002121",
+     "\\U00002123",
      "\\U00002125",
      "\\U00002127",
      "\\U00002129",
@@ -137,11 +140,88 @@ UNICODE_PUNCT_RANGES = [
      "\\U0000214a-\\U0000214d",
      "\\U0000214f",
      "\\U0000218a-\\U0000218b",
-     "\\U00002190-\\U00002426",
+     "\\U00002190-\\U00002193",
+     "\\U0000219a-\\U000021a8",
+     "\\U000021ab-\\U00002319",
+     "\\U0000231c-\\U00002327",
+     "\\U00002329-\\U000023ce",
+     "\\U000023d0-\\U000023e8",
+     "\\U000023f4-\\U000023f7",
+     "\\U000023fb-\\U00002426",
      "\\U00002440-\\U0000244a",
-     "\\U0000249c-\\U000024b5",
-     "\\U00002500-\\U00002775",
-     "\\U00002794-\\U00002b73",
+     "\\U0000249c-\\U000024c1",
+     "\\U000024c3-\\U000024e9",
+     "\\U00002500-\\U000025a9",
+     "\\U000025ac-\\U000025b5",
+     "\\U000025b7-\\U000025bf",
+     "\\U000025c1-\\U000025fa",
+     "\\U000025ff",
+     "\\U00002605-\\U0000260d",
+     "\\U0000260f-\\U00002610",
+     "\\U00002612-\\U00002613",
+     "\\U00002616-\\U00002617",
+     "\\U00002619-\\U0000261c",
+     "\\U0000261e-\\U0000261f",
+     "\\U00002621",
+     "\\U00002624-\\U00002625",
+     "\\U00002627-\\U00002629",
+     "\\U0000262b-\\U0000262d",
+     "\\U00002630-\\U00002637",
+     "\\U0000263b-\\U0000263f",
+     "\\U00002641",
+     "\\U00002643-\\U00002647",
+     "\\U00002654-\\U0000265e",
+     "\\U00002661-\\U00002662",
+     "\\U00002664",
+     "\\U00002667",
+     "\\U00002669-\\U0000267a",
+     "\\U0000267c-\\U0000267d",
+     "\\U00002680-\\U00002691",
+     "\\U00002698",
+     "\\U0000269a",
+     "\\U0000269d-\\U0000269f",
+     "\\U000026a2-\\U000026a6",
+     "\\U000026a8-\\U000026a9",
+     "\\U000026ac-\\U000026af",
+     "\\U000026b2-\\U000026bc",
+     "\\U000026bf-\\U000026c3",
+     "\\U000026c6-\\U000026c7",
+     "\\U000026c9-\\U000026cd",
+     "\\U000026d0",
+     "\\U000026d2",
+     "\\U000026d5-\\U000026e8",
+     "\\U000026eb-\\U000026ef",
+     "\\U000026f6",
+     "\\U000026fb-\\U000026fc",
+     "\\U000026fe-\\U00002701",
+     "\\U00002703-\\U00002704",
+     "\\U00002706-\\U00002707",
+     "\\U0000270e",
+     "\\U00002710-\\U00002711",
+     "\\U00002713",
+     "\\U00002715",
+     "\\U00002717-\\U0000271c",
+     "\\U0000271e-\\U00002720",
+     "\\U00002722-\\U00002727",
+     "\\U00002729-\\U00002732",
+     "\\U00002735-\\U00002743",
+     "\\U00002745-\\U00002746",
+     "\\U00002748-\\U0000274b",
+     "\\U0000274d",
+     "\\U0000274f-\\U00002752",
+     "\\U00002756",
+     "\\U00002758-\\U00002762",
+     "\\U00002765-\\U00002775",
+     "\\U00002794",
+     "\\U00002798-\\U000027a0",
+     "\\U000027a2-\\U000027af",
+     "\\U000027b1-\\U000027be",
+     "\\U000027c0-\\U00002933",
+     "\\U00002936-\\U00002b04",
+     "\\U00002b08-\\U00002b1a",
+     "\\U00002b1d-\\U00002b4f",
+     "\\U00002b51-\\U00002b54",
+     "\\U00002b56-\\U00002b73",
      "\\U00002b76-\\U00002b95",
      "\\U00002b97-\\U00002bff",
      "\\U00002ce5-\\U00002cea",
@@ -156,9 +236,8 @@ UNICODE_PUNCT_RANGES = [
      "\\U00002ff0-\\U00002fff",
      "\\U00003001-\\U00003004",
      "\\U00003008-\\U00003020",
-     "\\U00003030",
      "\\U00003036-\\U00003037",
-     "\\U0000303d-\\U0000303f",
+     "\\U0000303e-\\U0000303f",
      "\\U0000309b-\\U0000309c",
      "\\U000030a0",
      "\\U000030fb",
@@ -170,7 +249,9 @@ UNICODE_PUNCT_RANGES = [
      "\\U0000322a-\\U00003247",
      "\\U00003250",
      "\\U00003260-\\U0000327f",
-     "\\U0000328a-\\U000032b0",
+     "\\U0000328a-\\U00003296",
+     "\\U00003298",
+     "\\U0000329a-\\U000032b0",
      "\\U000032c0-\\U000033ff",
      "\\U00004dc0-\\U00004dff",
      "\\U0000a490-\\U0000a4c6",
@@ -314,49 +395,97 @@ UNICODE_PUNCT_RANGES = [
      "\\U0001ecb0",
      "\\U0001ed2e",
      "\\U0001eef0-\\U0001eef1",
-     "\\U0001f000-\\U0001f02b",
+     "\\U0001f000-\\U0001f003",
+     "\\U0001f005-\\U0001f02b",
      "\\U0001f030-\\U0001f093",
      "\\U0001f0a0-\\U0001f0ae",
      "\\U0001f0b1-\\U0001f0bf",
-     "\\U0001f0c1-\\U0001f0cf",
+     "\\U0001f0c1-\\U0001f0ce",
      "\\U0001f0d1-\\U0001f0f5",
-     "\\U0001f10d-\\U0001f12f",
-     "\\U0001f14a-\\U0001f14f",
-     "\\U0001f16a-\\U0001f16f",
-     "\\U0001f18a-\\U0001f1ad",
-     "\\U0001f1e6-\\U0001f202",
-     "\\U0001f210-\\U0001f23b",
+     "\\U0001f10d-\\U0001f16f",
+     "\\U0001f172-\\U0001f17d",
+     "\\U0001f180-\\U0001f18d",
+     "\\U0001f18f-\\U0001f190",
+     "\\U0001f19b-\\U0001f1ad",
+     "\\U0001f1e6-\\U0001f1e7",
+     "\\U0001f1ea-\\U0001f1eb",
+     "\\U0001f1ee-\\U0001f1f1",
+     "\\U0001f1f4-\\U0001f1f6",
+     "\\U0001f1f9-\\U0001f200",
+     "\\U0001f210-\\U0001f219",
+     "\\U0001f21b-\\U0001f22e",
+     "\\U0001f230-\\U0001f231",
+     "\\U0001f23b",
      "\\U0001f240-\\U0001f248",
-     "\\U0001f250-\\U0001f251",
      "\\U0001f260-\\U0001f265",
-     "\\U0001f300-\\U0001f6d7",
-     "\\U0001f6dc-\\U0001f6ec",
-     "\\U0001f6f0-\\U0001f6fc",
+     "\\U0001f322-\\U0001f323",
+     "\\U0001f394-\\U0001f395",
+     "\\U0001f398",
+     "\\U0001f39c-\\U0001f39d",
+     "\\U0001f3f1-\\U0001f3f2",
+     "\\U0001f3f6",
+     "\\U0001f4fe",
+     "\\U0001f53e-\\U0001f548",
+     "\\U0001f54f",
+     "\\U0001f568-\\U0001f56e",
+     "\\U0001f571-\\U0001f572",
+     "\\U0001f57b-\\U0001f586",
+     "\\U0001f588-\\U0001f589",
+     "\\U0001f58e-\\U0001f58f",
+     "\\U0001f591-\\U0001f594",
+     "\\U0001f597-\\U0001f5a3",
+     "\\U0001f5a6-\\U0001f5a7",
+     "\\U0001f5a9-\\U0001f5b0",
+     "\\U0001f5b3-\\U0001f5bb",
+     "\\U0001f5bd-\\U0001f5c1",
+     "\\U0001f5c5-\\U0001f5d0",
+     "\\U0001f5d4-\\U0001f5db",
+     "\\U0001f5df-\\U0001f5e0",
+     "\\U0001f5e2",
+     "\\U0001f5e4-\\U0001f5e7",
+     "\\U0001f5e9-\\U0001f5ee",
+     "\\U0001f5f0-\\U0001f5f2",
+     "\\U0001f5f4-\\U0001f5f9",
+     "\\U0001f650-\\U0001f67f",
+     "\\U0001f6c6-\\U0001f6ca",
+     "\\U0001f6d3-\\U0001f6d4",
+     "\\U0001f6e6-\\U0001f6e8",
+     "\\U0001f6ea",
+     "\\U0001f6f1-\\U0001f6f2",
      "\\U0001f700-\\U0001f776",
      "\\U0001f77b-\\U0001f7d9",
-     "\\U0001f7e0-\\U0001f7eb",
-     "\\U0001f7f0",
      "\\U0001f800-\\U0001f80b",
      "\\U0001f810-\\U0001f847",
      "\\U0001f850-\\U0001f859",
      "\\U0001f860-\\U0001f887",
      "\\U0001f890-\\U0001f8ad",
      "\\U0001f8b0-\\U0001f8b1",
-     "\\U0001f900-\\U0001fa53",
+     "\\U0001f900-\\U0001f90b",
+     "\\U0001f93b",
+     "\\U0001f946",
+     "\\U0001fa00-\\U0001fa53",
      "\\U0001fa60-\\U0001fa6d",
-     "\\U0001fa70-\\U0001fa7c",
-     "\\U0001fa80-\\U0001fa88",
-     "\\U0001fa90-\\U0001fabd",
-     "\\U0001fabf-\\U0001fac5",
-     "\\U0001face-\\U0001fadb",
-     "\\U0001fae0-\\U0001fae8",
-     "\\U0001faf0-\\U0001faf8",
      "\\U0001fb00-\\U0001fb92",
      "\\U0001fb94-\\U0001fbca",
-     "\\U000f1990-\\U000f199d", # UCSUR punctuation
+     "\\U000f1990-\\U000f199d",
  ]

- UCSUR_PUNCT_RANGES = UNICODE_PUNCT_RANGES[-1] # NOTE: THIS CAN CHANGE
+
+ NOT_IN_PUNCT_CLASS = r"Ⓐ-ⓩ🄰-🅉🅐-🅩🅰-🆉"
+ ALL_VARIATION_SELECTOR_RANGES = ["\\U0000fe00-\\U0000fe0f", "\\U000e0100-\\U000e01ef"]
+ EMOJI_VARIATION_SELECTOR_RANGES = ["\\U0000fe0e-\\U0000fe0f"]
+ EMOJI_VARIATION_SELECTOR_RANGES_STR = "".join(EMOJI_VARIATION_SELECTOR_RANGES)
+ """All variation selectors are in Nonspacing Mark (Mn), but it is more apt to
+ mark these two as punctuation, since they are used exclusively for rendering
+ emoji.
+
+ But it's even better to use the Emoji filter.
+ """
+
+ UCSUR_PUNCT_RANGES = ["\\U000f1990-\\U000f199d"]
+ UCSUR_PUNCT_RANGES_STR = "".join(UCSUR_PUNCT_RANGES)
+ """Private Use Area glyphs are given the apt but unhelpful 'Private Use'
+ class."""

  UNICODE_PUNCT = find_unicode_chars(UNICODE_PUNCT_RANGES)
  # this is a large string.
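A note on the naming convention this hunk introduces: each `*_RANGES` constant stays a list of escaped range fragments, and its `*_RANGES_STR` sibling is the join, ready to drop into a regex character class. A minimal sketch of that intended use, assuming only the standard `re` module (the consuming filter code is not part of this diff):

```
import re

# Values copied from the constants added above.
UCSUR_PUNCT_RANGES = ["\\U000f1990-\\U000f199d"]
UCSUR_PUNCT_RANGES_STR = "".join(UCSUR_PUNCT_RANGES)
EMOJI_VARIATION_SELECTOR_RANGES_STR = "".join(["\\U0000fe0e-\\U0000fe0f"])

# re resolves the \UXXXXXXXX escapes at compile time, so the joined
# fragments form a valid character-class body.
punct_re = re.compile(f"[{UCSUR_PUNCT_RANGES_STR}{EMOJI_VARIATION_SELECTOR_RANGES_STR}]+")

print(bool(punct_re.fullmatch("\U000f1990")))  # True: codepoint in the UCSUR punctuation block
print(bool(punct_re.fullmatch("a")))           # False
```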
@@ -366,7 +495,7 @@ POSIX_PUNCT = r"""-!"#$%&'()*+,./:;<=>?@[\]^_`{|}~"""
  POSIX_PUNCT_RANGES = find_unicode_ranges(POSIX_PUNCT)

  ALL_PUNCT = "".join(sorted(list(set(POSIX_PUNCT + UNICODE_PUNCT))))
- ALL_PUNCT_RANGES = "".join(find_unicode_ranges(ALL_PUNCT))
+ ALL_PUNCT_RANGES_STR = "".join(find_unicode_ranges(ALL_PUNCT))
  # combined bc the result could be simpler

  SENTENCE_PUNCT = """.?!:;'"()[-]“”·…"""
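`find_unicode_chars` and `find_unicode_ranges` come from the LOCAL import at the top of the file and are untouched by this diff. As a sketch of the contract these constants appear to assume (an assumption about behavior, not the package's actual implementation), the expansion direction looks roughly like:

```
from typing import List

def find_unicode_chars(ranges: List[str]) -> str:
    # Sketch of the assumed contract: expand escaped fragments such as
    # "\\U0000003a-\\U00000040" into the literal characters they cover.
    out: List[str] = []
    for fragment in ranges:
        decoded = fragment.encode("ascii").decode("unicode-escape")
        if len(decoded) == 3 and decoded[1] == "-":
            first, last = ord(decoded[0]), ord(decoded[2])
            out.extend(map(chr, range(first, last + 1)))
        else:  # single-codepoint fragment like "\\U000000b4"
            out.append(decoded)
    return "".join(out)

print(find_unicode_chars(["\\U0000003a-\\U00000040"]))  # ':;<=>?@'
```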
@@ -374,6 +503,8 @@ SENTENCE_PUNCT = """.?!:;'"()[-]“”·…"""

  LINKU = Path(__file__).resolve().parent / Path("linku.json")
  SANDBOX = Path(__file__).resolve().parent / Path("sandbox.json")
+ SYLLABICS = Path(__file__).resolve().parent / Path("syllabic.txt")
+ ALPHABETICS = Path(__file__).resolve().parent / Path("alphabetic.txt")

  VOWELS = "aeiou"
  CONSONANTS = "jklmnpstw"
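The next hunk replaces PHONOMATCHES with a far larger FALSE_POS_SYLLABIC set: common English words that happen to satisfy Toki Pona's syllable shape, which is built from the VOWELS and CONSONANTS constants just above. A rough sketch of the veto logic, assuming a simplified (C)V(n) syllable test (sonatoki's actual syllabic filter is not shown in this diff, and a full checker would also reject the illegal sequences wu, wo, ji, ti):

```
import re

VOWELS = "aeiou"
CONSONANTS = "jklmnpstw"
FALSE_POS_SYLLABIC = {"like", "same", "time", "one", "use"}  # excerpt of the set below

# Simplified (C)V(n) syllable shape, repeated over the whole token.
SYLLABLES_RE = re.compile(f"(?:[{CONSONANTS}]?[{VOWELS}]n?)+")

def looks_syllabically_toki_pona(token: str) -> bool:
    token = token.lower()
    if token in FALSE_POS_SYLLABIC:
        return False  # an English word that happens to fit the shape
    return SYLLABLES_RE.fullmatch(token) is not None

print(looks_syllabically_toki_pona("kijetesantakalu"))  # True
print(looks_syllabically_toki_pona("like"))             # False: vetoed
```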
@@ -390,21 +521,69 @@ ALLOWABLES = {
      "msa",
  }

- PHONOMATCHES = {
-     "non",
-     "nope",
-     "some",
+ FALSE_POS_SYLLABIC = {
+     # ordered by frequency in previous TPT data
      "like",
+     "same",
+     "nope",
+     "uwu", # TODO: emoticon?? uhh?
+     "non",
+     "owo", # TODO: emoticon??
+     "one",
+     "to",
+     "i",
+     "awesome",
      "use",
-     "imo",
+     "name",
      "time",
+     "imo", # "in my opinion"
      "man",
-     "also",
+     # "son", # sona typo?
+     "joke",
+     "so",
+     "ten",
+     "make",
+     "pin",
+     "note",
+     # "aka" # in sandbox
+     "into",
+     "in",
+     "some",
+     "on",
+     "me",
+     "ipa",
+     "sun",
+     "sense",
+     "none",
+     "meme",
+     "wise",
+     # "ono", # TODO: what is this
+     "mon",
+     "take",
+     "luna",
+     "anti",
+     "elo",
+     "an",
+     "win",
+     "won",
+     "we",
+     "men",
+     "ton",
+     "woke",
+     "semi",
+     "male",
  }

- ALPHABETIC_MATCHES: Set[str] = set()
-
- IGNORABLES = PHONOMATCHES | ALPHABETIC_MATCHES
+ FALSE_POS_ALPHABETIC: Set[str] = {
+     "t",
+     "is",
+     "not",
+     "lol",
+     "also",
+     "isn", # TODO: tokenizer....
+     "mean",
+     "means",
+ }

  UCSUR_RANGES = [
      "\\U000F1900-\\U000F1977", # pu
@@ -439,15 +618,23 @@ with open(SANDBOX) as f:
      sandbox: Dict[str, Dict[str, str]] = json.loads(f.read())
  NIMI_LINKU_SANDBOX = {d["word"] for d in sandbox.values()}

+ # with open(SYLLABICS) as f:
+ #     FALSE_POS_SYLLABIC = {line.strip() for line in f}
+ #
+ # with open(ALPHABETICS) as f:
+ #     FALSE_POS_ALPHABETIC = {line.strip() for line in f}
+
  del linku
  del sandbox

  __all__ = [
      "ALLOWABLES",
      "ALL_PUNCT",
-     "ALL_PUNCT_RANGES",
+     "ALL_PUNCT_RANGES_STR",
      "ALPHABET",
      "CONSONANTS",
+     "EMOJI_VARIATION_SELECTOR_RANGES",
+     "EMOJI_VARIATION_SELECTOR_RANGES_STR",
      "NIMI_KU_LILI",
      "NIMI_KU_SULI",
      "NIMI_LINKU_COMMON",
@@ -459,6 +646,8 @@ __all__ = [
      "NIMI_PU_SYNONYMS",
      "POSIX_PUNCT",
      "POSIX_PUNCT_RANGES",
+     "UCSUR_PUNCT_RANGES",
+     "UCSUR_PUNCT_RANGES_STR",
      "UNICODE_PUNCT",
      "UNICODE_PUNCT_RANGES",
      "VOWELS",
sonatoki/ilo.py CHANGED
@@ -5,12 +5,17 @@ from typing import List, Type, Tuple
  from sonatoki.Filters import Filter
  from sonatoki.Scorers import Number, Scorer
  from sonatoki.Cleaners import Cleaner
- from sonatoki.Tokenizers import Tokenizer
+ from sonatoki.Tokenizers import Tokenizer, SentTokenizer, WordTokenizer
  from sonatoki.Preprocessors import Preprocessor

+ # tokenized, filtered, cleaned, score, result
+ Scorecard = Tuple[List[str], List[str], List[str], Number, bool]
+ # TODO: scorecard kinda sucks as a name
+

  class Ilo:
      __preprocessors: List[Type[Preprocessor]]
+     __sent_tokenizer: Type[Tokenizer]
      __word_tokenizer: Type[Tokenizer]
      __cleaners: List[Type[Cleaner]]
      __ignoring_filters: List[Type[Filter]]
@@ -26,11 +31,13 @@ class Ilo:
          scoring_filters: List[Type[Filter]],
          scorer: Type[Scorer],
          passing_score: Number,
-         word_tokenizer: Type[Tokenizer],
+         word_tokenizer: Type[Tokenizer] = WordTokenizer,
+         sent_tokenizer: Type[Tokenizer] = SentTokenizer,
      ):
          super().__init__()
          # avoid keeping a ref to user's list just in case
          self.__preprocessors = [*preprocessors]
+         self.__sent_tokenizer = sent_tokenizer
          self.__word_tokenizer = word_tokenizer
          self.__cleaners = [*cleaners]
          self.__ignoring_filters = [*ignoring_filters]
@@ -47,6 +54,9 @@ class Ilo:
          """It is *highly* recommended that you run `ilo.preprocess` first."""
          return self.__word_tokenizer.tokenize(msg)

+     def sent_tokenize(self, msg: str) -> List[str]:
+         return self.__sent_tokenizer.tokenize(msg)
+
      def clean_token(self, token: str) -> str:
          for c in self.__cleaners:
              token = c.clean(token)
@@ -83,26 +93,60 @@ class Ilo:
      def score_tokens(self, tokens: List[str]) -> float:
          return self.__scorer.score(tokens, self.__scoring_filters)

-     def _is_toki_pona(
-         self, message: str
-     ) -> Tuple[str, List[str], List[str], List[str], Number, bool]:
-         """Returns all components of the processing algorithm:
-         - Preprocessed message (str)
+     def _is_toki_pona(self, message: str) -> Scorecard:
+         """Process a message into its tokens, then filter, clean, and score
+         them. Returns all parts. The message must already be preprocessed,
+         normally done in `self.is_toki_pona(message)`.
+
+         Returns all components of the processing algorithm except preprocessing:
          - Tokenized message (list[str])
          - Filtered message (list[str])
          - Cleaned message (list[str])
          - Score (float)
-         - Result (bool)"""
-         preprocessed = self.preprocess(message)
-         tokenized = self.word_tokenize(preprocessed)
+         - Result (bool)
+         """
+         tokenized = self.word_tokenize(message)
          filtered = self.filter_tokens(tokenized)
          cleaned = self.clean_tokens(filtered)
          score = self.score_tokens(cleaned)
          result = score >= self.__passing_score

-         return preprocessed, tokenized, filtered, cleaned, score, result
+         return tokenized, filtered, cleaned, score, result

      def is_toki_pona(self, message: str) -> bool:
          """Determines whether a single statement is or is not Toki Pona."""
+         message = self.preprocess(message)
          *_, result = self._is_toki_pona(message)
          return result
+
+     def _are_toki_pona(self, message: str) -> List[Scorecard]:
+         """Split a message into sentences, then return a list of each
+         sentence's results via `self._is_toki_pona()`.
+
+         The message must already be preprocessed, normally done in
+         `self.are_toki_pona(message)`.
+         """
+         results: List[Scorecard] = list()
+         for sentence in self.sent_tokenize(message):
+             result = self._is_toki_pona(sentence)
+             results.append(result)
+         return results
+
+     def are_toki_pona(self, message: str) -> List[bool]:
+         """Splits a message into sentences, then determines whether each is or is not Toki Pona.
+         NOTE: You will need to decide how to score the result. Examples:
+
+         ```
+         def all_must_pass(message: str) -> bool:
+             return all(ILO.are_toki_pona(message))
+
+         def portion_must_pass(message: str, score: Number = 0.8) -> bool:
+             results = ILO.are_toki_pona(message)
+             sent_count = len(results)
+             passing = results.count(True)
+             return (passing / sent_count) >= score
+         ```
+         """
+         message = self.preprocess(message)
+         results = self._are_toki_pona(message)
+         return [res[-1] for res in results]
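Since `_is_toki_pona` now returns a `Scorecard` and `_are_toki_pona` returns one per sentence, intermediate results are easy to inspect. A debugging sketch built only on what this diff shows, preprocessing first as the docstrings require:

```
from typing import List

from sonatoki.ilo import Ilo, Scorecard

def explain(ilo: Ilo, message: str) -> None:
    # Preprocess once, then unpack each sentence's Scorecard in the
    # documented order: tokenized, filtered, cleaned, score, result.
    message = ilo.preprocess(message)
    cards: List[Scorecard] = ilo._are_toki_pona(message)
    for tokenized, filtered, cleaned, score, result in cards:
        verdict = "pass" if result else "fail"
        print(f"{score:.2f} ({verdict}): tokens={tokenized} cleaned={cleaned}")
```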