sonatoki-0.5.0-py3-none-any.whl → sonatoki-0.5.1-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
sonatoki/Configs.py CHANGED
@@ -73,6 +73,7 @@ PrefConfig: IloConfig = {
73
73
  "scoring_filters": [
74
74
  Or(NimiLinkuCore, NimiLinkuCommon, NimiUCSUR, Miscellaneous),
75
75
  And(LongSyllabic, Not(FalsePosSyllabic)),
76
+ # NOTE: These are allowed to pass name and alphabetic below, because they *could* be wrong
76
77
  LongProperName,
77
78
  LongAlphabetic,
78
79
  ],
sonatoki/Filters.py CHANGED
@@ -351,6 +351,10 @@ class Or:
351
351
  else:
352
352
  other_filters.extend(member_filters)
353
353
 
354
+ if len(other_filters) == 1: # we only had member filters
355
+ # TODO: this sucks?
356
+ return other_filters[0]
357
+
354
358
  filter = cls.__generic_filter(*other_filters)
355
359
  return filter
356
360
 
sonatoki/constants.py CHANGED
@@ -519,8 +519,10 @@ ALLOWABLES = {
519
519
  "kxk", # ken ala ken
520
520
  "wxw", # wile ala wile
521
521
  "msa",
522
+ "anusem",
522
523
  }
523
524
 
525
+ # NOTE: This is being tracked manually rather than fetched from syllabics.txt until I am convinced that solution is appropriate
524
526
  FALSE_POS_SYLLABIC = {
525
527
  # ordered by frequency in previous TPT data
526
528
  "like",
@@ -540,6 +542,7 @@ FALSE_POS_SYLLABIC = {
540
542
  "man",
541
543
  # "son", # sona typo?
542
544
  "joke",
545
+ # pon would go here
543
546
  "so",
544
547
  "ten",
545
548
  "make",
@@ -548,11 +551,14 @@ FALSE_POS_SYLLABIC = {
548
551
  # "aka" # in sandbox
549
552
  "into",
550
553
  "in",
554
+ "no",
551
555
  "some",
556
+ # "papa",
552
557
  "on",
553
558
  "me",
554
559
  "ipa",
555
560
  "sun",
561
+ "mine",
556
562
  "sense",
557
563
  "none",
558
564
  "meme",
@@ -561,28 +567,101 @@ FALSE_POS_SYLLABIC = {
561
567
  "mon",
562
568
  "take",
563
569
  "luna",
564
- "anti",
565
570
  "elo",
571
+ "japanese",
566
572
  "an",
573
+ "anti",
567
574
  "win",
568
575
  "won",
569
- "we",
576
+ "we", # word in sandbox
570
577
  "men",
571
578
  "ton",
572
579
  "woke",
580
+ "sen", # seen
581
+ "se", # see
573
582
  "semi",
574
583
  "male",
584
+ # "pen", # borderline
585
+ "woman",
586
+ "line",
587
+ "meta",
588
+ "mini",
589
+ "sine",
590
+ # "min", # borderline
591
+ "oposite",
592
+ "anime",
593
+ "potato",
594
+ # "japan",
595
+ "nose",
596
+ "kilo",
597
+ "alone",
598
+ "minute",
599
+ "late",
600
+ "women",
601
+ "leson",
602
+ "amen",
603
+ "tote",
604
+ "lame",
605
+ "online",
606
+ "tone",
607
+ "ate",
608
+ "mile",
609
+ "melon",
610
+ "tense",
611
+ "nonsense",
612
+ "nine",
613
+ "emo",
614
+ "unlike",
615
+ "lone",
616
+ # manual additions
617
+ "alike",
618
+ "amuse",
619
+ "antelope",
620
+ "antena",
621
+ "apetite",
622
+ "asasin",
623
+ "asasinate",
624
+ "asinine",
625
+ "asinine",
626
+ "asume",
627
+ "atone",
628
+ "awake",
629
+ "awaken",
630
+ "eliminate",
631
+ "elite",
632
+ "misuse",
633
+ "emanate",
634
+ "iluminate",
635
+ "imense",
636
+ "imitate",
637
+ "insane",
638
+ "insolate",
639
+ "insulate",
640
+ "intense",
641
+ "lemon",
642
+ "manipulate",
575
643
  }
576
644
 
577
645
  FALSE_POS_ALPHABETIC: Set[str] = {
578
646
  "t",
579
647
  "is",
648
+ "as",
580
649
  "not",
650
+ "link",
651
+ "wait",
581
652
  "lol",
653
+ "new",
582
654
  "also",
583
655
  "isn", # TODO: tokenizer....
584
656
  "mean",
585
657
  "means",
658
+ "it",
659
+ "moment",
660
+ "its",
661
+ "lmao",
662
+ "new",
663
+ "wel",
664
+ "makes",
586
665
  }
587
666
 
588
667
  UCSUR_RANGES = [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sonatoki
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?
5
5
  Author-Email: "jan Kekan San (@gregdan3)" <gregory.danielson3@gmail.com>
6
6
  License: AGPL-3.0-or-later
@@ -1,20 +1,20 @@
1
- sonatoki-0.5.0.dist-info/METADATA,sha256=ytLxR8WxNEourLjtJhT6TmuaeeKXM1bdoSkn0tw7umc,6370
2
- sonatoki-0.5.0.dist-info/WHEEL,sha256=SOP-4bEE0jbVaCHQGVvF08uWxk5rcSsfEybvoQVHlD8,90
3
- sonatoki-0.5.0.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
1
+ sonatoki-0.5.1.dist-info/METADATA,sha256=gj5B_q10R5l-w0jEuzFY2035qzp9tpmBQ-sZ0q73zXE,6370
2
+ sonatoki-0.5.1.dist-info/WHEEL,sha256=SOP-4bEE0jbVaCHQGVvF08uWxk5rcSsfEybvoQVHlD8,90
3
+ sonatoki-0.5.1.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
4
4
  sonatoki/Cleaners.py,sha256=x2dT3MpDUfbrHA0EP2D3n1sTiKFFi5jw9ha-1dX973o,1958
5
- sonatoki/Configs.py,sha256=gsAJB_SRBzUFTsdgOSqJ5k2WvHk_VuI_PBuXbC3z530,4192
6
- sonatoki/Filters.py,sha256=Hh5CIMTtJIDK5zEIpDZ-02zujpCT44frMC3VVAlTW0I,11565
5
+ sonatoki/Configs.py,sha256=HHaSAA7hus7aY6Xy-3fNlbzMwk3wJO0HrjTssg8P78M,4291
6
+ sonatoki/Filters.py,sha256=nVSmw5M4sEYA_8KI1fI53rMHkd9KO6yWbKfdxxExxN8,11700
7
7
  sonatoki/Preprocessors.py,sha256=zuu-6SLqFgk88vfSnYlyZjZrzoZQ56U_1SFXoxThQDQ,5628
8
8
  sonatoki/Scorers.py,sha256=LRQLgXKTU2VqhkMHFPVxyVt83DXf85_zrpDGk4ThU24,3811
9
9
  sonatoki/Tokenizers.py,sha256=qFaA1-v-wjKMihtEJMeZpi3m4cSkJQgWhGhL-w0VgPE,4236
10
10
  sonatoki/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  sonatoki/__main__.py,sha256=6n4kUF80APl6a0jV46h_ncHNuQbrLpZ_nAmiNAakiag,5673
12
12
  sonatoki/alphabetic.txt,sha256=duyqAKilD2vLIr75RShCIAnktNJcGeEoQIk18V6czmg,11702
13
- sonatoki/constants.py,sha256=ZITC1_MVWBdeNH2vzJfxNNv8vlUSUXtSrZkGyGaFthM,17626
13
+ sonatoki/constants.py,sha256=a3OjhtH2Jp6RDot1NE-PrQfm2VzfM850b-qipFLnjS4,18868
14
14
  sonatoki/ilo.py,sha256=PWZa202Q4h7IjnLxmfgT93iAPJL7dqJbA97L9kQDPiA,5658
15
15
  sonatoki/linku.json,sha256=FLsaESG01rQ88OU8HvwOUl_P9qtGykJ1X-1xoMVDkKA,295077
16
16
  sonatoki/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  sonatoki/sandbox.json,sha256=3BpCEjw-kB4z7DJAJ2UrE1YuFIe3knat8qi1iYuAIq4,83555
18
18
  sonatoki/syllabic.txt,sha256=HnqY4TrZ3tPcHah3TsvG9F9gjMrnAGdJ8hHJNHyyUPc,1712
19
19
  sonatoki/utils.py,sha256=sT5xLMEj0aLpy8GP92HKblJU1Wt1m8NUlMgCFWB32xQ,2265
20
- sonatoki-0.5.0.dist-info/RECORD,,
20
+ sonatoki-0.5.1.dist-info/RECORD,,