sonatoki 0.7.0__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sonatoki-0.7.0 → sonatoki-0.8.0}/PKG-INFO +1 -1
- {sonatoki-0.7.0 → sonatoki-0.8.0}/pyproject.toml +1 -1
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/Configs.py +2 -2
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/Filters.py +25 -2
- {sonatoki-0.7.0 → sonatoki-0.8.0}/tests/test_filters.py +3 -3
- {sonatoki-0.7.0 → sonatoki-0.8.0}/tests/test_ilo.py +4 -1
- {sonatoki-0.7.0 → sonatoki-0.8.0}/tests/test_scorers.py +2 -2
- {sonatoki-0.7.0 → sonatoki-0.8.0}/LICENSE +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/README.md +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/Cleaners.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/Preprocessors.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/Scorers.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/Tokenizers.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/__init__.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/__main__.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/alphabetic.txt +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/constants.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/ilo.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/linku.json +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/py.typed +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/sandbox.json +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/syllabic.txt +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/types.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/utils.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/tests/__init__.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/tests/test_cleaners.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/tests/test_preprocessors.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/tests/test_properties.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/tests/test_tokenize.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/tests/test_utils.py +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/tests/tokenize_cases/tokenize_sentences_tok.yml +0 -0
- {sonatoki-0.7.0 → sonatoki-0.8.0}/tests/tokenize_cases/tokenize_words_tok.yml +0 -0
{sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/Configs.py

@@ -11,12 +11,12 @@ from sonatoki.Filters import (
     And,
     Not,
     Filter,
+    PuName,
     Numeric,
     NimiUCSUR,
     Alphabetic,
     NimiKuLili,
     NimiKuSuli,
-    ProperName,
     Punctuation,
     LongSyllabic,
     Miscellaneous,
@@ -131,7 +131,7 @@ LazyConfig: IloConfig = {
     "preprocessors": [Emoji, Codeblock, URLs, AngleBracketObject, Reference],
     "cleaners": [ConsecutiveDuplicates],
     "ignoring_filters": [Numeric, Punctuation],
-    "scoring_filters": [Alphabetic, NimiUCSUR, ProperName, Miscellaneous],
+    "scoring_filters": [Alphabetic, NimiUCSUR, PuName, Miscellaneous],
     "scorer": SoftPassFail,
     "passing_score": 0.8,
     "word_tokenizer": WordTokenizerRe, # mimics old tokenizer
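For orientation, a hedged sketch of how this config change could be consumed. It assumes sonatoki.ilo exposes an Ilo class that accepts an IloConfig's entries as keyword arguments and offers an is_toki_pona method; neither is shown in this diff, only the config keys above are.

    # Minimal sketch (not from this diff): building an Ilo from LazyConfig and,
    # optionally, restoring the weakened ProperName filter in a copied config.
    # Assumes Ilo(**config) and Ilo.is_toki_pona(...) exist as described above.
    from sonatoki.ilo import Ilo            # assumed entry point
    from sonatoki.Configs import LazyConfig
    from sonatoki.Filters import ProperName, PuName

    ilo = Ilo(**LazyConfig)                 # 0.8.0: LazyConfig scores names with PuName
    print(ilo.is_toki_pona("poki li nasin SQLite"))  # assumed method

    # To use the weakened 0.8.0 ProperName semantics instead, swap the filter
    # in a copy of the config before constructing the Ilo.
    custom = dict(LazyConfig)
    custom["scoring_filters"] = [
        ProperName if f is PuName else f for f in LazyConfig["scoring_filters"]
    ]
    relaxed_ilo = Ilo(**custom)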
{sonatoki-0.7.0 → sonatoki-0.8.0}/src/sonatoki/Filters.py

@@ -141,8 +141,27 @@ class FalsePosAlphabetic(MemberFilter):


 class ProperName(Filter):
-    """
-
+    """Determine if a given token is a valid name based on a reasonable weakening of
+    the rules given in Toki Pona: The Language of Good. A token matches if it has a capital
+    letter at its start and is **not** fully capitalized.
+
+    This corrects an issue with PuName, where scripts lacking a case distinction are
+    errantly counted"""
+
+    @classmethod
+    @override
+    @cache(maxsize=None)
+    def filter(cls, token: str) -> bool:
+        first_capitalized = token[0].isupper()
+        all_caps = token.isupper()
+
+        return first_capitalized and not all_caps
+
+
+class PuName(Filter):
+    """Determine if a given token is a valid name (also called a loan word) based on
+    the rules given in Toki Pona: The Language of Good.
+    When Toki Pona is written with the Latin alphabet, names are
     capitalized at their start. This filter identifies those tokens.

     Note that this alone cannot determine if a token is a valid name,
@@ -156,6 +175,9 @@ class ProperName(Filter):
     @override
     @cache(maxsize=None)
     def filter(cls, token: str) -> bool:
+        # first_capitalized = token[0].isupper()
+        # rest_capitalized = token[1:] == token[1:].upper()
+        # return first_capitalized and not rest_capitalized
         return token == token.capitalize()
         # TODO: If the token is in a script which doesn't have a case distinction,
         # this will errantly match.
@@ -445,6 +467,7 @@ __all__ = [
     "Or",
     "Phonotactic",
     "ProperName",
+    "PuName",
     "Punctuation",
     "Syllabic",
 ]
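To make the behavioral difference concrete, here is a minimal sketch (not part of the package's tests) exercising the two filters as defined in the hunks above; the expected results follow directly from the filter bodies. "USA" is an illustrative token of ours, while the other tokens appear in the new test data later in this diff.

    # Sketch: the weakened ProperName vs. the stricter PuName, per the code above.
    from sonatoki.Filters import ProperName, PuName

    # ProperName: leading capital letter and not fully capitalized.
    assert ProperName.filter("Firefox")      # capitalized normally
    assert ProperName.filter("SQLite")       # mixed case now counts as a name
    assert ProperName.filter("FaceBook")     # likewise
    assert not ProperName.filter("USA")      # fully capitalized is rejected
    assert not ProperName.filter("21st")     # no leading capital letter

    # PuName (the old ProperName behavior): token must equal token.capitalize().
    assert PuName.filter("Firefox")
    assert not PuName.filter("SQLite")       # "SQLite" != "Sqlite"
    assert PuName.filter("21st")             # digits carry no case, so it matches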
{sonatoki-0.7.0 → sonatoki-0.8.0}/tests/test_filters.py

@@ -11,12 +11,12 @@ from sonatoki.Filters import (
     And,
     Not,
     NimiPu,
+    PuName,
     Numeric,
     Syllabic,
     Alphabetic,
     NimiKuLili,
     NimiKuSuli,
-    ProperName,
     Phonotactic,
     Punctuation,
     AlphabeticRe,
@@ -144,7 +144,7 @@ def test_AlphabeticRe(s: str):

 @given(st.from_regex(PROPER_NAME_RE, fullmatch=True))
 def test_ProperName(s: str):
-    res = ProperName.filter(s)
+    res = PuName.filter(s)
     assert res, repr(s)


@@ -247,7 +247,7 @@ def test_OrFilter_IsipinEpiku(s: str):
 @given(st.sampled_from(list(words_by_tag("book", "pu"))))
 def test_AndFilter(s: str):
     s = s.capitalize()
-    f = And(ProperName, NimiPu)
+    f = And(PuName, NimiPu)
     assert f.filter(s)


{sonatoki-0.7.0 → sonatoki-0.8.0}/tests/test_ilo.py

@@ -94,6 +94,9 @@ NAME_MATCHES = [
     "toki Kanse li lon",
     "toki Lojban li nasa e lawa mi",
     "ilo Firefox",
+    "ilo FaceBook li nasa",
+    "mi kepeken ilo MySQL",
+    "poki li nasin SQLite",
     "mi musi Space Station 13",
     "jan Tepo en jan Salo en jan Lakuse en pipi Kewapi en soweli Eweke en mi li musi",
 ]
@@ -135,7 +138,6 @@ EXCESSIVE_SYLLABICS = [
 ]

 EXCESSIVE_ALPHABETICS = [
-    "21st", # candidate for xfails?
     "wen i tok usin onli notes in toki pona i look silli. ",
     "I wait, I sulk, as a tool I make stoops to ineptness.",
     "aaa i non-saw usa's most multiple element-set. it's as asinine as in `e`-less speak",
@@ -161,6 +163,7 @@ EXCESSIVE_ENGLISH = [
     "i'm online all the time",
     "How to Cut a Kiwi",
     "a e i o u",
+    "21st", # previous false positive; fixed by ProperName change
 ]

 NON_MATCHES = [
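The relocated "21st" case illustrates the fix: under the old check (now PuName) a digit-leading token equals its own str.capitalize() and so counted as a name, while the weakened ProperName demands an actual leading capital. A tiny sketch:

    # Sketch of why "21st" no longer passes the weakened name filter.
    token = "21st"
    print(token == token.capitalize())  # True  -> old ProperName / new PuName matched it
    print(token[0].isupper())           # False -> the weakened ProperName rejects it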
{sonatoki-0.7.0 → sonatoki-0.8.0}/tests/test_scorers.py

@@ -10,10 +10,10 @@ from hypothesis import given, example
 from sonatoki.Filters import (
     Filter,
     NimiPu,
+    PuName,
     Numeric,
     Syllabic,
     Alphabetic,
-    ProperName,
     Phonotactic,
     NimiLinkuCore,
     PunctuationRe,
@@ -31,7 +31,7 @@ FILTERS = [
     NimiLinkuCore,
     NimiLinkuCommon,
     Alphabetic,
-    ProperName,
+    PuName,
     Phonotactic,
     PunctuationRe,
 ]