PyPI - textclass-en - Versions diffs - 0.1.0__tar.gz - Mend

textclass-en 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

textclass_en-0.1.0/PKG-INFO +18 -0
textclass_en-0.1.0/README.md +12 -0
textclass_en-0.1.0/pyproject.toml +10 -0
textclass_en-0.1.0/src/textclass_en/__init__.py +1 -0
textclass_en-0.1.0/src/textclass_en/main.py +67 -0

textclass_en-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,18 @@
+Metadata-Version: 2.4
+Name: textclass-en
+Version: 0.1.0
+Summary: Extract keywords from English text
+Description-Content-Type: text/markdown
+# textclass-en
+A simple, fast Python library to extract keywords from English text.
+## Quick Start
+```python
+import textclass_en
+textclass_en.textclass("The quick brown fox jumped over the lazy dog") # result: ['quick', 'fox', 'jumped']
+textclass_en.textclass("The quick brown fox jumped over the lazy dog", max_keywords=10) # result: ['quick', 'fox', 'jumped', 'lazy', 'dog']
+textclass_en.textclass("The company 'OpenBrain' is about to go bankrupt because of their newest AI model.") # result: ['company', 'openbrain', 'bankrupt']
+textclass_en.textclass("The company 'OpenBrain' is about to go bankrupt because of their newest AI model.", max_keywords=10) # result: ['company', 'openbrain', 'bankrupt', 'newest', 'ai', 'model']
+```

textclass_en-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,12 @@
+# textclass-en
+A simple, fast Python library to extract keywords from English text.
+## Quick Start
+```python
+import textclass_en
+textclass_en.textclass("The quick brown fox jumped over the lazy dog") # result: ['quick', 'fox', 'jumped']
+textclass_en.textclass("The quick brown fox jumped over the lazy dog", max_keywords=10) # result: ['quick', 'fox', 'jumped', 'lazy', 'dog']
+textclass_en.textclass("The company 'OpenBrain' is about to go bankrupt because of their newest AI model.") # result: ['company', 'openbrain', 'bankrupt']
+textclass_en.textclass("The company 'OpenBrain' is about to go bankrupt because of their newest AI model.", max_keywords=10) # result: ['company', 'openbrain', 'bankrupt', 'newest', 'ai', 'model']
+```

textclass_en-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,10 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "textclass-en"
+version = "0.1.0"
+description = "Extract keywords from English text"
+readme = "README.md"
+dependencies = []

textclass_en-0.1.0/src/textclass_en/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .main import textclass

textclass_en-0.1.0/src/textclass_en/main.py ADDED Viewed

@@ -0,0 +1,67 @@
# Lower-case words that are never reported as keywords: common English
# function words, number words, a handful of generic nouns, and basic colour
# names.  Kept as a tuple so existing callers that index or concatenate it
# keep working.
#
# NOTE: "fify" and "amoungst" are historical misspellings that are retained
# for backward compatibility; "fifty" is listed as well so that it is filtered
# like "forty", "sixty" and "twenty".
stopwords = (
    "a", "about", "above", "across", "after", "afterwards", "again", "against", "all", "almost",
    "alone", "along", "already", "also", "although", "always", "am", "among", "amongst", "amoungst",
    "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything", "anyway", "anywhere",
    "are", "around", "as", "at", "back", "be", "became", "because", "become", "becomes", "becoming",
    "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between",
    "beyond", "bill", "both", "bottom", "but", "by", "call", "can", "cannot", "cant", "co",
    "computer", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do", "done", "down",
    "due", "during", "each", "eg", "eight", "either", "eleven", "else", "elsewhere", "empty",
    "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except",
    "few", "fifteen", "fify", "fifty", "fill", "find", "fire", "first", "five", "for", "former",
    "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go",
    "had", "has",
    "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon",
    "hers", "herself", "him", "himself", "his", "how", "however", "hundred", "i", "ie", "if", "in",
    "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter",
    "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill",
    "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name",
    "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone",
    "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only",
    "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own",
    "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed",
    "seeming", "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere",
    "six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes",
    "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them",
    "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein",
    "thereupon", "these", "they", "thick", "thin", "third", "this", "those", "though", "three",
    "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards",
    "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was",
    "we", "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter",
    "whereas", "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while",
    "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", "with", "within",
    "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves", "red", "green",
    "blue", "magenta", "cyan", "teal", "brown", "orange", "white", "black", "pink",
)
def get_distance(s1, s2):
    """Return the Levenshtein (edit) distance between ``s1`` and ``s2``.

    The distance is the minimum number of single-item insertions, deletions
    and substitutions needed to turn one sequence into the other.  Only two
    rows of the dynamic-programming table are held at any time, and the
    shorter input is used for the row width.

    Args:
        s1: First sequence (typically a string).
        s2: Second sequence (typically a string).

    Returns:
        The edit distance as a non-negative integer.
    """
    # Put the longer sequence on the outer loop so rows are as short as possible.
    if len(s1) < len(s2):
        longer, shorter = s2, s1
    else:
        longer, shorter = s1, s2

    # Turning something into nothing costs one deletion per item.
    if not shorter:
        return len(longer)

    # Row 0: cost of building each prefix of `shorter` from an empty prefix.
    above = list(range(len(shorter) + 1))
    for row_no, item_long in enumerate(longer, start=1):
        # Column 0: cost of deleting the first `row_no` items of `longer`.
        row = [row_no]
        for col, item_short in enumerate(shorter):
            via_substitute = above[col] + (item_long != item_short)
            via_insert = above[col + 1] + 1
            via_delete = row[col] + 1
            row.append(min(via_insert, via_delete, via_substitute))
        above = row
    return above[-1]
def textclass(st, max_keywords=3):
    """Return the most frequent keywords found in ``st``.

    Processing steps:

    1. Lower-case the text and delete every character that is neither
       alphabetic nor whitespace (so ``"can't"`` becomes ``"cant"``).
    2. Split on runs of whitespace and drop tokens listed in ``stopwords``.
    3. Count each remaining token under the first earlier keyword that is
       within edit distance 1 of it (per ``get_distance``), so near-duplicates
       such as ``"model"`` / ``"models"`` are counted together.
    4. Order keywords by descending count; ties keep first-appearance order.

    Args:
        st: The text to analyse.
        max_keywords: Upper slice bound applied to the ordered keyword list.

    Returns:
        A list of at most ``max_keywords`` keyword strings.  The list is
        empty when the text contains no non-stopword tokens.
    """
    cleaned = "".join(ch for ch in st.lower() if ch.isalpha() or ch.isspace())

    # Set membership is O(1); scanning the tuple for every token is not.
    stop = frozenset(stopwords)

    # Keyword -> occurrence count.  Dict insertion order records the order in
    # which keywords were first seen, which the stable sort below relies on
    # for tie-breaking.
    counts = {}

    # split() with no argument splits on any whitespace run (spaces, tabs,
    # newlines) and never yields empty strings.  split(" ") produced "" tokens
    # for empty input or repeated spaces and reported them as keywords.
    for tok in cleaned.split():
        if tok in stop:
            continue
        # Only distinct earlier keywords need checking.  The length test is a
        # cheap pre-filter: an edit distance <= 1 implies lengths differ by
        # at most 1.
        keyword = next(
            (
                seen
                for seen in counts
                if abs(len(seen) - len(tok)) <= 1 and get_distance(tok, seen) <= 1
            ),
            tok,
        )
        counts[keyword] = counts.get(keyword, 0) + 1

    return sorted(counts, key=counts.get, reverse=True)[:max_keywords]