swarmauri_parser_keywordextractor 0.9.0.dev4__tar.gz → 0.9.0.dev33__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {swarmauri_parser_keywordextractor-0.9.0.dev4 → swarmauri_parser_keywordextractor-0.9.0.dev33}/PKG-INFO +41 -13
- {swarmauri_parser_keywordextractor-0.9.0.dev4 → swarmauri_parser_keywordextractor-0.9.0.dev33}/README.md +33 -10
- {swarmauri_parser_keywordextractor-0.9.0.dev4 → swarmauri_parser_keywordextractor-0.9.0.dev33}/pyproject.toml +12 -1
- {swarmauri_parser_keywordextractor-0.9.0.dev4 → swarmauri_parser_keywordextractor-0.9.0.dev33}/LICENSE +0 -0
- {swarmauri_parser_keywordextractor-0.9.0.dev4 → swarmauri_parser_keywordextractor-0.9.0.dev33}/swarmauri_parser_keywordextractor/KeywordExtractorParser.py +0 -0
- {swarmauri_parser_keywordextractor-0.9.0.dev4 → swarmauri_parser_keywordextractor-0.9.0.dev33}/swarmauri_parser_keywordextractor/__init__.py +0 -0
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: swarmauri_parser_keywordextractor
|
|
3
|
-
Version: 0.9.0.dev4
|
|
3
|
+
Version: 0.9.0.dev33
|
|
4
4
|
Summary: Keyword Extractor Parser for Swarmauri.
|
|
5
|
-
License: Apache-2.0
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Keywords: swarmauri,sdk,standards,parser,keywordextractor
|
|
6
8
|
Author: Jacob Stewart
|
|
7
9
|
Author-email: jacob@swarmauri.com
|
|
8
10
|
Requires-Python: >=3.10,<3.13
|
|
@@ -10,6 +12,9 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
10
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
11
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
13
18
|
Requires-Dist: swarmauri_base
|
|
14
19
|
Requires-Dist: swarmauri_core
|
|
15
20
|
Requires-Dist: swarmauri_standard
|
|
@@ -17,7 +22,7 @@ Requires-Dist: yake (==0.4.8)
|
|
|
17
22
|
Description-Content-Type: text/markdown
|
|
18
23
|
|
|
19
24
|
|
|
20
|
-

|
|
21
26
|
|
|
22
27
|
<p align="center">
|
|
23
28
|
<a href="https://pypi.org/project/swarmauri_parser_keywordextractor/">
|
|
@@ -36,31 +41,54 @@ Description-Content-Type: text/markdown
|
|
|
36
41
|
|
|
37
42
|
# Swarmauri Parser Keywordextractor
|
|
38
43
|
|
|
39
|
-
|
|
44
|
+
`KeywordExtractorParser` wraps the [YAKE](https://github.com/LIAAD/yake) keyword
|
|
45
|
+
extraction library to turn arbitrary text into a ranked list of
|
|
46
|
+
`swarmauri_standard.documents.Document` instances. Each returned document stores
|
|
47
|
+
the detected keyword in `content` and the YAKE importance score in
|
|
48
|
+
`metadata["score"]`.
|
|
49
|
+
|
|
50
|
+
The parser normalizes any input into a string before analysis and, by default,
|
|
51
|
+
extracts up to 10 keywords using the English language model, three-word maximum
|
|
52
|
+
phrases, and YAKE's sequence-matching deduplication (`dedupLim=0.9`). Override
|
|
53
|
+
`lang` or `num_keywords` when instantiating the parser to tailor the output to
|
|
54
|
+
your dataset.
|
|
40
55
|
|
|
41
56
|
## Installation
|
|
42
57
|
|
|
58
|
+
Choose the tool that matches your workflow:
|
|
59
|
+
|
|
43
60
|
```bash
|
|
61
|
+
# pip
|
|
44
62
|
pip install swarmauri_parser_keywordextractor
|
|
63
|
+
|
|
64
|
+
# Poetry
|
|
65
|
+
poetry add swarmauri_parser_keywordextractor
|
|
66
|
+
|
|
67
|
+
# uv
|
|
68
|
+
uv add swarmauri_parser_keywordextractor
|
|
45
69
|
```
|
|
46
70
|
|
|
47
71
|
## Usage
|
|
48
|
-
|
|
72
|
+
|
|
73
|
+
Here's a basic example of how to use the `KeywordExtractorParser`:
|
|
74
|
+
|
|
49
75
|
```python
|
|
50
|
-
from swarmauri_parser_keywordextractor
|
|
76
|
+
from swarmauri_parser_keywordextractor import KeywordExtractorParser
|
|
51
77
|
|
|
52
|
-
# Initialize the parser
|
|
53
|
-
parser = KeywordExtractorParser()
|
|
78
|
+
# Initialize the parser for three keywords in English
|
|
79
|
+
parser = KeywordExtractorParser(num_keywords=3, lang="en")
|
|
54
80
|
|
|
55
|
-
# Parse text and extract keywords
|
|
56
81
|
text = "Artificial intelligence and machine learning are transforming technology"
|
|
57
82
|
documents = parser.parse(text)
|
|
58
83
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
print(f"Keyword: {
|
|
84
|
+
for document in documents:
|
|
85
|
+
score = document.metadata["score"]
|
|
86
|
+
print(f"Keyword: {document.content}, Score: {score:.4f}")
|
|
62
87
|
```
|
|
63
88
|
|
|
89
|
+
Each call to `parse` returns a list of `Document` objects ranked by YAKE so you
|
|
90
|
+
can feed them directly into downstream Swarmauri pipelines.
|
|
91
|
+
|
|
64
92
|
## Want to help?
|
|
65
93
|
|
|
66
94
|
If you want to contribute to swarmauri-sdk, read up on our [guidelines for contributing](https://github.com/swarmauri/swarmauri-sdk/blob/master/contributing.md) that will help you get started.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-

|
|
3
3
|
|
|
4
4
|
<p align="center">
|
|
5
5
|
<a href="https://pypi.org/project/swarmauri_parser_keywordextractor/">
|
|
@@ -18,31 +18,54 @@
|
|
|
18
18
|
|
|
19
19
|
# Swarmauri Parser Keywordextractor
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
`KeywordExtractorParser` wraps the [YAKE](https://github.com/LIAAD/yake) keyword
|
|
22
|
+
extraction library to turn arbitrary text into a ranked list of
|
|
23
|
+
`swarmauri_standard.documents.Document` instances. Each returned document stores
|
|
24
|
+
the detected keyword in `content` and the YAKE importance score in
|
|
25
|
+
`metadata["score"]`.
|
|
26
|
+
|
|
27
|
+
The parser normalizes any input into a string before analysis and, by default,
|
|
28
|
+
extracts up to 10 keywords using the English language model, three-word maximum
|
|
29
|
+
phrases, and YAKE's sequence-matching deduplication (`dedupLim=0.9`). Override
|
|
30
|
+
`lang` or `num_keywords` when instantiating the parser to tailor the output to
|
|
31
|
+
your dataset.
|
|
22
32
|
|
|
23
33
|
## Installation
|
|
24
34
|
|
|
35
|
+
Choose the tool that matches your workflow:
|
|
36
|
+
|
|
25
37
|
```bash
|
|
38
|
+
# pip
|
|
26
39
|
pip install swarmauri_parser_keywordextractor
|
|
40
|
+
|
|
41
|
+
# Poetry
|
|
42
|
+
poetry add swarmauri_parser_keywordextractor
|
|
43
|
+
|
|
44
|
+
# uv
|
|
45
|
+
uv add swarmauri_parser_keywordextractor
|
|
27
46
|
```
|
|
28
47
|
|
|
29
48
|
## Usage
|
|
30
|
-
|
|
49
|
+
|
|
50
|
+
Here's a basic example of how to use the `KeywordExtractorParser`:
|
|
51
|
+
|
|
31
52
|
```python
|
|
32
|
-
from swarmauri_parser_keywordextractor
|
|
53
|
+
from swarmauri_parser_keywordextractor import KeywordExtractorParser
|
|
33
54
|
|
|
34
|
-
# Initialize the parser
|
|
35
|
-
parser = KeywordExtractorParser()
|
|
55
|
+
# Initialize the parser for three keywords in English
|
|
56
|
+
parser = KeywordExtractorParser(num_keywords=3, lang="en")
|
|
36
57
|
|
|
37
|
-
# Parse text and extract keywords
|
|
38
58
|
text = "Artificial intelligence and machine learning are transforming technology"
|
|
39
59
|
documents = parser.parse(text)
|
|
40
60
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
print(f"Keyword: {
|
|
61
|
+
for document in documents:
|
|
62
|
+
score = document.metadata["score"]
|
|
63
|
+
print(f"Keyword: {document.content}, Score: {score:.4f}")
|
|
44
64
|
```
|
|
45
65
|
|
|
66
|
+
Each call to `parse` returns a list of `Document` objects ranked by YAKE so you
|
|
67
|
+
can feed them directly into downstream Swarmauri pipelines.
|
|
68
|
+
|
|
46
69
|
## Want to help?
|
|
47
70
|
|
|
48
71
|
If you want to contribute to swarmauri-sdk, read up on our [guidelines for contributing](https://github.com/swarmauri/swarmauri-sdk/blob/master/contributing.md) that will help you get started.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "swarmauri_parser_keywordextractor"
|
|
3
|
-
version = "0.9.0.dev4"
|
|
3
|
+
version = "0.9.0.dev33"
|
|
4
4
|
description = "Keyword Extractor Parser for Swarmauri."
|
|
5
5
|
license = "Apache-2.0"
|
|
6
6
|
readme = "README.md"
|
|
@@ -11,6 +11,9 @@ classifiers = [
|
|
|
11
11
|
"Programming Language :: Python :: 3.10",
|
|
12
12
|
"Programming Language :: Python :: 3.11",
|
|
13
13
|
"Programming Language :: Python :: 3.12",
|
|
14
|
+
"Programming Language :: Python",
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
14
17
|
]
|
|
15
18
|
authors = [{ name = "Jacob Stewart", email = "jacob@swarmauri.com" }]
|
|
16
19
|
dependencies = [
|
|
@@ -19,6 +22,13 @@ dependencies = [
|
|
|
19
22
|
"swarmauri_base",
|
|
20
23
|
"swarmauri_standard",
|
|
21
24
|
]
|
|
25
|
+
keywords = [
|
|
26
|
+
'swarmauri',
|
|
27
|
+
'sdk',
|
|
28
|
+
'standards',
|
|
29
|
+
'parser',
|
|
30
|
+
'keywordextractor',
|
|
31
|
+
]
|
|
22
32
|
|
|
23
33
|
[tool.uv.sources]
|
|
24
34
|
swarmauri_core = { workspace = true }
|
|
@@ -37,6 +47,7 @@ markers = [
|
|
|
37
47
|
"xfail: Expected failures",
|
|
38
48
|
"acceptance: Acceptance tests",
|
|
39
49
|
"perf: Performance tests that measure execution time and resource usage",
|
|
50
|
+
"example: Example usage tests",
|
|
40
51
|
]
|
|
41
52
|
timeout = 300
|
|
42
53
|
log_cli = true
|
|
File without changes
|
|
File without changes
|