swarmauri_parser_keywordextractor 0.6.0.dev154__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swarmauri_parser_keywordextractor-0.6.0.dev154/PKG-INFO +20 -0
- swarmauri_parser_keywordextractor-0.6.0.dev154/README.md +1 -0
- swarmauri_parser_keywordextractor-0.6.0.dev154/pyproject.toml +56 -0
- swarmauri_parser_keywordextractor-0.6.0.dev154/swarmauri_parser_keywordextractor/KeywordExtractorParser.py +55 -0
- swarmauri_parser_keywordextractor-0.6.0.dev154/swarmauri_parser_keywordextractor/__init__.py +14 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: swarmauri_parser_keywordextractor
|
|
3
|
+
Version: 0.6.0.dev154
|
|
4
|
+
Summary: Keyword Extractor Parser for Swarmauri.
|
|
5
|
+
License: Apache-2.0
|
|
6
|
+
Author: Jacob Stewart
|
|
7
|
+
Author-email: jacob@swarmauri.com
|
|
8
|
+
Requires-Python: >=3.10,<3.13
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
|
|
15
|
+
Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
|
|
16
|
+
Requires-Dist: yake (==0.4.8)
|
|
17
|
+
Project-URL: Repository, https://github.com/swarmauri/swarmauri-sdk
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# Swarmauri Keyword Extractor Parser
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Swarmauri Keyword Extractor Parser
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "swarmauri_parser_keywordextractor"
|
|
3
|
+
version = "0.6.0.dev154"
|
|
4
|
+
description = "Keyword Extractor Parser for Swarmauri."
|
|
5
|
+
authors = ["Jacob Stewart <jacob@swarmauri.com>"]
|
|
6
|
+
license = "Apache-2.0"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
repository = "https://github.com/swarmauri/swarmauri-sdk"
|
|
9
|
+
classifiers = [
|
|
10
|
+
"License :: OSI Approved :: Apache Software License",
|
|
11
|
+
"Programming Language :: Python :: 3.10",
|
|
12
|
+
"Programming Language :: Python :: 3.11",
|
|
13
|
+
"Programming Language :: Python :: 3.12"
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[tool.poetry.dependencies]
|
|
17
|
+
python = ">=3.10,<3.13"
|
|
18
|
+
|
|
19
|
+
# Swarmauri
|
|
20
|
+
swarmauri_core = {version = "^0.6.0.dev154"}
|
|
21
|
+
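swarmauri_base = {version = "^0.6.0.dev154"}
# KeywordExtractorParser.py imports swarmauri_standard.documents.Document,
# so the package must be declared explicitly rather than relied on transitively.
swarmauri_standard = {version = "^0.6.0.dev154"}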
|
|
22
|
+
|
|
23
|
+
# Dependencies
|
|
24
|
+
yake = "==0.4.8"
|
|
25
|
+
|
|
26
|
+
[tool.poetry.group.dev.dependencies]
|
|
27
|
+
flake8 = "^7.0"
|
|
28
|
+
pytest = "^8.0"
|
|
29
|
+
pytest-asyncio = ">=0.24.0"
|
|
30
|
+
pytest-xdist = "^3.6.1"
|
|
31
|
+
pytest-json-report = "^1.5.0"
|
|
32
|
+
python-dotenv = "*"
|
|
33
|
+
requests = "^2.32.3"
|
|
34
|
+
|
|
35
|
+
[build-system]
|
|
36
|
+
requires = ["poetry-core>=1.0.0"]
|
|
37
|
+
build-backend = "poetry.core.masonry.api"
|
|
38
|
+
|
|
39
|
+
[tool.pytest.ini_options]
|
|
40
|
+
norecursedirs = ["combined", "scripts"]
|
|
41
|
+
|
|
42
|
+
markers = [
|
|
43
|
+
"test: standard test",
|
|
44
|
+
"unit: Unit tests",
|
|
45
|
+
"integration: Integration tests",
|
|
46
|
+
"acceptance: Acceptance tests",
|
|
47
|
+
"experimental: Experimental tests"
|
|
48
|
+
]
|
|
49
|
+
log_cli = true
|
|
50
|
+
log_cli_level = "INFO"
|
|
51
|
+
log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
|
|
52
|
+
log_cli_date_format = "%Y-%m-%d %H:%M:%S"
|
|
53
|
+
asyncio_default_fixture_loop_scope = "function"
|
|
54
|
+
|
|
55
|
+
[tool.poetry.plugins."swarmauri.parsers"]
|
|
56
|
+
KeywordExtractorParser = "swarmauri_parser_keywordextractor:KeywordExtractorParser"
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import yake
|
|
2
|
+
from typing import List, Union, Any, Literal
|
|
3
|
+
from pydantic import ConfigDict, PrivateAttr
|
|
4
|
+
from swarmauri_standard.documents.Document import Document
|
|
5
|
+
from swarmauri_base.parsers.ParserBase import ParserBase
|
|
6
|
+
from swarmauri_core.ComponentBase import ComponentBase
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@ComponentBase.register_type(ParserBase, "KeywordExtractorParser")
class KeywordExtractorParser(ParserBase):
    """
    Extracts keywords from text using the YAKE keyword extraction library.

    Attributes:
    - lang (str): Language code forwarded to YAKE as ``lan``. Defaults to "en".
    - num_keywords (int): Value forwarded to YAKE as ``top``. Defaults to 10.
    - type (Literal["KeywordExtractorParser"]): Component type discriminator.
    """

    lang: str = "en"
    num_keywords: int = 10
    # Holds the YAKE extractor. It stays None until __init__ (or the first
    # call to parse) builds it from the current field values.
    _kw_extractor: yake.KeywordExtractor = PrivateAttr(default=None)
    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
    type: Literal["KeywordExtractorParser"] = "KeywordExtractorParser"

    def __init__(self, **data):
        """
        Validate the model fields and build the YAKE extractor.

        Parameters:
        - **data: Field values forwarded unchanged to the parent initializer.
        """
        super().__init__(**data)
        self._kw_extractor = self._build_extractor()

    def _build_extractor(self) -> yake.KeywordExtractor:
        """
        Create a YAKE extractor configured from ``lang`` and ``num_keywords``.

        All other YAKE settings are fixed to the values below.

        Returns:
        - yake.KeywordExtractor: A newly constructed extractor.
        """
        return yake.KeywordExtractor(
            lan=self.lang,
            n=3,
            dedupLim=0.9,
            dedupFunc="seqm",
            windowsSize=1,
            top=self.num_keywords,
            features=None,
        )

    def parse(self, data: Union[str, Any]) -> List[Document]:
        """
        Extract keywords from input text and return them as Document instances.

        Parameters:
        - data (Union[str, Any]): The input text from which to extract keywords.
          Non-string input is converted with ``str()`` before extraction.

        Returns:
        - List[Document]: One Document per extracted keyword; ``content`` is the
          keyword and ``metadata["score"]`` is the score YAKE returned for it.
        """
        # Ensure data is in string format for analysis
        text = data if isinstance(data, str) else str(data)

        # Instances created without running __init__ (for example through
        # pydantic's model_construct) still carry the None default, so build
        # the extractor on first use instead of failing on None.
        if self._kw_extractor is None:
            self._kw_extractor = self._build_extractor()

        # Extract keywords using YAKE; each result is a (keyword, score) pair
        keywords = self._kw_extractor.extract_keywords(text)

        # Create Document instances for each keyword
        return [
            Document(content=keyword, metadata={"score": score})
            for keyword, score in keywords
        ]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from .KeywordExtractorParser import KeywordExtractorParser
|
|
2
|
+
|
|
3
|
+
# Must match the version declared in pyproject.toml / PKG-INFO for this release.
__version__ = "0.6.0.dev154"
# Long-form description of the package, written as Markdown text.
__long_desc__ = """

# Swarmauri Keyword Extractor Plugin

This repository includes a Keyword Extractor of a Swarmauri Plugin.

Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""
|