swarmauri_parser_keywordextractor 0.6.0.dev154__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.3
2
+ Name: swarmauri_parser_keywordextractor
3
+ Version: 0.6.0.dev154
4
+ Summary: Keyword Extractor Parser for Swarmauri.
5
+ License: Apache-2.0
6
+ Author: Jacob Stewart
7
+ Author-email: jacob@swarmauri.com
8
+ Requires-Python: >=3.10,<3.13
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
15
+ Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
16
+ Requires-Dist: yake (==0.4.8)
17
+ Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
18
+ Description-Content-Type: text/markdown
19
+
20
+ # Swarmauri Keyword Extractor Parser
@@ -0,0 +1 @@
1
+ # Swarmauri Keyword Extractor Parser
@@ -0,0 +1,56 @@
1
+ [tool.poetry]
2
+ name = "swarmauri_parser_keywordextractor"
3
+ version = "0.6.0.dev154"
4
+ description = "Keyword Extractor Parser for Swarmauri."
5
+ authors = ["Jacob Stewart <jacob@swarmauri.com>"]
6
+ license = "Apache-2.0"
7
+ readme = "README.md"
8
+ repository = "http://github.com/swarmauri/swarmauri-sdk"
9
+ classifiers = [
10
+ "License :: OSI Approved :: Apache Software License",
11
+ "Programming Language :: Python :: 3.10",
12
+ "Programming Language :: Python :: 3.11",
13
+ "Programming Language :: Python :: 3.12"
14
+ ]
15
+
16
+ [tool.poetry.dependencies]
17
+ python = ">=3.10,<3.13"
18
+
19
+ # Swarmauri
20
+ swarmauri_core = {version = "^0.6.0.dev154"}
21
+ swarmauri_base = {version = "^0.6.0.dev154"}
22
+
23
+ # Dependencies
24
+ yake = "==0.4.8"
25
+
26
+ [tool.poetry.group.dev.dependencies]
27
+ flake8 = "^7.0"
28
+ pytest = "^8.0"
29
+ pytest-asyncio = ">=0.24.0"
30
+ pytest-xdist = "^3.6.1"
31
+ pytest-json-report = "^1.5.0"
32
+ python-dotenv = "*"
33
+ requests = "^2.32.3"
34
+
35
+ [build-system]
36
+ requires = ["poetry-core>=1.0.0"]
37
+ build-backend = "poetry.core.masonry.api"
38
+
39
+ [tool.pytest.ini_options]
40
+ norecursedirs = ["combined", "scripts"]
41
+
42
+ markers = [
43
+ "test: standard test",
44
+ "unit: Unit tests",
45
+ "integration: Integration tests",
46
+ "acceptance: Acceptance tests",
47
+ "experimental: Experimental tests"
48
+ ]
49
+ log_cli = true
50
+ log_cli_level = "INFO"
51
+ log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
52
+ log_cli_date_format = "%Y-%m-%d %H:%M:%S"
53
+ asyncio_default_fixture_loop_scope = "function"
54
+
55
+ [tool.poetry.plugins."swarmauri.parsers"]
56
+ KeywordExtractorParser = "swarmauri_parser_keywordextractor:KeywordExtractorParser"
@@ -0,0 +1,55 @@
1
+ import yake
2
+ from typing import List, Union, Any, Literal
3
+ from pydantic import ConfigDict, PrivateAttr
4
+ from swarmauri_standard.documents.Document import Document
5
+ from swarmauri_base.parsers.ParserBase import ParserBase
6
+ from swarmauri_core.ComponentBase import ComponentBase
7
+
8
+
9
@ComponentBase.register_type(ParserBase, "KeywordExtractorParser")
class KeywordExtractorParser(ParserBase):
    """
    Extracts keywords from text using the YAKE keyword extraction library.

    Attributes:
    - lang (str): Language code handed to YAKE as ``lan``. Defaults to "en".
    - num_keywords (int): Number of keywords requested from YAKE (passed as
      ``top``). Defaults to 10.

    The YAKE extractor is built once in ``__init__`` from ``lang`` and
    ``num_keywords``; assigning new values to those fields afterwards does
    not rebuild it.
    """

    lang: str = "en"
    num_keywords: int = 10
    # Holds the YAKE extractor. It is None only between pydantic's
    # initialisation and the assignment at the end of __init__.
    _kw_extractor: yake.KeywordExtractor = PrivateAttr(default=None)
    model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True)
    type: Literal["KeywordExtractorParser"] = "KeywordExtractorParser"

    def __init__(self, **data):
        """
        Validate the model fields, then build the YAKE extractor from them.

        Parameters:
        - **data: Field values for the model (e.g. ``lang``, ``num_keywords``).
        """
        # Fields must be validated first: the extractor is configured from
        # self.lang and self.num_keywords.
        super().__init__(**data)
        # Only the language and the result count come from the model fields;
        # the remaining YAKE settings are fixed here.
        self._kw_extractor = yake.KeywordExtractor(
            lan=self.lang,
            n=3,
            dedupLim=0.9,
            dedupFunc="seqm",
            windowsSize=1,
            top=self.num_keywords,
            features=None,
        )

    def parse(self, data: Union[str, Any]) -> List[Document]:
        """
        Extract keywords from input text and return them as Document instances.

        Parameters:
        - data (Union[str, Any]): The input text from which to extract
          keywords. Non-string input is converted with ``str()`` first.

        Returns:
        - List[Document]: One Document per extracted keyword, with the keyword
          as ``content`` and its YAKE score under ``metadata["score"]``.
        """
        # Ensure data is in string format for analysis
        text = data if isinstance(data, str) else str(data)

        # YAKE yields (keyword, score) pairs; no positional index is needed.
        keywords = self._kw_extractor.extract_keywords(text)

        return [
            Document(content=keyword, metadata={"score": score})
            for keyword, score in keywords
        ]
@@ -0,0 +1,14 @@
1
from importlib.metadata import PackageNotFoundError, version

from .KeywordExtractorParser import KeywordExtractorParser

# Keep star-imports limited to the public parser class.
__all__ = ["KeywordExtractorParser"]

# Read the version from the installed distribution's metadata so it cannot
# drift from the packaged version (the previous hard-coded "0.6.0.dev26" did
# not match the released 0.6.0.dev154).
try:
    __version__ = version("swarmauri_parser_keywordextractor")
except PackageNotFoundError:
    # Not installed as a distribution (e.g. imported straight from a source
    # tree), so no metadata is available to read.
    __version__ = "0.0.0"

__long_desc__ = """

# Swarmauri Keyword Extractor Plugin

This repository includes a Keyword Extractor of a Swarmauri Plugin.

Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""