swarmauri_parser_textblob 0.6.0.dev154__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.3
2
+ Name: swarmauri_parser_textblob
3
+ Version: 0.6.0.dev154
4
+ Summary: TextBlob Parser for Swarmauri.
5
+ License: Apache-2.0
6
+ Author: Jacob Stewart
7
+ Author-email: jacob@swarmauri.com
8
+ Requires-Python: >=3.10,<3.13
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: nltk (>=3.9.1,<4.0.0)
15
+ Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
16
+ Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
17
+ Requires-Dist: textblob (>=0.18.0,<0.19.0)
18
+ Project-URL: Repository, https://github.com/swarmauri/swarmauri-sdk
19
+ Description-Content-Type: text/markdown
20
+
21
+ # Swarmauri Example Community Package
@@ -0,0 +1 @@
1
+ # Swarmauri Example Community Package
@@ -0,0 +1,59 @@
1
+ [tool.poetry]
2
+ name = "swarmauri_parser_textblob"
3
+ version = "0.6.0.dev154"
4
+ description = "TextBlob Parser for Swarmauri."
5
+ authors = ["Jacob Stewart <jacob@swarmauri.com>"]
6
+ license = "Apache-2.0"
7
+ readme = "README.md"
8
+ repository = "https://github.com/swarmauri/swarmauri-sdk"
9
+ classifiers = [
10
+ "License :: OSI Approved :: Apache Software License",
11
+ "Programming Language :: Python :: 3.10",
12
+ "Programming Language :: Python :: 3.11",
13
+ "Programming Language :: Python :: 3.12"
14
+ ]
15
+
16
+ [tool.poetry.dependencies]
17
+ python = ">=3.10,<3.13"
18
+
19
+ # Swarmauri
20
+ swarmauri_core = {version = "^0.6.0.dev154"}
21
+ swarmauri_base = {version = "^0.6.0.dev154"}
22
+
23
+ # Dependencies
24
+ textblob = "^0.18.0"
25
+ nltk = "^3.9.1"
26
+
27
+
28
+ [tool.poetry.group.dev.dependencies]
29
+ flake8 = "^7.0"
30
+ pytest = "^8.0"
31
+ pytest-asyncio = ">=0.24.0"
32
+ pytest-xdist = "^3.6.1"
33
+ pytest-json-report = "^1.5.0"
34
+ python-dotenv = "*"
35
+ requests = "^2.32.3"
36
+
37
+ [build-system]
38
+ requires = ["poetry-core>=1.0.0"]
39
+ build-backend = "poetry.core.masonry.api"
40
+
41
+ [tool.pytest.ini_options]
42
+ norecursedirs = ["combined", "scripts"]
43
+
44
+ markers = [
45
+ "test: standard test",
46
+ "unit: Unit tests",
47
+ "integration: Integration tests",
48
+ "acceptance: Acceptance tests",
49
+ "experimental: Experimental tests"
50
+ ]
51
+ log_cli = true
52
+ log_cli_level = "INFO"
53
+ log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
54
+ log_cli_date_format = "%Y-%m-%d %H:%M:%S"
55
+ asyncio_default_fixture_loop_scope = "function"
56
+
57
+ [tool.poetry.plugins."swarmauri.parsers"]
58
+ TextBlobNounParser = "swarmauri_parser_textblob.TextBlobNounParser:TextBlobNounParser"
59
+ TextBlobSentenceParser = "swarmauri_parser_textblob.TextBlobSentenceParser:TextBlobSentenceParser"
@@ -0,0 +1,60 @@
1
+ from swarmauri_core.ComponentBase import ComponentBase
2
+ from textblob import TextBlob
3
+ from typing import List, Union, Any, Literal
4
+ from swarmauri_standard.documents.Document import Document
5
+ from swarmauri_base.parsers.ParserBase import ParserBase
6
+
7
+
8
@ComponentBase.register_type(ParserBase, "TextBlobNounParser")
class TextBlobNounParser(ParserBase):
    """
    A concrete implementation of IParser using TextBlob for Natural Language
    Processing tasks.

    This parser leverages TextBlob's noun phrase extraction to annotate each
    input text with the noun phrases it contains. The required NLTK corpora
    are fetched at construction time.
    """

    # Discriminator used by the component registry / pydantic serialization.
    type: Literal["TextBlobNounParser"] = "TextBlobNounParser"

    def __init__(self, **kwargs):
        """Initialize the parser and fetch the NLTK data TextBlob relies on.

        Parameters:
            **kwargs: Forwarded unchanged to ``ParserBase.__init__``.

        Raises:
            RuntimeError: If any required NLTK resource cannot be downloaded.
        """
        try:
            import nltk

            # TextBlob delegates tokenization, tagging, and noun-phrase
            # chunking to these NLTK datasets. quiet=True suppresses the
            # per-call progress noise; nltk skips resources already present.
            for resource in (
                "punkt",
                "averaged_perceptron_tagger",
                "brown",
                "wordnet",
                "punkt_tab",
            ):
                nltk.download(resource, quiet=True)
            super().__init__(**kwargs)
        except Exception as e:
            # Chain the original exception so the root cause stays visible.
            raise RuntimeError(f"Failed to initialize NLTK resources: {str(e)}") from e

    def parse(self, data: Union[str, Any]) -> List[Document]:
        """
        Parse the input text with TextBlob and return documents annotated
        with the extracted noun phrases.

        Parameters:
            data (Union[str, Any]): The input data to parse; must be a string.

        Returns:
            List[Document]: A single-element list holding the original text
            with a ``noun_phrases`` entry in its metadata.

        Raises:
            ValueError: If ``data`` is not a string.
            RuntimeError: If TextBlob fails while analyzing the text.
        """
        # Ensure the data is a string before handing it to TextBlob.
        if not isinstance(data, str):
            raise ValueError("TextBlobParser expects a string as input data.")

        try:
            blob = TextBlob(data)
            # noun_phrases is a lazily-evaluated WordList; materialize it so
            # the metadata holds a plain list.
            noun_phrases = list(blob.noun_phrases)
            document = Document(content=data, metadata={"noun_phrases": noun_phrases})
            return [document]
        except Exception as e:
            raise RuntimeError(f"Error during text parsing: {str(e)}") from e
@@ -0,0 +1,51 @@
1
+ from swarmauri_core.ComponentBase import ComponentBase
2
+ from textblob import TextBlob
3
+ from typing import List, Union, Any, Literal
4
+ from swarmauri_standard.documents.Document import Document
5
+ from swarmauri_base.parsers.ParserBase import ParserBase
6
+
7
+
8
@ComponentBase.register_type(ParserBase, "TextBlobSentenceParser")
class TextBlobSentenceParser(ParserBase):
    """
    A parser that leverages TextBlob to break text into sentences.

    This parser uses the natural language processing capabilities of TextBlob
    to identify sentence boundaries within large blocks of text, emitting one
    document per sentence.
    """

    # Discriminator used by the component registry / pydantic serialization.
    type: Literal["TextBlobSentenceParser"] = "TextBlobSentenceParser"

    def __init__(self, **kwargs):
        """Initialize the parser, fetching the NLTK tokenizer data TextBlob needs.

        Parameters:
            **kwargs: Forwarded unchanged to ``ParserBase.__init__``.

        Raises:
            RuntimeError: If the NLTK tokenizer resource cannot be downloaded.
        """
        # Mirror the error handling of the sibling TextBlobNounParser so a
        # failed download surfaces as a clear RuntimeError instead of an
        # unrelated traceback at first parse.
        try:
            import nltk

            nltk.download("punkt_tab", quiet=True)
            super().__init__(**kwargs)
        except Exception as e:
            raise RuntimeError(f"Failed to initialize NLTK resources: {str(e)}") from e

    def parse(self, data: Union[str, Any]) -> List[Document]:
        """
        Parse the input text into sentence-based document chunks using TextBlob.

        Args:
            data (Union[str, Any]): The input text to be parsed. Non-string
                input is coerced with ``str()``.

        Returns:
            List[Document]: A list of Document instances, one per sentence,
            each tagged with ``{"parser": "TextBlobSentenceParser"}`` metadata.
        """
        # Ensure the input is a string; unlike the noun parser, this parser
        # coerces rather than rejects non-string input.
        if not isinstance(data, str):
            data = str(data)

        # Utilize TextBlob for sentence tokenization; one document per sentence.
        blob = TextBlob(data)
        return [
            Document(
                content=str(sentence), metadata={"parser": "TextBlobSentenceParser"}
            )
            for sentence in blob.sentences
        ]
@@ -0,0 +1,16 @@
1
from .TextBlobNounParser import TextBlobNounParser
from .TextBlobSentenceParser import TextBlobSentenceParser

# Public API of the swarmauri_parser_textblob package.
__all__ = ["TextBlobNounParser", "TextBlobSentenceParser"]

# Keep in sync with pyproject.toml / package metadata (0.6.0.dev154);
# the previous value ("0.6.0.dev26") had drifted from the released version.
__version__ = "0.6.0.dev154"
__long_desc__ = """

# Swarmauri TextBlob Based Components

Components Included:
- TextBlobNounParser
- TextBlobSentenceParser

Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""