swarmauri_parser_textblob 0.6.0.dev154__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swarmauri_parser_textblob-0.6.0.dev154/PKG-INFO +21 -0
- swarmauri_parser_textblob-0.6.0.dev154/README.md +1 -0
- swarmauri_parser_textblob-0.6.0.dev154/pyproject.toml +59 -0
- swarmauri_parser_textblob-0.6.0.dev154/swarmauri_parser_textblob/TextBlobNounParser.py +60 -0
- swarmauri_parser_textblob-0.6.0.dev154/swarmauri_parser_textblob/TextBlobSentenceParser.py +51 -0
- swarmauri_parser_textblob-0.6.0.dev154/swarmauri_parser_textblob/__init__.py +16 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: swarmauri_parser_textblob
|
|
3
|
+
Version: 0.6.0.dev154
|
|
4
|
+
Summary: TextBlob Parser for Swarmauri.
|
|
5
|
+
License: Apache-2.0
|
|
6
|
+
Author: Jacob Stewart
|
|
7
|
+
Author-email: jacob@swarmauri.com
|
|
8
|
+
Requires-Python: >=3.10,<3.13
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Requires-Dist: nltk (>=3.9.1,<4.0.0)
|
|
15
|
+
Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
|
|
16
|
+
Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
|
|
17
|
+
Requires-Dist: textblob (>=0.18.0,<0.19.0)
|
|
18
|
+
Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# Swarmauri TextBlob Parser
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Swarmauri TextBlob Parser
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "swarmauri_parser_textblob"
|
|
3
|
+
version = "0.6.0.dev154"
|
|
4
|
+
description = "TextBlob Parser for Swarmauri."
|
|
5
|
+
authors = ["Jacob Stewart <jacob@swarmauri.com>"]
|
|
6
|
+
license = "Apache-2.0"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
repository = "http://github.com/swarmauri/swarmauri-sdk"
|
|
9
|
+
classifiers = [
|
|
10
|
+
"License :: OSI Approved :: Apache Software License",
|
|
11
|
+
"Programming Language :: Python :: 3.10",
|
|
12
|
+
"Programming Language :: Python :: 3.11",
|
|
13
|
+
"Programming Language :: Python :: 3.12"
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[tool.poetry.dependencies]
|
|
17
|
+
python = ">=3.10,<3.13"
|
|
18
|
+
|
|
19
|
+
# Swarmauri
|
|
20
|
+
swarmauri_core = {version = "^0.6.0.dev154"}
|
|
21
|
+
swarmauri_base = {version = "^0.6.0.dev154"}
|
|
22
|
+
|
|
23
|
+
# Dependencies
|
|
24
|
+
textblob = "^0.18.0"
|
|
25
|
+
nltk = "^3.9.1"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
[tool.poetry.group.dev.dependencies]
|
|
29
|
+
flake8 = "^7.0"
|
|
30
|
+
pytest = "^8.0"
|
|
31
|
+
pytest-asyncio = ">=0.24.0"
|
|
32
|
+
pytest-xdist = "^3.6.1"
|
|
33
|
+
pytest-json-report = "^1.5.0"
|
|
34
|
+
python-dotenv = "*"
|
|
35
|
+
requests = "^2.32.3"
|
|
36
|
+
|
|
37
|
+
[build-system]
|
|
38
|
+
requires = ["poetry-core>=1.0.0"]
|
|
39
|
+
build-backend = "poetry.core.masonry.api"
|
|
40
|
+
|
|
41
|
+
[tool.pytest.ini_options]
|
|
42
|
+
norecursedirs = ["combined", "scripts"]
|
|
43
|
+
|
|
44
|
+
markers = [
|
|
45
|
+
"test: standard test",
|
|
46
|
+
"unit: Unit tests",
|
|
47
|
+
"integration: Integration tests",
|
|
48
|
+
"acceptance: Acceptance tests",
|
|
49
|
+
"experimental: Experimental tests"
|
|
50
|
+
]
|
|
51
|
+
log_cli = true
|
|
52
|
+
log_cli_level = "INFO"
|
|
53
|
+
log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
|
|
54
|
+
log_cli_date_format = "%Y-%m-%d %H:%M:%S"
|
|
55
|
+
asyncio_default_fixture_loop_scope = "function"
|
|
56
|
+
|
|
57
|
+
[tool.poetry.plugins."swarmauri.parsers"]
|
|
58
|
+
TextBlobNounParser = "swarmauri_parser_textblob.TextBlobNounParser:TextBlobNounParser"
|
|
59
|
+
TextBlobSentenceParser = "swarmauri_parser_textblob.TextBlobSentenceParser:TextBlobSentenceParser"
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from swarmauri_core.ComponentBase import ComponentBase
|
|
2
|
+
from textblob import TextBlob
|
|
3
|
+
from typing import List, Union, Any, Literal
|
|
4
|
+
from swarmauri_standard.documents.Document import Document
|
|
5
|
+
from swarmauri_base.parsers.ParserBase import ParserBase
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@ComponentBase.register_type(ParserBase, "TextBlobNounParser")
class TextBlobNounParser(ParserBase):
    """
    A concrete implementation of IParser using TextBlob for Natural Language Processing tasks.

    This parser leverages TextBlob's functionalities such as noun phrase extraction,
    sentiment analysis, classification, language translation, and more for parsing texts.
    """

    type: Literal["TextBlobNounParser"] = "TextBlobNounParser"

    def __init__(self, **kwargs):
        """Initialize the parser and download the NLTK data TextBlob relies on.

        Raises:
            RuntimeError: If downloading any required NLTK resource fails.
        """
        # Run base initialization first so that validation errors raised by
        # the base class propagate as-is instead of being masked as a
        # RuntimeError about NLTK resources (the original wrapped this call
        # inside the try block below).
        super().__init__(**kwargs)
        try:
            import nltk

            # Corpora/models needed by TextBlob's tokenization and
            # noun-phrase extraction.
            for resource in (
                "punkt",
                "averaged_perceptron_tagger",
                "brown",
                "wordnet",
                "punkt_tab",
            ):
                nltk.download(resource)
        except Exception as e:
            raise RuntimeError(f"Failed to initialize NLTK resources: {str(e)}")

    def parse(self, data: Union[str, Any]) -> List[Document]:
        """
        Parses the input data using TextBlob to perform basic NLP tasks
        and returns a list of documents with the parsed information.

        Parameters:
        - data (Union[str, Any]): The input data to parse, expected to be text data for this parser.

        Returns:
        - List[Document]: A list of documents with metadata generated from the parsing process.

        Raises:
        - ValueError: If ``data`` is not a string.
        - RuntimeError: If TextBlob processing fails.
        """
        # Ensure the data is a string
        if not isinstance(data, str):
            raise ValueError("TextBlobParser expects a string as input data.")

        try:
            # Use TextBlob for NLP tasks
            blob = TextBlob(data)

            # Extracts noun phrases to demonstrate one of TextBlob's capabilities.
            noun_phrases = list(blob.noun_phrases)

            # Create document with extracted information
            document = Document(content=data, metadata={"noun_phrases": noun_phrases})

            return [document]
        except Exception as e:
            raise RuntimeError(f"Error during text parsing: {str(e)}")
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from swarmauri_core.ComponentBase import ComponentBase
|
|
2
|
+
from textblob import TextBlob
|
|
3
|
+
from typing import List, Union, Any, Literal
|
|
4
|
+
from swarmauri_standard.documents.Document import Document
|
|
5
|
+
from swarmauri_base.parsers.ParserBase import ParserBase
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@ComponentBase.register_type(ParserBase, "TextBlobSentenceParser")
class TextBlobSentenceParser(ParserBase):
    """
    A parser that leverages TextBlob to break text into sentences.

    This parser uses the natural language processing capabilities of TextBlob
    to accurately identify sentence boundaries within large blocks of text.
    """

    type: Literal["TextBlobSentenceParser"] = "TextBlobSentenceParser"

    def __init__(self, **kwargs):
        """Initialize the parser and download the NLTK tokenizer data TextBlob needs."""
        import nltk

        # "punkt_tab" supplies the sentence tokenizer models used by
        # TextBlob's sentence segmentation.
        nltk.download("punkt_tab")
        super().__init__(**kwargs)

    def parse(self, data: Union[str, Any]) -> List[Document]:
        """
        Parses the input text into sentence-based document chunks using TextBlob.

        Args:
            data (Union[str, Any]): The input text to be parsed. Non-string
                input is coerced with ``str()``.

        Returns:
            List[Document]: A list of Document instances, one per detected sentence.
        """
        # Ensure the input is a string
        if not isinstance(data, str):
            data = str(data)

        # Utilize TextBlob for sentence tokenization
        blob = TextBlob(data)

        # Create a document instance for each sentence. The original built
        # this with enumerate() but never used the index, so plain iteration
        # is used instead.
        return [
            Document(
                content=str(sentence), metadata={"parser": "TextBlobSentenceParser"}
            )
            for sentence in blob.sentences
        ]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from .TextBlobNounParser import TextBlobNounParser
from .TextBlobSentenceParser import TextBlobSentenceParser

# Public API of this package.
__all__ = ["TextBlobNounParser", "TextBlobSentenceParser"]

# Keep in sync with the version declared in pyproject.toml / PKG-INFO
# (the original file lagged behind at "0.6.0.dev26" while the package
# metadata declares 0.6.0.dev154).
__version__ = "0.6.0.dev154"
__long_desc__ = """

# Swarmauri TextBlob Based Components

Components Included:
- TextBlobNounParser
- TextBlobSentenceParser

Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""
|