swarmauri_tool_webscraping 0.6.0.dev154__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.3
2
+ Name: swarmauri_tool_webscraping
3
+ Version: 0.6.0.dev154
4
+ Summary: Web Scraping Tool for Swarmauri
5
+ License: Apache-2.0
6
+ Author: Jacob Stewart
7
+ Author-email: jacob@swarmauri.com
8
+ Requires-Python: >=3.10,<3.13
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: beautifulsoup4 (>=4.10.0,<5.0.0)
15
+ Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
16
+ Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
17
+ Project-URL: Repository, https://github.com/swarmauri/swarmauri-sdk
18
+ Description-Content-Type: text/markdown
19
+
20
+ # Swarmauri Web Scraping Tool
@@ -0,0 +1 @@
1
+ # Swarmauri Web Scraping Tool
@@ -0,0 +1,58 @@
1
+ [tool.poetry]
2
+ name = "swarmauri_tool_webscraping"
3
+ version = "0.6.0.dev154"
4
+ description = "Web Scraping Tool for Swarmauri"
5
+ authors = ["Jacob Stewart <jacob@swarmauri.com>"]
6
+ license = "Apache-2.0"
7
+ readme = "README.md"
8
+ repository = "https://github.com/swarmauri/swarmauri-sdk"
9
+ classifiers = [
10
+ "License :: OSI Approved :: Apache Software License",
11
+ "Programming Language :: Python :: 3.10",
12
+ "Programming Language :: Python :: 3.11",
13
+ "Programming Language :: Python :: 3.12"
14
+ ]
15
+
16
+ [tool.poetry.dependencies]
17
+ python = ">=3.10,<3.13"
18
+
19
+ # Swarmauri
20
+ swarmauri_core = {version = "^0.6.0.dev154"}
21
+ swarmauri_base = {version = "^0.6.0.dev154"}
22
+
23
+ # Dependencies
24
+ beautifulsoup4 = "^4.10.0"
25
+
26
+
27
+
28
+ [tool.poetry.group.dev.dependencies]
29
+ flake8 = "^7.0"
30
+ pytest = "^8.0"
31
+ pytest-asyncio = ">=0.24.0"
32
+ pytest-xdist = "^3.6.1"
33
+ pytest-json-report = "^1.5.0"
34
+ python-dotenv = "*"
35
+ requests = "^2.32.3"
36
+
37
+ [build-system]
38
+ requires = ["poetry-core>=1.0.0"]
39
+ build-backend = "poetry.core.masonry.api"
40
+
41
+ [tool.pytest.ini_options]
42
+ norecursedirs = ["combined", "scripts"]
43
+
44
+ markers = [
45
+ "test: standard test",
46
+ "unit: Unit tests",
47
+ "integration: Integration tests",
48
+ "acceptance: Acceptance tests",
49
+ "experimental: Experimental tests"
50
+ ]
51
+ log_cli = true
52
+ log_cli_level = "INFO"
53
+ log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
54
+ log_cli_date_format = "%Y-%m-%d %H:%M:%S"
55
+ asyncio_default_fixture_loop_scope = "function"
56
+
57
+ [tool.poetry.plugins."swarmauri.tools"]
58
+ WebScrapingTool = "swarmauri_tool_webscraping.WebScrapingTool:WebScrapingTool"
@@ -0,0 +1,60 @@
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ from swarmauri_core.ComponentBase import ComponentBase
4
+ from swarmauri_base.tools.ToolBase import ToolBase
5
+ from swarmauri_standard.tools.Parameter import Parameter
6
+ from typing import List, Literal, Dict
7
+ from pydantic import Field
8
+
9
+
10
@ComponentBase.register_type(ToolBase, "WebScrapingTool")
class WebScrapingTool(ToolBase):
    """Tool that fetches a URL and extracts text matching a CSS selector.

    Downloads the page with ``requests`` and parses it with
    ``BeautifulSoup``, returning the newline-joined text of every element
    matched by the selector.
    """

    version: str = "1.0.0"
    # Tool-invocation schema: both arguments are required strings.
    parameters: List[Parameter] = Field(
        default_factory=lambda: [
            Parameter(
                name="url",
                type="string",
                description="URL of the link, website, webpage, etc... to scrape",
                required=True,
            ),
            Parameter(
                name="selector",
                type="string",
                description="CSS selector to target specific elements",
                required=True,
            ),
        ]
    )

    name: str = "WebScrapingTool"
    description: str = (
        "This is a web scraping tool that uses python's requests and BeautifulSoup libraries to parse a URL using a CSS selector to target specific elements."
    )
    type: Literal["WebScrapingTool"] = "WebScrapingTool"

    def __call__(
        self, url: str, selector: str, timeout: float = 10.0
    ) -> Dict[str, str]:
        """
        Fetches content from the specified URL and extracts elements based on
        the provided CSS selector.

        Args:
            url (str): The URL of the webpage to scrape.
            selector (str): CSS selector to target specific elements in the
                webpage.
            timeout (float): Seconds to wait for the HTTP response before
                aborting. Defaults to 10.0; previously no timeout was set,
                so an unresponsive server could block the caller forever.

        Returns:
            Dict[str, str]: ``{"extracted_text": ...}`` on success, or
            ``{"error": ...}`` describing the failure. Errors are reported
            in the return value rather than raised.
        """
        try:
            # requests has no default timeout; without one this call can
            # hang indefinitely on a stalled connection.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()  # Raises HTTPError for bad requests (4xx or 5xx)

            soup = BeautifulSoup(response.content, "html.parser")

            elements = soup.select(selector)
            extracted_text = "\n".join(element.text for element in elements)
            return {"extracted_text": extracted_text}
        except requests.RequestException as e:
            # Connection failures, timeouts, and raise_for_status errors.
            return {"error": f"Request error: {str(e)}"}
        except Exception as e:
            # Catch-all so the tool reports instead of raising; e.g. an
            # invalid CSS selector surfaces here.
            return {"error": f"An error occurred: {str(e)}"}
@@ -0,0 +1,12 @@
1
from .WebScrapingTool import WebScrapingTool

# Keep in sync with the package version declared in pyproject.toml
# (0.6.0.dev154); this previously lagged at 0.6.0.dev26.
__version__ = "0.6.0.dev154"
__long_desc__ = """

# Swarmauri WebScraping Tool Plugin

Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""