spiderforce4ai 2.3.1__tar.gz → 2.4__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {spiderforce4ai-2.3.1 → spiderforce4ai-2.4}/PKG-INFO +41 -3
- spiderforce4ai-2.4/pyproject.toml +166 -0
- spiderforce4ai-2.4/setup.py +82 -0
- {spiderforce4ai-2.3.1 → spiderforce4ai-2.4}/spiderforce4ai/__init__.py +411 -349
- spiderforce4ai-2.4/spiderforce4ai/post_extraction_agent.py +259 -0
- {spiderforce4ai-2.3.1 → spiderforce4ai-2.4}/spiderforce4ai.egg-info/PKG-INFO +41 -3
- {spiderforce4ai-2.3.1 → spiderforce4ai-2.4}/spiderforce4ai.egg-info/SOURCES.txt +3 -0
- spiderforce4ai-2.4/spiderforce4ai.egg-info/entry_points.txt +2 -0
- spiderforce4ai-2.4/spiderforce4ai.egg-info/not-zip-safe +1 -0
- spiderforce4ai-2.4/spiderforce4ai.egg-info/requires.txt +38 -0
- spiderforce4ai-2.3.1/pyproject.toml +0 -26
- spiderforce4ai-2.3.1/setup.py +0 -29
- spiderforce4ai-2.3.1/spiderforce4ai.egg-info/requires.txt +0 -5
- {spiderforce4ai-2.3.1 → spiderforce4ai-2.4}/README.md +0 -0
- {spiderforce4ai-2.3.1 → spiderforce4ai-2.4}/setup.cfg +0 -0
- {spiderforce4ai-2.3.1 → spiderforce4ai-2.4}/spiderforce4ai.egg-info/dependency_links.txt +0 -0
- {spiderforce4ai-2.3.1 → spiderforce4ai-2.4}/spiderforce4ai.egg-info/top_level.txt +0 -0
@@ -1,16 +1,24 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: spiderforce4ai
|
3
|
-
Version: 2.3.1
|
4
|
-
Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service
|
3
|
+
Version: 2.4
|
4
|
+
Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
|
5
5
|
Home-page: https://petertam.pro
|
6
6
|
Author: Piotr Tamulewicz
|
7
7
|
Author-email: Piotr Tamulewicz <pt@petertam.pro>
|
8
|
-
|
8
|
+
Project-URL: Homepage, https://petertam.pro
|
9
|
+
Project-URL: Documentation, https://petertam.pro/docs/spiderforce4ai
|
10
|
+
Project-URL: Repository, https://github.com/yourusername/spiderforce4ai
|
11
|
+
Project-URL: Bug Tracker, https://github.com/yourusername/spiderforce4ai/issues
|
12
|
+
Keywords: web-scraping,markdown,html-to-markdown,llm,ai,content-extraction,async,parallel-processing
|
9
13
|
Classifier: Development Status :: 4 - Beta
|
10
14
|
Classifier: Intended Audience :: Developers
|
11
15
|
Classifier: License :: OSI Approved :: MIT License
|
12
16
|
Classifier: Programming Language :: Python :: 3.11
|
13
17
|
Classifier: Programming Language :: Python :: 3.12
|
18
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
|
19
|
+
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
14
22
|
Requires-Python: >=3.11
|
15
23
|
Description-Content-Type: text/markdown
|
16
24
|
Requires-Dist: aiohttp>=3.8.0
|
@@ -18,6 +26,36 @@ Requires-Dist: asyncio>=3.4.3
|
|
18
26
|
Requires-Dist: rich>=10.0.0
|
19
27
|
Requires-Dist: aiofiles>=0.8.0
|
20
28
|
Requires-Dist: httpx>=0.24.0
|
29
|
+
Requires-Dist: litellm>=1.26.0
|
30
|
+
Requires-Dist: pydantic>=2.6.0
|
31
|
+
Requires-Dist: requests>=2.31.0
|
32
|
+
Requires-Dist: aiofiles>=23.2.1
|
33
|
+
Requires-Dist: et-xmlfile>=1.1.0
|
34
|
+
Requires-Dist: multidict>=6.0.4
|
35
|
+
Requires-Dist: openai>=1.12.0
|
36
|
+
Requires-Dist: pandas>=2.2.0
|
37
|
+
Requires-Dist: numpy>=1.26.0
|
38
|
+
Requires-Dist: yarl>=1.9.4
|
39
|
+
Requires-Dist: typing_extensions>=4.9.0
|
40
|
+
Provides-Extra: dev
|
41
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
42
|
+
Requires-Dist: pytest-asyncio>=0.21.1; extra == "dev"
|
43
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
44
|
+
Requires-Dist: black>=23.7.0; extra == "dev"
|
45
|
+
Requires-Dist: isort>=5.12.0; extra == "dev"
|
46
|
+
Requires-Dist: mypy>=1.4.1; extra == "dev"
|
47
|
+
Requires-Dist: ruff>=0.1.8; extra == "dev"
|
48
|
+
Requires-Dist: pre-commit>=3.5.0; extra == "dev"
|
49
|
+
Provides-Extra: test
|
50
|
+
Requires-Dist: pytest>=7.4.0; extra == "test"
|
51
|
+
Requires-Dist: pytest-asyncio>=0.21.1; extra == "test"
|
52
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "test"
|
53
|
+
Requires-Dist: pytest-mock>=3.12.0; extra == "test"
|
54
|
+
Requires-Dist: coverage>=7.4.0; extra == "test"
|
55
|
+
Provides-Extra: docs
|
56
|
+
Requires-Dist: sphinx>=7.1.0; extra == "docs"
|
57
|
+
Requires-Dist: sphinx-rtd-theme>=1.3.0; extra == "docs"
|
58
|
+
Requires-Dist: myst-parser>=2.0.0; extra == "docs"
|
21
59
|
Dynamic: author
|
22
60
|
Dynamic: home-page
|
23
61
|
Dynamic: requires-python
|
@@ -0,0 +1,166 @@
|
|
1
|
+
[build-system]
|
2
|
+
requires = ["setuptools>=65.0.0", "wheel>=0.40.0"]
|
3
|
+
build-backend = "setuptools.build_meta"
|
4
|
+
|
5
|
+
[project]
|
6
|
+
name = "spiderforce4ai"
|
7
|
+
version = "2.4"
|
8
|
+
description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
|
9
|
+
readme = "README.md"
|
10
|
+
authors = [
|
11
|
+
{ name = "Piotr Tamulewicz", email = "pt@petertam.pro" }
|
12
|
+
]
|
13
|
+
license = { file = "LICENSE" }
|
14
|
+
classifiers = [
|
15
|
+
"Development Status :: 4 - Beta",
|
16
|
+
"Intended Audience :: Developers",
|
17
|
+
"License :: OSI Approved :: MIT License",
|
18
|
+
"Programming Language :: Python :: 3.11",
|
19
|
+
"Programming Language :: Python :: 3.12",
|
20
|
+
"Topic :: Internet :: WWW/HTTP :: Dynamic Content",
|
21
|
+
"Topic :: Text Processing :: Markup :: Markdown",
|
22
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
23
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
24
|
+
]
|
25
|
+
keywords = [
|
26
|
+
"web-scraping",
|
27
|
+
"markdown",
|
28
|
+
"html-to-markdown",
|
29
|
+
"llm",
|
30
|
+
"ai",
|
31
|
+
"content-extraction",
|
32
|
+
"async",
|
33
|
+
"parallel-processing"
|
34
|
+
]
|
35
|
+
requires-python = ">=3.11"
|
36
|
+
dependencies = [
|
37
|
+
"aiohttp>=3.8.0",
|
38
|
+
"asyncio>=3.4.3",
|
39
|
+
"rich>=10.0.0",
|
40
|
+
"aiofiles>=0.8.0",
|
41
|
+
"httpx>=0.24.0",
|
42
|
+
"litellm>=1.26.0",
|
43
|
+
"pydantic>=2.6.0",
|
44
|
+
"requests>=2.31.0",
|
45
|
+
"aiofiles>=23.2.1",
|
46
|
+
"et-xmlfile>=1.1.0",
|
47
|
+
"multidict>=6.0.4",
|
48
|
+
"openai>=1.12.0",
|
49
|
+
"pandas>=2.2.0",
|
50
|
+
"numpy>=1.26.0",
|
51
|
+
"yarl>=1.9.4",
|
52
|
+
"typing_extensions>=4.9.0",
|
53
|
+
]
|
54
|
+
|
55
|
+
[project.optional-dependencies]
|
56
|
+
dev = [
|
57
|
+
"pytest>=7.4.0",
|
58
|
+
"pytest-asyncio>=0.21.1",
|
59
|
+
"pytest-cov>=4.1.0",
|
60
|
+
"black>=23.7.0",
|
61
|
+
"isort>=5.12.0",
|
62
|
+
"mypy>=1.4.1",
|
63
|
+
"ruff>=0.1.8",
|
64
|
+
"pre-commit>=3.5.0",
|
65
|
+
]
|
66
|
+
|
67
|
+
test = [
|
68
|
+
"pytest>=7.4.0",
|
69
|
+
"pytest-asyncio>=0.21.1",
|
70
|
+
"pytest-cov>=4.1.0",
|
71
|
+
"pytest-mock>=3.12.0",
|
72
|
+
"coverage>=7.4.0",
|
73
|
+
]
|
74
|
+
|
75
|
+
docs = [
|
76
|
+
"sphinx>=7.1.0",
|
77
|
+
"sphinx-rtd-theme>=1.3.0",
|
78
|
+
"myst-parser>=2.0.0",
|
79
|
+
]
|
80
|
+
|
81
|
+
[project.urls]
|
82
|
+
Homepage = "https://petertam.pro"
|
83
|
+
Documentation = "https://petertam.pro/docs/spiderforce4ai"
|
84
|
+
Repository = "https://github.com/yourusername/spiderforce4ai"
|
85
|
+
"Bug Tracker" = "https://github.com/yourusername/spiderforce4ai/issues"
|
86
|
+
|
87
|
+
[project.scripts]
|
88
|
+
spiderforce4ai = "spiderforce4ai.cli:main"
|
89
|
+
|
90
|
+
[tool.setuptools]
|
91
|
+
packages = ["spiderforce4ai"]
|
92
|
+
|
93
|
+
[tool.setuptools.package-data]
|
94
|
+
spiderforce4ai = ["py.typed"]
|
95
|
+
|
96
|
+
[tool.black]
|
97
|
+
line-length = 100
|
98
|
+
target-version = ["py311"]
|
99
|
+
include = '\.pyi?$'
|
100
|
+
|
101
|
+
[tool.isort]
|
102
|
+
profile = "black"
|
103
|
+
line_length = 100
|
104
|
+
multi_line_output = 3
|
105
|
+
include_trailing_comma = true
|
106
|
+
force_grid_wrap = 0
|
107
|
+
use_parentheses = true
|
108
|
+
ensure_newline_before_comments = true
|
109
|
+
|
110
|
+
[tool.mypy]
|
111
|
+
python_version = "3.11"
|
112
|
+
warn_return_any = true
|
113
|
+
warn_unused_configs = true
|
114
|
+
disallow_untyped_defs = true
|
115
|
+
disallow_incomplete_defs = true
|
116
|
+
check_untyped_defs = true
|
117
|
+
disallow_untyped_decorators = true
|
118
|
+
no_implicit_optional = true
|
119
|
+
warn_redundant_casts = true
|
120
|
+
warn_unused_ignores = true
|
121
|
+
warn_no_return = true
|
122
|
+
warn_unreachable = true
|
123
|
+
show_error_codes = true
|
124
|
+
|
125
|
+
[tool.pytest.ini_options]
|
126
|
+
minversion = "7.0"
|
127
|
+
addopts = "-ra -q --cov=spiderforce4ai --cov-report=term-missing"
|
128
|
+
testpaths = ["tests"]
|
129
|
+
asyncio_mode = "auto"
|
130
|
+
|
131
|
+
[tool.coverage.run]
|
132
|
+
source = ["spiderforce4ai"]
|
133
|
+
branch = true
|
134
|
+
|
135
|
+
[tool.coverage.report]
|
136
|
+
exclude_lines = [
|
137
|
+
"pragma: no cover",
|
138
|
+
"def __repr__",
|
139
|
+
"if self.debug:",
|
140
|
+
"raise NotImplementedError",
|
141
|
+
"if __name__ == .__main__.:",
|
142
|
+
"pass",
|
143
|
+
"raise ImportError",
|
144
|
+
]
|
145
|
+
ignore_errors = true
|
146
|
+
omit = [
|
147
|
+
"tests/*",
|
148
|
+
"setup.py",
|
149
|
+
]
|
150
|
+
|
151
|
+
[tool.ruff]
|
152
|
+
line-length = 100
|
153
|
+
target-version = "py311"
|
154
|
+
select = [
|
155
|
+
"E", # pycodestyle errors
|
156
|
+
"W", # pycodestyle warnings
|
157
|
+
"F", # pyflakes
|
158
|
+
"I", # isort
|
159
|
+
"C", # flake8-comprehensions
|
160
|
+
"B", # flake8-bugbear
|
161
|
+
]
|
162
|
+
ignore = [
|
163
|
+
"E501", # line too long
|
164
|
+
]
|
165
|
+
[tool.ruff.per-file-ignores]
|
166
|
+
"__init__.py" = ["F401"]
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# setup.py
|
2
|
+
from setuptools import setup, find_packages
|
3
|
+
|
4
|
+
# Read the README.md file
|
5
|
+
with open("README.md", encoding="utf-8") as f:
|
6
|
+
long_description = f.read()
|
7
|
+
|
8
|
+
setup(
|
9
|
+
name="spiderforce4ai",
|
10
|
+
version="2.4",
|
11
|
+
author="Piotr Tamulewicz",
|
12
|
+
author_email="pt@petertam.pro",
|
13
|
+
description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",
|
14
|
+
long_description=long_description,
|
15
|
+
long_description_content_type="text/markdown",
|
16
|
+
url="https://petertam.pro",
|
17
|
+
packages=find_packages(),
|
18
|
+
classifiers=[
|
19
|
+
"Development Status :: 4 - Beta",
|
20
|
+
"Intended Audience :: Developers",
|
21
|
+
"License :: OSI Approved :: MIT License",
|
22
|
+
"Programming Language :: Python :: 3.11",
|
23
|
+
"Programming Language :: Python :: 3.12",
|
24
|
+
"Topic :: Internet :: WWW/HTTP :: Dynamic Content",
|
25
|
+
"Topic :: Text Processing :: Markup :: Markdown",
|
26
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
27
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
28
|
+
],
|
29
|
+
python_requires=">=3.11",
|
30
|
+
install_requires=[
|
31
|
+
"aiohttp>=3.8.0",
|
32
|
+
"asyncio>=3.4.3",
|
33
|
+
"rich>=10.0.0",
|
34
|
+
"aiofiles>=0.8.0",
|
35
|
+
"httpx>=0.24.0",
|
36
|
+
"litellm>=1.26.0",
|
37
|
+
"pydantic>=2.6.0",
|
38
|
+
"requests>=2.31.0",
|
39
|
+
"aiofiles>=23.2.1",
|
40
|
+
"et-xmlfile>=1.1.0",
|
41
|
+
"multidict>=6.0.4",
|
42
|
+
"openai>=1.12.0",
|
43
|
+
"pandas>=2.2.0",
|
44
|
+
"numpy>=1.26.0",
|
45
|
+
"yarl>=1.9.4",
|
46
|
+
"typing_extensions>=4.9.0"
|
47
|
+
],
|
48
|
+
extras_require={
|
49
|
+
'dev': [
|
50
|
+
'pytest>=7.4.0',
|
51
|
+
'pytest-asyncio>=0.21.1',
|
52
|
+
'pytest-cov>=4.1.0',
|
53
|
+
'black>=23.7.0',
|
54
|
+
'isort>=5.12.0',
|
55
|
+
'mypy>=1.4.1',
|
56
|
+
],
|
57
|
+
},
|
58
|
+
project_urls={
|
59
|
+
"Bug Tracker": "https://github.com/yourusername/spiderforce4ai/issues",
|
60
|
+
"Documentation": "https://petertam.pro/docs/spiderforce4ai",
|
61
|
+
"Source Code": "https://github.com/yourusername/spiderforce4ai",
|
62
|
+
},
|
63
|
+
keywords=[
|
64
|
+
"web-scraping",
|
65
|
+
"markdown",
|
66
|
+
"html-to-markdown",
|
67
|
+
"llm",
|
68
|
+
"ai",
|
69
|
+
"content-extraction",
|
70
|
+
"async",
|
71
|
+
"parallel-processing"
|
72
|
+
],
|
73
|
+
entry_points={
|
74
|
+
'console_scripts': [
|
75
|
+
'spiderforce4ai=spiderforce4ai.cli:main',
|
76
|
+
],
|
77
|
+
},
|
78
|
+
package_data={
|
79
|
+
'spiderforce4ai': ['py.typed'],
|
80
|
+
},
|
81
|
+
zip_safe=False,
|
82
|
+
)
|