spiderforce4ai 2.4.8__py3-none-any.whl → 2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spiderforce4ai/__init__.py +17 -1
- spiderforce4ai/post_extraction_agent.py +23 -19
- {spiderforce4ai-2.4.8.dist-info → spiderforce4ai-2.5.dist-info}/METADATA +1 -1
- spiderforce4ai-2.5.dist-info/RECORD +7 -0
- spiderforce4ai-2.4.8.dist-info/RECORD +0 -7
- {spiderforce4ai-2.4.8.dist-info → spiderforce4ai-2.5.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.4.8.dist-info → spiderforce4ai-2.5.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.4.8.dist-info → spiderforce4ai-2.5.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -5,6 +5,9 @@ import asyncio
|
|
5
5
|
import aiohttp
|
6
6
|
import json
|
7
7
|
import logging
|
8
|
+
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
import logging
|
8
11
|
from typing import List, Dict, Union, Optional, Tuple, Callable, Any
|
9
12
|
from dataclasses import dataclass, asdict
|
10
13
|
from urllib.parse import urljoin, urlparse
|
@@ -877,7 +880,20 @@ class SpiderForce4AI:
|
|
877
880
|
|
878
881
|
for result in successful_results:
|
879
882
|
try:
|
880
|
-
|
883
|
+
# Get LLM response
|
884
|
+
llm_response = agent.process_content(result.url, result.markdown)
|
885
|
+
if llm_response:
|
886
|
+
# Add URL to the response before transformation
|
887
|
+
llm_response['url'] = result.url
|
888
|
+
# Apply transformation if provided
|
889
|
+
if config.post_agent_transformer_function:
|
890
|
+
try:
|
891
|
+
result.extraction_result = config.post_agent_transformer_function(llm_response)
|
892
|
+
except Exception as e:
|
893
|
+
logger.error(f"Transformer error for {result.url}: {str(e)}")
|
894
|
+
result.extraction_result = llm_response # Use original response if transform fails
|
895
|
+
else:
|
896
|
+
result.extraction_result = llm_response
|
881
897
|
progress.update(llm_task, advance=1)
|
882
898
|
except Exception as e:
|
883
899
|
console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")
|
@@ -199,31 +199,35 @@ class PostExtractionAgent:
|
|
199
199
|
completion_args["response_format"] = {"type": self.config.response_format}
|
200
200
|
|
201
201
|
response = completion(**completion_args)
|
202
|
-
|
203
|
-
# Log raw response for debugging
|
204
202
|
raw_content = response.choices[0].message.content
|
205
203
|
logger.debug(f"Raw LLM response for {url}: {raw_content}")
|
206
|
-
|
204
|
+
|
205
|
+
# Handle response based on response_format
|
207
206
|
try:
|
208
|
-
|
209
|
-
|
210
|
-
extracted_data = json.loads(raw_content)
|
211
|
-
|
212
|
-
#
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
207
|
+
if self.config.response_format == "json_object":
|
208
|
+
# For json_object format, response should already be valid JSON
|
209
|
+
extracted_data = raw_content if isinstance(raw_content, dict) else json.loads(raw_content)
|
210
|
+
else:
|
211
|
+
# For text format or unspecified, try parsing JSON or use as text
|
212
|
+
try:
|
213
|
+
extracted_data = json.loads(raw_content)
|
214
|
+
except json.JSONDecodeError:
|
215
|
+
# Look for JSON in markdown code blocks
|
216
|
+
json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
|
217
|
+
if json_match:
|
218
|
+
json_content = json_match.group(1).strip()
|
219
|
+
extracted_data = json.loads(json_content)
|
220
|
+
else:
|
221
|
+
# If no JSON found and not json_object format, use raw content
|
222
|
+
extracted_data = {
|
223
|
+
"raw_content": raw_content,
|
224
|
+
"format": "text",
|
225
|
+
"timestamp": datetime.now().isoformat()
|
226
|
+
}
|
224
227
|
|
225
228
|
self.buffer.remove_request(url) # Remove from buffer if successful
|
226
229
|
return extracted_data
|
230
|
+
|
227
231
|
except Exception as e:
|
228
232
|
error_msg = (
|
229
233
|
f"Error processing LLM response for {url}:\n"
|
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=T0H2nqzhvXuxYMKgWAVoRrSIUV72H7yZ8SHIsbG9I4g,43327
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=so5Ze7Vz3konpQ0iT7ZxDGE9kIYeTwPTFyzezRc5oys,15392
|
3
|
+
spiderforce4ai-2.5.dist-info/METADATA,sha256=4iP462Pmx5GikzNhhPFHhm89BdkGqTzFDTeiPN1Xp4U,9010
|
4
|
+
spiderforce4ai-2.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.5.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.5.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.5.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=5M7pBU8O44Khfub2jSSPboSbrcsAPw6nnp576qIA2pY,14988
|
3
|
-
spiderforce4ai-2.4.8.dist-info/METADATA,sha256=zsEmCfjL_ueJzIeTJ9BIdUEF3R-4uOPDqrRxox70Cto,9012
|
4
|
-
spiderforce4ai-2.4.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.4.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.4.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.4.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|