spiderforce4ai-2.4.6-py3-none-any.whl → spiderforce4ai-2.4.8-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- spiderforce4ai/__init__.py +2 -1
- spiderforce4ai/post_extraction_agent.py +37 -11
- {spiderforce4ai-2.4.6.dist-info → spiderforce4ai-2.4.8.dist-info}/METADATA +1 -1
- spiderforce4ai-2.4.8.dist-info/RECORD +7 -0
- spiderforce4ai-2.4.6.dist-info/RECORD +0 -7
- {spiderforce4ai-2.4.6.dist-info → spiderforce4ai-2.4.8.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.4.6.dist-info → spiderforce4ai-2.4.8.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.4.6.dist-info → spiderforce4ai-2.4.8.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -870,7 +870,8 @@ class SpiderForce4AI:
|
|
870
870
|
base_url=config.post_extraction_agent.get("base_url"),
|
871
871
|
combine_output=bool(config.post_extraction_agent_save_to_file),
|
872
872
|
output_file=config.post_extraction_agent_save_to_file,
|
873
|
-
custom_transform_function=config.post_agent_transformer_function
|
873
|
+
custom_transform_function=config.post_agent_transformer_function,
|
874
|
+
response_format=config.post_extraction_agent.get("response_format")
|
874
875
|
)
|
875
876
|
agent = PostExtractionAgent(post_config)
|
876
877
|
|
spiderforce4ai/post_extraction_agent.py
CHANGED
@@ -12,6 +12,9 @@ from pydantic import BaseModel, Field
|
|
12
12
|
import logging
|
13
13
|
from datetime import datetime
|
14
14
|
import re
|
15
|
+
from rich.console import Console
|
16
|
+
|
17
|
+
console = Console()
|
15
18
|
|
16
19
|
logger = logging.getLogger(__name__)
|
17
20
|
|
@@ -115,6 +118,7 @@ class PostExtractionConfig:
|
|
115
118
|
output_file: Optional[Path] = None
|
116
119
|
custom_transform_function: Optional[Callable] = None
|
117
120
|
buffer_file: Optional[Path] = None
|
121
|
+
response_format: Optional[str] = None # 'json' or 'text'
|
118
122
|
|
119
123
|
def __post_init__(self):
|
120
124
|
if self.output_file:
|
@@ -181,26 +185,48 @@ class PostExtractionAgent:
|
|
181
185
|
for attempt in range(max_retries):
|
182
186
|
try:
|
183
187
|
# Call completion synchronously
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
188
|
+
# Add response_format if specified
|
189
|
+
completion_args = {
|
190
|
+
"model": self.config.model,
|
191
|
+
"messages": messages,
|
192
|
+
"max_tokens": self.config.max_tokens,
|
193
|
+
"temperature": self.config.temperature,
|
194
|
+
"api_key": self.config.api_key,
|
195
|
+
}
|
196
|
+
if self.config.base_url:
|
197
|
+
completion_args["api_base"] = self.config.base_url
|
198
|
+
if self.config.response_format:
|
199
|
+
completion_args["response_format"] = {"type": self.config.response_format}
|
200
|
+
|
201
|
+
response = completion(**completion_args)
|
192
202
|
|
193
203
|
# Log raw response for debugging
|
194
204
|
raw_content = response.choices[0].message.content
|
195
205
|
logger.debug(f"Raw LLM response for {url}: {raw_content}")
|
196
206
|
|
197
207
|
try:
|
198
|
-
|
208
|
+
# First try direct JSON parsing
|
209
|
+
try:
|
210
|
+
extracted_data = json.loads(raw_content)
|
211
|
+
except json.JSONDecodeError:
|
212
|
+
# Look for JSON in markdown code blocks
|
213
|
+
json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
|
214
|
+
if json_match:
|
215
|
+
json_content = json_match.group(1).strip()
|
216
|
+
extracted_data = json.loads(json_content)
|
217
|
+
else:
|
218
|
+
# If no JSON found, try to extract structured data in any format
|
219
|
+
extracted_data = {
|
220
|
+
"raw_content": raw_content,
|
221
|
+
"format": "text",
|
222
|
+
"timestamp": datetime.now().isoformat()
|
223
|
+
}
|
224
|
+
|
199
225
|
self.buffer.remove_request(url) # Remove from buffer if successful
|
200
226
|
return extracted_data
|
201
|
-
except
|
227
|
+
except Exception as e:
|
202
228
|
error_msg = (
|
203
|
-
f"
|
229
|
+
f"Error processing LLM response for {url}:\n"
|
204
230
|
f"Error: {str(e)}\n"
|
205
231
|
f"Raw content: {raw_content[:500]}..." # First 500 chars of response
|
206
232
|
)
|
spiderforce4ai-2.4.8.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=5M7pBU8O44Khfub2jSSPboSbrcsAPw6nnp576qIA2pY,14988
|
3
|
+
spiderforce4ai-2.4.8.dist-info/METADATA,sha256=zsEmCfjL_ueJzIeTJ9BIdUEF3R-4uOPDqrRxox70Cto,9012
|
4
|
+
spiderforce4ai-2.4.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.4.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.4.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.4.8.dist-info/RECORD,,
|
spiderforce4ai-2.4.6.dist-info/RECORD
REMOVED
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=Ty9-Ai3_RVT86RrPUxKEzf4oUh-Wr7bk2aM87Je_WvE,13580
|
3
|
-
spiderforce4ai-2.4.6.dist-info/METADATA,sha256=7FaEgAHdD-8a0XmuDMkpAUjAQ7ZmFTD89IqQM17nllI,9012
|
4
|
-
spiderforce4ai-2.4.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.4.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.4.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.4.6.dist-info/RECORD,,
|
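{spiderforce4ai-2.4.6.dist-info → spiderforce4ai-2.4.8.dist-info}/WHEEL: File without changes
|
{spiderforce4ai-2.4.6.dist-info → spiderforce4ai-2.4.8.dist-info}/entry_points.txt: File without changes
|
{spiderforce4ai-2.4.6.dist-info → spiderforce4ai-2.4.8.dist-info}/top_level.txt: File without changes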
|