spiderforce4ai 2.4.7__py3-none-any.whl → 2.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spiderforce4ai/__init__.py +2 -1
- spiderforce4ai/post_extraction_agent.py +37 -26
- {spiderforce4ai-2.4.7.dist-info → spiderforce4ai-2.4.9.dist-info}/METADATA +1 -1
- spiderforce4ai-2.4.9.dist-info/RECORD +7 -0
- spiderforce4ai-2.4.7.dist-info/RECORD +0 -7
- {spiderforce4ai-2.4.7.dist-info → spiderforce4ai-2.4.9.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.4.7.dist-info → spiderforce4ai-2.4.9.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.4.7.dist-info → spiderforce4ai-2.4.9.dist-info}/top_level.txt +0 -0
spiderforce4ai/__init__.py
CHANGED
@@ -870,7 +870,8 @@ class SpiderForce4AI:
|
|
870
870
|
base_url=config.post_extraction_agent.get("base_url"),
|
871
871
|
combine_output=bool(config.post_extraction_agent_save_to_file),
|
872
872
|
output_file=config.post_extraction_agent_save_to_file,
|
873
|
-
custom_transform_function=config.post_agent_transformer_function
|
873
|
+
custom_transform_function=config.post_agent_transformer_function,
|
874
|
+
response_format=config.post_extraction_agent.get("response_format")
|
874
875
|
)
|
875
876
|
agent = PostExtractionAgent(post_config)
|
876
877
|
|
spiderforce4ai/post_extraction_agent.py
CHANGED
@@ -118,6 +118,7 @@ class PostExtractionConfig:
|
|
118
118
|
output_file: Optional[Path] = None
|
119
119
|
custom_transform_function: Optional[Callable] = None
|
120
120
|
buffer_file: Optional[Path] = None
|
121
|
+
response_format: Optional[str] = None # 'json' or 'text'
|
121
122
|
|
122
123
|
def __post_init__(self):
|
123
124
|
if self.output_file:
|
@@ -184,39 +185,49 @@ class PostExtractionAgent:
|
|
184
185
|
for attempt in range(max_retries):
|
185
186
|
try:
|
186
187
|
# Call completion synchronously
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
188
|
+
# Add response_format if specified
|
189
|
+
completion_args = {
|
190
|
+
"model": self.config.model,
|
191
|
+
"messages": messages,
|
192
|
+
"max_tokens": self.config.max_tokens,
|
193
|
+
"temperature": self.config.temperature,
|
194
|
+
"api_key": self.config.api_key,
|
195
|
+
}
|
196
|
+
if self.config.base_url:
|
197
|
+
completion_args["api_base"] = self.config.base_url
|
198
|
+
if self.config.response_format:
|
199
|
+
completion_args["response_format"] = {"type": self.config.response_format}
|
195
200
|
|
196
|
-
|
201
|
+
response = completion(**completion_args)
|
197
202
|
raw_content = response.choices[0].message.content
|
198
203
|
logger.debug(f"Raw LLM response for {url}: {raw_content}")
|
199
|
-
|
204
|
+
|
205
|
+
# Handle response based on response_format
|
200
206
|
try:
|
201
|
-
|
202
|
-
|
203
|
-
extracted_data = json.loads(raw_content)
|
204
|
-
|
205
|
-
#
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
207
|
+
if self.config.response_format == "json_object":
|
208
|
+
# For json_object format, response should already be valid JSON
|
209
|
+
extracted_data = raw_content if isinstance(raw_content, dict) else json.loads(raw_content)
|
210
|
+
else:
|
211
|
+
# For text format or unspecified, try parsing JSON or use as text
|
212
|
+
try:
|
213
|
+
extracted_data = json.loads(raw_content)
|
214
|
+
except json.JSONDecodeError:
|
215
|
+
# Look for JSON in markdown code blocks
|
216
|
+
json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
|
217
|
+
if json_match:
|
218
|
+
json_content = json_match.group(1).strip()
|
219
|
+
extracted_data = json.loads(json_content)
|
220
|
+
else:
|
221
|
+
# If no JSON found and not json_object format, use raw content
|
222
|
+
extracted_data = {
|
223
|
+
"raw_content": raw_content,
|
224
|
+
"format": "text",
|
225
|
+
"timestamp": datetime.now().isoformat()
|
226
|
+
}
|
217
227
|
|
218
228
|
self.buffer.remove_request(url) # Remove from buffer if successful
|
219
229
|
return extracted_data
|
230
|
+
|
220
231
|
except Exception as e:
|
221
232
|
error_msg = (
|
222
233
|
f"Error processing LLM response for {url}:\n"
|
spiderforce4ai-2.4.9.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=so5Ze7Vz3konpQ0iT7ZxDGE9kIYeTwPTFyzezRc5oys,15392
|
3
|
+
spiderforce4ai-2.4.9.dist-info/METADATA,sha256=kEq3anAkoe_wpPVzpgaJlsSuAzTQHDgXiDFpirXvUQc,9012
|
4
|
+
spiderforce4ai-2.4.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.4.9.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.4.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.4.9.dist-info/RECORD,,
|
spiderforce4ai-2.4.7.dist-info/RECORD
REMOVED
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=q2ohsqw_F1e5rT2H9eSzCWzstJLbwGyCtwLsC6eMufs,14560
|
3
|
-
spiderforce4ai-2.4.7.dist-info/METADATA,sha256=r273h2ogI76aXTd8XN9b81EWtQLuhdSjZkXD2Ks8GnM,9012
|
4
|
-
spiderforce4ai-2.4.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.4.7.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.4.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.4.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|