spiderforce4ai 2.4.8__py3-none-any.whl → 2.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -199,31 +199,35 @@ class PostExtractionAgent:
199
199
  completion_args["response_format"] = {"type": self.config.response_format}
200
200
 
201
201
  response = completion(**completion_args)
202
-
203
- # Log raw response for debugging
204
202
  raw_content = response.choices[0].message.content
205
203
  logger.debug(f"Raw LLM response for {url}: {raw_content}")
206
-
204
+
205
+ # Handle response based on response_format
207
206
  try:
208
- # First try direct JSON parsing
209
- try:
210
- extracted_data = json.loads(raw_content)
211
- except json.JSONDecodeError:
212
- # Look for JSON in markdown code blocks
213
- json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
214
- if json_match:
215
- json_content = json_match.group(1).strip()
216
- extracted_data = json.loads(json_content)
217
- else:
218
- # If no JSON found, try to extract structured data in any format
219
- extracted_data = {
220
- "raw_content": raw_content,
221
- "format": "text",
222
- "timestamp": datetime.now().isoformat()
223
- }
207
+ if self.config.response_format == "json_object":
208
+ # For json_object format, response should already be valid JSON
209
+ extracted_data = raw_content if isinstance(raw_content, dict) else json.loads(raw_content)
210
+ else:
211
+ # For text format or unspecified, try parsing JSON or use as text
212
+ try:
213
+ extracted_data = json.loads(raw_content)
214
+ except json.JSONDecodeError:
215
+ # Look for JSON in markdown code blocks
216
+ json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
217
+ if json_match:
218
+ json_content = json_match.group(1).strip()
219
+ extracted_data = json.loads(json_content)
220
+ else:
221
+ # If no JSON found and not json_object format, use raw content
222
+ extracted_data = {
223
+ "raw_content": raw_content,
224
+ "format": "text",
225
+ "timestamp": datetime.now().isoformat()
226
+ }
224
227
 
225
228
  self.buffer.remove_request(url) # Remove from buffer if successful
226
229
  return extracted_data
230
+
227
231
  except Exception as e:
228
232
  error_msg = (
229
233
  f"Error processing LLM response for {url}:\n"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.8
3
+ Version: 2.4.9
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
2
+ spiderforce4ai/post_extraction_agent.py,sha256=so5Ze7Vz3konpQ0iT7ZxDGE9kIYeTwPTFyzezRc5oys,15392
3
+ spiderforce4ai-2.4.9.dist-info/METADATA,sha256=kEq3anAkoe_wpPVzpgaJlsSuAzTQHDgXiDFpirXvUQc,9012
4
+ spiderforce4ai-2.4.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.4.9.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.4.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.4.9.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
2
- spiderforce4ai/post_extraction_agent.py,sha256=5M7pBU8O44Khfub2jSSPboSbrcsAPw6nnp576qIA2pY,14988
3
- spiderforce4ai-2.4.8.dist-info/METADATA,sha256=zsEmCfjL_ueJzIeTJ9BIdUEF3R-4uOPDqrRxox70Cto,9012
4
- spiderforce4ai-2.4.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.4.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.4.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.4.8.dist-info/RECORD,,