spiderforce4ai 2.4.6__py3-none-any.whl → 2.4.7__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -12,6 +12,9 @@ from pydantic import BaseModel, Field
12
12
  import logging
13
13
  from datetime import datetime
14
14
  import re
15
+ from rich.console import Console
16
+
17
+ console = Console()
15
18
 
16
19
  logger = logging.getLogger(__name__)
17
20
 
@@ -195,12 +198,28 @@ class PostExtractionAgent:
195
198
  logger.debug(f"Raw LLM response for {url}: {raw_content}")
196
199
 
197
200
  try:
198
- extracted_data = json.loads(raw_content)
201
+ # First try direct JSON parsing
202
+ try:
203
+ extracted_data = json.loads(raw_content)
204
+ except json.JSONDecodeError:
205
+ # Look for JSON in markdown code blocks
206
+ json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
207
+ if json_match:
208
+ json_content = json_match.group(1).strip()
209
+ extracted_data = json.loads(json_content)
210
+ else:
211
+ # If no JSON found, try to extract structured data in any format
212
+ extracted_data = {
213
+ "raw_content": raw_content,
214
+ "format": "text",
215
+ "timestamp": datetime.now().isoformat()
216
+ }
217
+
199
218
  self.buffer.remove_request(url) # Remove from buffer if successful
200
219
  return extracted_data
201
- except json.JSONDecodeError as e:
220
+ except Exception as e:
202
221
  error_msg = (
203
- f"Invalid JSON response from LLM for {url}:\n"
222
+ f"Error processing LLM response for {url}:\n"
204
223
  f"Error: {str(e)}\n"
205
224
  f"Raw content: {raw_content[:500]}..." # First 500 chars of response
206
225
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.6
3
+ Version: 2.4.7
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
+ spiderforce4ai/post_extraction_agent.py,sha256=q2ohsqw_F1e5rT2H9eSzCWzstJLbwGyCtwLsC6eMufs,14560
3
+ spiderforce4ai-2.4.7.dist-info/METADATA,sha256=r273h2ogI76aXTd8XN9b81EWtQLuhdSjZkXD2Ks8GnM,9012
4
+ spiderforce4ai-2.4.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.4.7.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.4.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.4.7.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
- spiderforce4ai/post_extraction_agent.py,sha256=Ty9-Ai3_RVT86RrPUxKEzf4oUh-Wr7bk2aM87Je_WvE,13580
3
- spiderforce4ai-2.4.6.dist-info/METADATA,sha256=7FaEgAHdD-8a0XmuDMkpAUjAQ7ZmFTD89IqQM17nllI,9012
4
- spiderforce4ai-2.4.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.4.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.4.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.4.6.dist-info/RECORD,,