spiderforce4ai 2.4.5__py3-none-any.whl → 2.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,9 @@ from pydantic import BaseModel, Field
12
12
  import logging
13
13
  from datetime import datetime
14
14
  import re
15
+ from rich.console import Console
16
+
17
+ console = Console()
15
18
 
16
19
  logger = logging.getLogger(__name__)
17
20
 
@@ -190,17 +193,45 @@ class PostExtractionAgent:
190
193
  api_base=self.config.base_url
191
194
  )
192
195
 
193
- # Parse response
194
- extracted_data = json.loads(response.choices[0].message.content)
195
- self.buffer.remove_request(url) # Remove from buffer if successful
196
- return extracted_data
196
+ # Log raw response for debugging
197
+ raw_content = response.choices[0].message.content
198
+ logger.debug(f"Raw LLM response for {url}: {raw_content}")
197
199
 
198
- except json.JSONDecodeError as e:
199
- last_error = f"Invalid JSON response from LLM: {e}"
200
- if attempt < max_retries - 1:
201
- time.sleep(retry_delay * (attempt + 1))
200
+ try:
201
+ # First try direct JSON parsing
202
+ try:
203
+ extracted_data = json.loads(raw_content)
204
+ except json.JSONDecodeError:
205
+ # Look for JSON in markdown code blocks
206
+ json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
207
+ if json_match:
208
+ json_content = json_match.group(1).strip()
209
+ extracted_data = json.loads(json_content)
210
+ else:
211
+ # If no JSON found, try to extract structured data in any format
212
+ extracted_data = {
213
+ "raw_content": raw_content,
214
+ "format": "text",
215
+ "timestamp": datetime.now().isoformat()
216
+ }
217
+
218
+ self.buffer.remove_request(url) # Remove from buffer if successful
219
+ return extracted_data
220
+ except Exception as e:
221
+ error_msg = (
222
+ f"Error processing LLM response for {url}:\n"
223
+ f"Error: {str(e)}\n"
224
+ f"Raw content: {raw_content[:500]}..." # First 500 chars of response
225
+ )
226
+ logger.error(error_msg)
227
+ last_error = error_msg
228
+ if attempt < max_retries - 1:
229
+ time.sleep(retry_delay * (attempt + 1))
230
+
202
231
  except Exception as e:
203
- last_error = str(e)
232
+ error_msg = f"LLM processing error for {url}: {str(e)}"
233
+ logger.error(error_msg)
234
+ last_error = error_msg
204
235
  if attempt < max_retries - 1:
205
236
  time.sleep(retry_delay * (attempt + 1))
206
237
 
@@ -242,24 +273,42 @@ class PostExtractionAgent:
242
273
 
243
274
  def process_content(self, url: str, content: str) -> Optional[Dict]:
244
275
  """Process content with retry mechanism."""
276
+ logger.info(f"Starting content processing for {url}")
277
+
245
278
  for attempt in range(self.config.max_retries):
279
+ logger.info(f"Processing attempt {attempt + 1}/{self.config.max_retries} for {url}")
280
+
246
281
  result = self._process_single_content(url, content)
247
282
  if result:
283
+ logger.info(f"Successfully processed content for {url}")
284
+
248
285
  # Apply custom transformation if provided
249
286
  if self.config.custom_transform_function:
250
287
  try:
251
288
  result = self.config.custom_transform_function(result)
289
+ logger.info(f"Applied custom transformation for {url}")
252
290
  except Exception as e:
253
- logger.error(f"Error in custom transform for {url}: {str(e)}")
291
+ error_msg = f"Error in custom transform for {url}: {str(e)}"
292
+ logger.error(error_msg)
293
+ console.print(f"[red]{error_msg}[/red]")
254
294
 
255
295
  # Save result synchronously
256
- self._save_result_sync(url, result)
296
+ try:
297
+ self._save_result_sync(url, result)
298
+ logger.info(f"Saved results for {url}")
299
+ except Exception as e:
300
+ error_msg = f"Error saving results for {url}: {str(e)}"
301
+ logger.error(error_msg)
302
+ console.print(f"[red]{error_msg}[/red]")
303
+
257
304
  return result
258
305
 
259
306
  # Wait before retry
260
307
  if attempt < self.config.max_retries - 1:
308
+ logger.info(f"Attempt {attempt + 1} failed for {url}, waiting {self.config.retry_delay}s before retry")
261
309
  time.sleep(self.config.retry_delay)
262
310
 
311
+ logger.error(f"All processing attempts failed for {url}")
263
312
  return None
264
313
 
265
314
  async def process_bulk_content(self, content_map: Dict[str, str]) -> Dict[str, Optional[Dict]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.5
3
+ Version: 2.4.7
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
+ spiderforce4ai/post_extraction_agent.py,sha256=q2ohsqw_F1e5rT2H9eSzCWzstJLbwGyCtwLsC6eMufs,14560
3
+ spiderforce4ai-2.4.7.dist-info/METADATA,sha256=r273h2ogI76aXTd8XN9b81EWtQLuhdSjZkXD2Ks8GnM,9012
4
+ spiderforce4ai-2.4.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.4.7.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.4.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.4.7.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
- spiderforce4ai/post_extraction_agent.py,sha256=t9KxjuNw16-6kige6ULPLyykNkiGmKhpCi8QjskdaTk,11959
3
- spiderforce4ai-2.4.5.dist-info/METADATA,sha256=q3VBuGb5wxsi9OPkzEMwFMyg9f_vT2RamWYIgu2JbLc,9012
4
- spiderforce4ai-2.4.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.4.5.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.4.5.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.4.5.dist-info/RECORD,,