spiderforce4ai 2.4.5__py3-none-any.whl → 2.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -190,17 +190,29 @@ class PostExtractionAgent:
190
190
  api_base=self.config.base_url
191
191
  )
192
192
 
193
- # Parse response
194
- extracted_data = json.loads(response.choices[0].message.content)
195
- self.buffer.remove_request(url) # Remove from buffer if successful
196
- return extracted_data
193
+ # Log raw response for debugging
194
+ raw_content = response.choices[0].message.content
195
+ logger.debug(f"Raw LLM response for {url}: {raw_content}")
197
196
 
198
- except json.JSONDecodeError as e:
199
- last_error = f"Invalid JSON response from LLM: {e}"
200
- if attempt < max_retries - 1:
201
- time.sleep(retry_delay * (attempt + 1))
197
+ try:
198
+ extracted_data = json.loads(raw_content)
199
+ self.buffer.remove_request(url) # Remove from buffer if successful
200
+ return extracted_data
201
+ except json.JSONDecodeError as e:
202
+ error_msg = (
203
+ f"Invalid JSON response from LLM for {url}:\n"
204
+ f"Error: {str(e)}\n"
205
+ f"Raw content: {raw_content[:500]}..." # First 500 chars of response
206
+ )
207
+ logger.error(error_msg)
208
+ last_error = error_msg
209
+ if attempt < max_retries - 1:
210
+ time.sleep(retry_delay * (attempt + 1))
211
+
202
212
  except Exception as e:
203
- last_error = str(e)
213
+ error_msg = f"LLM processing error for {url}: {str(e)}"
214
+ logger.error(error_msg)
215
+ last_error = error_msg
204
216
  if attempt < max_retries - 1:
205
217
  time.sleep(retry_delay * (attempt + 1))
206
218
 
@@ -242,24 +254,42 @@ class PostExtractionAgent:
242
254
 
243
255
  def process_content(self, url: str, content: str) -> Optional[Dict]:
244
256
  """Process content with retry mechanism."""
257
+ logger.info(f"Starting content processing for {url}")
258
+
245
259
  for attempt in range(self.config.max_retries):
260
+ logger.info(f"Processing attempt {attempt + 1}/{self.config.max_retries} for {url}")
261
+
246
262
  result = self._process_single_content(url, content)
247
263
  if result:
264
+ logger.info(f"Successfully processed content for {url}")
265
+
248
266
  # Apply custom transformation if provided
249
267
  if self.config.custom_transform_function:
250
268
  try:
251
269
  result = self.config.custom_transform_function(result)
270
+ logger.info(f"Applied custom transformation for {url}")
252
271
  except Exception as e:
253
- logger.error(f"Error in custom transform for {url}: {str(e)}")
272
+ error_msg = f"Error in custom transform for {url}: {str(e)}"
273
+ logger.error(error_msg)
274
+ console.print(f"[red]{error_msg}[/red]")
254
275
 
255
276
  # Save result synchronously
256
- self._save_result_sync(url, result)
277
+ try:
278
+ self._save_result_sync(url, result)
279
+ logger.info(f"Saved results for {url}")
280
+ except Exception as e:
281
+ error_msg = f"Error saving results for {url}: {str(e)}"
282
+ logger.error(error_msg)
283
+ console.print(f"[red]{error_msg}[/red]")
284
+
257
285
  return result
258
286
 
259
287
  # Wait before retry
260
288
  if attempt < self.config.max_retries - 1:
289
+ logger.info(f"Attempt {attempt + 1} failed for {url}, waiting {self.config.retry_delay}s before retry")
261
290
  time.sleep(self.config.retry_delay)
262
291
 
292
+ logger.error(f"All processing attempts failed for {url}")
263
293
  return None
264
294
 
265
295
  async def process_bulk_content(self, content_map: Dict[str, str]) -> Dict[str, Optional[Dict]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.5
3
+ Version: 2.4.6
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
+ spiderforce4ai/post_extraction_agent.py,sha256=Ty9-Ai3_RVT86RrPUxKEzf4oUh-Wr7bk2aM87Je_WvE,13580
3
+ spiderforce4ai-2.4.6.dist-info/METADATA,sha256=7FaEgAHdD-8a0XmuDMkpAUjAQ7ZmFTD89IqQM17nllI,9012
4
+ spiderforce4ai-2.4.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.4.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.4.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.4.6.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
- spiderforce4ai/post_extraction_agent.py,sha256=t9KxjuNw16-6kige6ULPLyykNkiGmKhpCi8QjskdaTk,11959
3
- spiderforce4ai-2.4.5.dist-info/METADATA,sha256=q3VBuGb5wxsi9OPkzEMwFMyg9f_vT2RamWYIgu2JbLc,9012
4
- spiderforce4ai-2.4.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.4.5.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.4.5.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.4.5.dist-info/RECORD,,