spiderforce4ai-2.4.5-py3-none-any.whl → spiderforce4ai-2.4.6-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -190,17 +190,29 @@ class PostExtractionAgent:
190
190
  api_base=self.config.base_url
191
191
  )
192
192
 
193
- # Parse response
194
- extracted_data = json.loads(response.choices[0].message.content)
195
- self.buffer.remove_request(url) # Remove from buffer if successful
196
- return extracted_data
193
+ # Log raw response for debugging
194
+ raw_content = response.choices[0].message.content
195
+ logger.debug(f"Raw LLM response for {url}: {raw_content}")
197
196
 
198
- except json.JSONDecodeError as e:
199
- last_error = f"Invalid JSON response from LLM: {e}"
200
- if attempt < max_retries - 1:
201
- time.sleep(retry_delay * (attempt + 1))
197
+ try:
198
+ extracted_data = json.loads(raw_content)
199
+ self.buffer.remove_request(url) # Remove from buffer if successful
200
+ return extracted_data
201
+ except json.JSONDecodeError as e:
202
+ error_msg = (
203
+ f"Invalid JSON response from LLM for {url}:\n"
204
+ f"Error: {str(e)}\n"
205
+ f"Raw content: {raw_content[:500]}..." # First 500 chars of response
206
+ )
207
+ logger.error(error_msg)
208
+ last_error = error_msg
209
+ if attempt < max_retries - 1:
210
+ time.sleep(retry_delay * (attempt + 1))
211
+
202
212
  except Exception as e:
203
- last_error = str(e)
213
+ error_msg = f"LLM processing error for {url}: {str(e)}"
214
+ logger.error(error_msg)
215
+ last_error = error_msg
204
216
  if attempt < max_retries - 1:
205
217
  time.sleep(retry_delay * (attempt + 1))
206
218
 
@@ -242,24 +254,42 @@ class PostExtractionAgent:
242
254
 
243
255
  def process_content(self, url: str, content: str) -> Optional[Dict]:
244
256
  """Process content with retry mechanism."""
257
+ logger.info(f"Starting content processing for {url}")
258
+
245
259
  for attempt in range(self.config.max_retries):
260
+ logger.info(f"Processing attempt {attempt + 1}/{self.config.max_retries} for {url}")
261
+
246
262
  result = self._process_single_content(url, content)
247
263
  if result:
264
+ logger.info(f"Successfully processed content for {url}")
265
+
248
266
  # Apply custom transformation if provided
249
267
  if self.config.custom_transform_function:
250
268
  try:
251
269
  result = self.config.custom_transform_function(result)
270
+ logger.info(f"Applied custom transformation for {url}")
252
271
  except Exception as e:
253
- logger.error(f"Error in custom transform for {url}: {str(e)}")
272
+ error_msg = f"Error in custom transform for {url}: {str(e)}"
273
+ logger.error(error_msg)
274
+ console.print(f"[red]{error_msg}[/red]")
254
275
 
255
276
  # Save result synchronously
256
- self._save_result_sync(url, result)
277
+ try:
278
+ self._save_result_sync(url, result)
279
+ logger.info(f"Saved results for {url}")
280
+ except Exception as e:
281
+ error_msg = f"Error saving results for {url}: {str(e)}"
282
+ logger.error(error_msg)
283
+ console.print(f"[red]{error_msg}[/red]")
284
+
257
285
  return result
258
286
 
259
287
  # Wait before retry
260
288
  if attempt < self.config.max_retries - 1:
289
+ logger.info(f"Attempt {attempt + 1} failed for {url}, waiting {self.config.retry_delay}s before retry")
261
290
  time.sleep(self.config.retry_delay)
262
291
 
292
+ logger.error(f"All processing attempts failed for {url}")
263
293
  return None
264
294
 
265
295
  async def process_bulk_content(self, content_map: Dict[str, str]) -> Dict[str, Optional[Dict]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.5
3
+ Version: 2.4.6
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
+ spiderforce4ai/post_extraction_agent.py,sha256=Ty9-Ai3_RVT86RrPUxKEzf4oUh-Wr7bk2aM87Je_WvE,13580
3
+ spiderforce4ai-2.4.6.dist-info/METADATA,sha256=7FaEgAHdD-8a0XmuDMkpAUjAQ7ZmFTD89IqQM17nllI,9012
4
+ spiderforce4ai-2.4.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.4.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.4.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.4.6.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
- spiderforce4ai/post_extraction_agent.py,sha256=t9KxjuNw16-6kige6ULPLyykNkiGmKhpCi8QjskdaTk,11959
3
- spiderforce4ai-2.4.5.dist-info/METADATA,sha256=q3VBuGb5wxsi9OPkzEMwFMyg9f_vT2RamWYIgu2JbLc,9012
4
- spiderforce4ai-2.4.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.4.5.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.4.5.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.4.5.dist-info/RECORD,,