PyPI - spiderforce4ai - Versions diffs - 2.4.5__py3-none-any.whl → 2.4.7__py3-none-any.whl - Mend

spiderforce4ai 2.4.5__py3-none-any.whl → 2.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

spiderforce4ai/post_extraction_agent.py CHANGED Viewed

@@ -12,6 +12,9 @@ from pydantic import BaseModel, Field
 import logging
 from datetime import datetime
 import re
+from rich.console import Console
+console = Console()
 logger = logging.getLogger(__name__)
@@ -190,17 +193,45 @@ class PostExtractionAgent:
                         api_base=self.config.base_url
                     )
-                    # Parse response
-                    extracted_data = json.loads(response.choices[0].message.content)
-                    self.buffer.remove_request(url)  # Remove from buffer if successful
-                    return extracted_data
+                    # Log raw response for debugging
+                    raw_content = response.choices[0].message.content
+                    logger.debug(f"Raw LLM response for {url}: {raw_content}")
-                except json.JSONDecodeError as e:
-                    last_error = f"Invalid JSON response from LLM: {e}"
-                    if attempt < max_retries - 1:
-                        time.sleep(retry_delay * (attempt + 1))
+                    try:
+                        # First try direct JSON parsing
+                        try:
+                            extracted_data = json.loads(raw_content)
+                        except json.JSONDecodeError:
+                            # Look for JSON in markdown code blocks
+                            json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
+                            if json_match:
+                                json_content = json_match.group(1).strip()
+                                extracted_data = json.loads(json_content)
+                            else:
+                                # If no JSON found, try to extract structured data in any format
+                                extracted_data = {
+                                    "raw_content": raw_content,
+                                    "format": "text",
+                                    "timestamp": datetime.now().isoformat()
+                                }
+                        self.buffer.remove_request(url)  # Remove from buffer if successful
+                        return extracted_data
+                    except Exception as e:
+                        error_msg = (
+                            f"Error processing LLM response for {url}:\n"
+                            f"Error: {str(e)}\n"
+                            f"Raw content: {raw_content[:500]}..."  # First 500 chars of response
+                        )
+                        logger.error(error_msg)
+                        last_error = error_msg
+                        if attempt < max_retries - 1:
+                            time.sleep(retry_delay * (attempt + 1))
                 except Exception as e:
-                    last_error = str(e)
+                    error_msg = f"LLM processing error for {url}: {str(e)}"
+                    logger.error(error_msg)
+                    last_error = error_msg
                     if attempt < max_retries - 1:
                         time.sleep(retry_delay * (attempt + 1))
@@ -242,24 +273,42 @@ class PostExtractionAgent:
     def process_content(self, url: str, content: str) -> Optional[Dict]:
         """Process content with retry mechanism."""
+        logger.info(f"Starting content processing for {url}")
         for attempt in range(self.config.max_retries):
+            logger.info(f"Processing attempt {attempt + 1}/{self.config.max_retries} for {url}")
             result = self._process_single_content(url, content)
             if result:
+                logger.info(f"Successfully processed content for {url}")
                 # Apply custom transformation if provided
                 if self.config.custom_transform_function:
                     try:
                         result = self.config.custom_transform_function(result)
+                        logger.info(f"Applied custom transformation for {url}")
                     except Exception as e:
-                        logger.error(f"Error in custom transform for {url}: {str(e)}")
+                        error_msg = f"Error in custom transform for {url}: {str(e)}"
+                        logger.error(error_msg)
+                        console.print(f"[red]{error_msg}[/red]")
                 # Save result synchronously
-                self._save_result_sync(url, result)
+                try:
+                    self._save_result_sync(url, result)
+                    logger.info(f"Saved results for {url}")
+                except Exception as e:
+                    error_msg = f"Error saving results for {url}: {str(e)}"
+                    logger.error(error_msg)
+                    console.print(f"[red]{error_msg}[/red]")
                 return result
             # Wait before retry
             if attempt < self.config.max_retries - 1:
+                logger.info(f"Attempt {attempt + 1} failed for {url}, waiting {self.config.retry_delay}s before retry")
                 time.sleep(self.config.retry_delay)
+        logger.error(f"All processing attempts failed for {url}")
         return None
     async def process_bulk_content(self, content_map: Dict[str, str]) -> Dict[str, Optional[Dict]]:

{spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.5
+Version: 2.4.7
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz

spiderforce4ai-2.4.7.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
+spiderforce4ai/post_extraction_agent.py,sha256=q2ohsqw_F1e5rT2H9eSzCWzstJLbwGyCtwLsC6eMufs,14560
+spiderforce4ai-2.4.7.dist-info/METADATA,sha256=r273h2ogI76aXTd8XN9b81EWtQLuhdSjZkXD2Ks8GnM,9012
+spiderforce4ai-2.4.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+spiderforce4ai-2.4.7.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
+spiderforce4ai-2.4.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
+spiderforce4ai-2.4.7.dist-info/RECORD,,

spiderforce4ai-2.4.5.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
-spiderforce4ai/post_extraction_agent.py,sha256=t9KxjuNw16-6kige6ULPLyykNkiGmKhpCi8QjskdaTk,11959
-spiderforce4ai-2.4.5.dist-info/METADATA,sha256=q3VBuGb5wxsi9OPkzEMwFMyg9f_vT2RamWYIgu2JbLc,9012
-spiderforce4ai-2.4.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-spiderforce4ai-2.4.5.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
-spiderforce4ai-2.4.5.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
-spiderforce4ai-2.4.5.dist-info/RECORD,,

{spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.7.dist-info}/WHEEL RENAMED Viewed

File without changes

{spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.7.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.7.dist-info}/top_level.txt RENAMED Viewed

File without changes

spiderforce4ai 2.4.5__py3-none-any.whl → 2.4.7__py3-none-any.whl

spiderforce4ai 2.4.5__py3-none-any.whl → 2.4.7__py3-none-any.whl