PyPI - spiderforce4ai - Versions diffs - 2.4.5__tar.gz → 2.4.6__tar.gz - Mend

spiderforce4ai 2.4.5.tar.gz → 2.4.6.tar.gz

Files changed (14) hide show

{spiderforce4ai-2.4.5 → spiderforce4ai-2.4.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.5
+Version: 2.4.6
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz

{spiderforce4ai-2.4.5 → spiderforce4ai-2.4.6}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "spiderforce4ai"
-version = "2.4.5"
+version = "2.4.6"
 description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
 readme = "README.md"
 authors = [

{spiderforce4ai-2.4.5 → spiderforce4ai-2.4.6}/setup.py RENAMED Viewed

@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
 setup(
     name="spiderforce4ai",
-    version="2.4.5",
+    version="2.4.6",
     author="Piotr Tamulewicz",
     author_email="pt@petertam.pro",
     description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",

{spiderforce4ai-2.4.5 → spiderforce4ai-2.4.6}/spiderforce4ai/post_extraction_agent.py RENAMED Viewed

@@ -190,17 +190,29 @@ class PostExtractionAgent:
                         api_base=self.config.base_url
                     )
-                    # Parse response
-                    extracted_data = json.loads(response.choices[0].message.content)
-                    self.buffer.remove_request(url)  # Remove from buffer if successful
-                    return extracted_data
+                    # Log raw response for debugging
+                    raw_content = response.choices[0].message.content
+                    logger.debug(f"Raw LLM response for {url}: {raw_content}")
-                except json.JSONDecodeError as e:
-                    last_error = f"Invalid JSON response from LLM: {e}"
-                    if attempt < max_retries - 1:
-                        time.sleep(retry_delay * (attempt + 1))
+                    try:
+                        extracted_data = json.loads(raw_content)
+                        self.buffer.remove_request(url)  # Remove from buffer if successful
+                        return extracted_data
+                    except json.JSONDecodeError as e:
+                        error_msg = (
+                            f"Invalid JSON response from LLM for {url}:\n"
+                            f"Error: {str(e)}\n"
+                            f"Raw content: {raw_content[:500]}..."  # First 500 chars of response
+                        )
+                        logger.error(error_msg)
+                        last_error = error_msg
+                        if attempt < max_retries - 1:
+                            time.sleep(retry_delay * (attempt + 1))
                 except Exception as e:
-                    last_error = str(e)
+                    error_msg = f"LLM processing error for {url}: {str(e)}"
+                    logger.error(error_msg)
+                    last_error = error_msg
                     if attempt < max_retries - 1:
                         time.sleep(retry_delay * (attempt + 1))
@@ -242,24 +254,42 @@ class PostExtractionAgent:
     def process_content(self, url: str, content: str) -> Optional[Dict]:
         """Process content with retry mechanism."""
+        logger.info(f"Starting content processing for {url}")
         for attempt in range(self.config.max_retries):
+            logger.info(f"Processing attempt {attempt + 1}/{self.config.max_retries} for {url}")
             result = self._process_single_content(url, content)
             if result:
+                logger.info(f"Successfully processed content for {url}")
                 # Apply custom transformation if provided
                 if self.config.custom_transform_function:
                     try:
                         result = self.config.custom_transform_function(result)
+                        logger.info(f"Applied custom transformation for {url}")
                     except Exception as e:
-                        logger.error(f"Error in custom transform for {url}: {str(e)}")
+                        error_msg = f"Error in custom transform for {url}: {str(e)}"
+                        logger.error(error_msg)
+                        console.print(f"[red]{error_msg}[/red]")
                 # Save result synchronously
-                self._save_result_sync(url, result)
+                try:
+                    self._save_result_sync(url, result)
+                    logger.info(f"Saved results for {url}")
+                except Exception as e:
+                    error_msg = f"Error saving results for {url}: {str(e)}"
+                    logger.error(error_msg)
+                    console.print(f"[red]{error_msg}[/red]")
                 return result
             # Wait before retry
             if attempt < self.config.max_retries - 1:
+                logger.info(f"Attempt {attempt + 1} failed for {url}, waiting {self.config.retry_delay}s before retry")
                 time.sleep(self.config.retry_delay)
+        logger.error(f"All processing attempts failed for {url}")
         return None
     async def process_bulk_content(self, content_map: Dict[str, str]) -> Dict[str, Optional[Dict]]:

{spiderforce4ai-2.4.5 → spiderforce4ai-2.4.6}/spiderforce4ai.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.5
+Version: 2.4.6
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz