spiderforce4ai 2.6.3__tar.gz → 2.6.5__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/PKG-INFO +1 -1
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/pyproject.toml +1 -1
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/setup.py +1 -1
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/spiderforce4ai/post_extraction_agent.py +40 -49
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/spiderforce4ai.egg-info/PKG-INFO +1 -1
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/README.md +0 -0
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/setup.cfg +0 -0
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/spiderforce4ai/__init__.py +0 -0
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/spiderforce4ai.egg-info/SOURCES.txt +0 -0
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/spiderforce4ai.egg-info/dependency_links.txt +0 -0
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/spiderforce4ai.egg-info/entry_points.txt +0 -0
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/spiderforce4ai.egg-info/not-zip-safe +0 -0
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/spiderforce4ai.egg-info/requires.txt +0 -0
- {spiderforce4ai-2.6.3 → spiderforce4ai-2.6.5}/spiderforce4ai.egg-info/top_level.txt +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "spiderforce4ai"
|
7
|
-
version = "2.6.3"
|
7
|
+
version = "2.6.5"
|
8
8
|
description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
|
9
9
|
readme = "README.md"
|
10
10
|
authors = [
|
@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
|
|
7
7
|
|
8
8
|
setup(
|
9
9
|
name="spiderforce4ai",
|
10
|
-
version="2.6.3",
|
10
|
+
version="2.6.5",
|
11
11
|
author="Piotr Tamulewicz",
|
12
12
|
author_email="pt@petertam.pro",
|
13
13
|
description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",
|
@@ -159,6 +159,12 @@ class PostExtractionAgent:
|
|
159
159
|
# Convert string path to Path object if needed
|
160
160
|
if isinstance(self.config.output_file, str):
|
161
161
|
self.config.output_file = Path(self.config.output_file)
|
162
|
+
# Ensure parent directory exists
|
163
|
+
self.config.output_file.parent.mkdir(parents=True, exist_ok=True)
|
164
|
+
# Create empty JSON file if it doesn't exist
|
165
|
+
if not self.config.output_file.exists():
|
166
|
+
with open(self.config.output_file, 'w') as f:
|
167
|
+
json.dump({}, f)
|
162
168
|
self._setup_output()
|
163
169
|
|
164
170
|
def _setup_output(self) -> None:
|
@@ -272,58 +278,35 @@ class PostExtractionAgent:
|
|
272
278
|
return None
|
273
279
|
|
274
280
|
def _save_result_sync(self, url: str, result: Dict) -> None:
|
275
|
-
"""Save
|
281
|
+
"""Save results synchronously to combined output file."""
|
276
282
|
try:
|
277
|
-
if self.config.combine_output and self.config.output_file:
|
278
|
-
#
|
279
|
-
output_file = str(self.config.output_file) if isinstance(self.config.output_file, Path) else self.config.output_file
|
280
|
-
|
281
|
-
# Ensure output directory exists
|
282
|
-
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
|
283
|
-
|
284
|
-
# Load existing results or create new
|
283
|
+
if self.config.output_file:
|
284
|
+
# Load existing results
|
285
285
|
try:
|
286
|
-
|
287
|
-
|
288
|
-
self.results = json.load(f)
|
289
|
-
else:
|
290
|
-
self.results = {}
|
286
|
+
with open(self.config.output_file, 'r', encoding='utf-8') as f:
|
287
|
+
current_results = json.load(f)
|
291
288
|
except (json.JSONDecodeError, FileNotFoundError):
|
292
|
-
|
293
|
-
|
294
|
-
# Update
|
295
|
-
|
296
|
-
|
297
|
-
# Save
|
298
|
-
temp_file =
|
289
|
+
current_results = {}
|
290
|
+
|
291
|
+
# Update with new result
|
292
|
+
current_results[url] = result
|
293
|
+
|
294
|
+
# Save atomically using temporary file
|
295
|
+
temp_file = self.config.output_file.with_suffix('.tmp')
|
299
296
|
with open(temp_file, 'w', encoding='utf-8') as f:
|
300
|
-
json.dump(
|
301
|
-
|
302
|
-
# Atomic replace
|
303
|
-
|
304
|
-
logger.info(f"Updated combined results file with {url}
|
305
|
-
|
306
|
-
# Cleanup all backup files
|
307
|
-
for backup_file in Path(output_file).parent.glob(f"{Path(output_file).stem}.bak_*"):
|
308
|
-
try:
|
309
|
-
backup_file.unlink()
|
310
|
-
logger.info(f"Cleaned up backup file: {backup_file}")
|
311
|
-
except Exception as e:
|
312
|
-
logger.warning(f"Failed to remove backup file {backup_file}: {e}")
|
313
|
-
|
297
|
+
json.dump(current_results, f, indent=2, ensure_ascii=False)
|
298
|
+
|
299
|
+
# Atomic replace
|
300
|
+
temp_file.replace(self.config.output_file)
|
301
|
+
logger.info(f"Updated combined results file with {url}")
|
302
|
+
|
314
303
|
# Cleanup backup files
|
315
|
-
for backup_file in
|
304
|
+
for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
|
316
305
|
try:
|
317
306
|
backup_file.unlink()
|
318
307
|
logger.info(f"Cleaned up backup file: {backup_file}")
|
319
308
|
except Exception as e:
|
320
309
|
logger.warning(f"Failed to remove backup file {backup_file}: {e}")
|
321
|
-
|
322
|
-
elif not self.config.combine_output and self.config.output_file:
|
323
|
-
individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
|
324
|
-
with open(individual_file, 'w') as f:
|
325
|
-
json.dump(result, f, indent=2)
|
326
|
-
logger.info(f"Saved individual result file for {url}")
|
327
310
|
except Exception as e:
|
328
311
|
logger.error(f"Error saving results for {url}: {str(e)}")
|
329
312
|
|
@@ -358,26 +341,34 @@ class PostExtractionAgent:
|
|
358
341
|
# Add URL to result before transformation
|
359
342
|
result['url'] = url
|
360
343
|
|
361
|
-
logger.info(f"Executing
|
344
|
+
logger.info(f"Executing transformer function for {url}")
|
362
345
|
transformed_result = self.config.custom_transform_function(result)
|
363
346
|
logger.info(f"Successfully applied custom transformation for {url}")
|
347
|
+
|
348
|
+
# Save the transformed result to combined output
|
349
|
+
if self.config.output_file:
|
350
|
+
self._save_result_sync(url, transformed_result)
|
351
|
+
logger.info(f"Saved transformed result to combined output for {url}")
|
352
|
+
|
364
353
|
logger.info(f"Webhook response sent for {url}")
|
365
|
-
return transformed_result
|
354
|
+
return transformed_result
|
366
355
|
except Exception as e:
|
367
356
|
error_msg = f"Warning: Issue in custom transform for {url}: {str(e)}"
|
368
357
|
logger.warning(error_msg)
|
369
358
|
console.print(f"[yellow]{error_msg}[/yellow]")
|
359
|
+
|
360
|
+
# Save original result if transformation fails
|
361
|
+
if self.config.output_file:
|
362
|
+
self._save_result_sync(url, result)
|
363
|
+
logger.info(f"Saved original result to combined output for {url}")
|
370
364
|
|
371
365
|
# Save result synchronously
|
372
366
|
try:
|
373
367
|
# Always save the result, whether transformed or original
|
374
368
|
result_to_save = transformed_result if self.config.custom_transform_function else result
|
375
|
-
if self.config.
|
376
|
-
self._save_result_sync(url, result_to_save)
|
377
|
-
logger.info(f"Saved results for {url} to combined output")
|
378
|
-
else:
|
369
|
+
if self.config.output_file:
|
379
370
|
self._save_result_sync(url, result_to_save)
|
380
|
-
logger.info(f"Saved
|
371
|
+
logger.info(f"Saved results for {url} to {self.config.output_file}")
|
381
372
|
except Exception as e:
|
382
373
|
error_msg = f"Error saving results for {url}: {str(e)}"
|
383
374
|
logger.error(error_msg)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|