PyPI - spiderforce4ai - Versions diffs - 2.4.8__tar.gz → 2.5__tar.gz - Mend

spiderforce4ai 2.4.8.tar.gz → 2.5.tar.gz

Files changed (14)

{spiderforce4ai-2.4.8 → spiderforce4ai-2.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.8
+Version: 2.5
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz

{spiderforce4ai-2.4.8 → spiderforce4ai-2.5}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "spiderforce4ai"
-version = "2.4.8"
+version = "2.5"
 description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
 readme = "README.md"
 authors = [

{spiderforce4ai-2.4.8 → spiderforce4ai-2.5}/setup.py RENAMED Viewed

@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
 setup(
     name="spiderforce4ai",
-    version="2.4.8",
+    version="2.5",
     author="Piotr Tamulewicz",
     author_email="pt@petertam.pro",
     description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",

{spiderforce4ai-2.4.8 → spiderforce4ai-2.5}/spiderforce4ai/__init__.py RENAMED Viewed

@@ -5,6 +5,9 @@ import asyncio
 import aiohttp
 import json
 import logging
+logger = logging.getLogger(__name__)
+import logging
 from typing import List, Dict, Union, Optional, Tuple, Callable, Any
 from dataclasses import dataclass, asdict
 from urllib.parse import urljoin, urlparse
@@ -877,7 +880,20 @@ class SpiderForce4AI:
                     for result in successful_results:
                         try:
-                            result.extraction_result = asyncio.run(agent.process_content(result.url, result.markdown))
+                            # Get LLM response
+                            llm_response = agent.process_content(result.url, result.markdown)
+                            if llm_response:
+                                # Add URL to the response before transformation
+                                llm_response['url'] = result.url
+                                # Apply transformation if provided
+                                if config.post_agent_transformer_function:
+                                    try:
+                                        result.extraction_result = config.post_agent_transformer_function(llm_response)
+                                    except Exception as e:
+                                        logger.error(f"Transformer error for {result.url}: {str(e)}")
+                                        result.extraction_result = llm_response  # Use original response if transform fails
+                                else:
+                                    result.extraction_result = llm_response
                             progress.update(llm_task, advance=1)
                         except Exception as e:
                             console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")

{spiderforce4ai-2.4.8 → spiderforce4ai-2.5}/spiderforce4ai/post_extraction_agent.py RENAMED Viewed

@@ -199,31 +199,35 @@ class PostExtractionAgent:
                         completion_args["response_format"] = {"type": self.config.response_format}
                     response = completion(**completion_args)
-                    # Log raw response for debugging
                     raw_content = response.choices[0].message.content
                     logger.debug(f"Raw LLM response for {url}: {raw_content}")
+                    # Handle response based on response_format
                     try:
-                        # First try direct JSON parsing
-                        try:
-                            extracted_data = json.loads(raw_content)
-                        except json.JSONDecodeError:
-                            # Look for JSON in markdown code blocks
-                            json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
-                            if json_match:
-                                json_content = json_match.group(1).strip()
-                                extracted_data = json.loads(json_content)
-                            else:
-                                # If no JSON found, try to extract structured data in any format
-                                extracted_data = {
-                                    "raw_content": raw_content,
-                                    "format": "text",
-                                    "timestamp": datetime.now().isoformat()
-                                }
+                        if self.config.response_format == "json_object":
+                            # For json_object format, response should already be valid JSON
+                            extracted_data = raw_content if isinstance(raw_content, dict) else json.loads(raw_content)
+                        else:
+                            # For text format or unspecified, try parsing JSON or use as text
+                            try:
+                                extracted_data = json.loads(raw_content)
+                            except json.JSONDecodeError:
+                                # Look for JSON in markdown code blocks
+                                json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
+                                if json_match:
+                                    json_content = json_match.group(1).strip()
+                                    extracted_data = json.loads(json_content)
+                                else:
+                                    # If no JSON found and not json_object format, use raw content
+                                    extracted_data = {
+                                        "raw_content": raw_content,
+                                        "format": "text",
+                                        "timestamp": datetime.now().isoformat()
+                                    }
                         self.buffer.remove_request(url)  # Remove from buffer if successful
                         return extracted_data
                     except Exception as e:
                         error_msg = (
                             f"Error processing LLM response for {url}:\n"

{spiderforce4ai-2.4.8 → spiderforce4ai-2.5}/spiderforce4ai.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.8
+Version: 2.5
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz