PyPI - spiderforce4ai - Versions diffs - 2.4.6__tar.gz → 2.4.8__tar.gz - Mend

spiderforce4ai 2.4.6.tar.gz → 2.4.8.tar.gz

Files changed (14) hide show

{spiderforce4ai-2.4.6 → spiderforce4ai-2.4.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.6
+Version: 2.4.8
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz

{spiderforce4ai-2.4.6 → spiderforce4ai-2.4.8}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "spiderforce4ai"
-version = "2.4.6"
+version = "2.4.8"
 description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
 readme = "README.md"
 authors = [

{spiderforce4ai-2.4.6 → spiderforce4ai-2.4.8}/setup.py RENAMED Viewed

@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
 setup(
     name="spiderforce4ai",
-    version="2.4.6",
+    version="2.4.8",
     author="Piotr Tamulewicz",
     author_email="pt@petertam.pro",
     description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",

{spiderforce4ai-2.4.6 → spiderforce4ai-2.4.8}/spiderforce4ai/__init__.py RENAMED Viewed

@@ -870,7 +870,8 @@ class SpiderForce4AI:
                         base_url=config.post_extraction_agent.get("base_url"),
                         combine_output=bool(config.post_extraction_agent_save_to_file),
                         output_file=config.post_extraction_agent_save_to_file,
-                        custom_transform_function=config.post_agent_transformer_function
+                        custom_transform_function=config.post_agent_transformer_function,
+                        response_format=config.post_extraction_agent.get("response_format")
                     )
                     agent = PostExtractionAgent(post_config)

{spiderforce4ai-2.4.6 → spiderforce4ai-2.4.8}/spiderforce4ai/post_extraction_agent.py RENAMED Viewed

@@ -12,6 +12,9 @@ from pydantic import BaseModel, Field
 import logging
 from datetime import datetime
 import re
+from rich.console import Console
+console = Console()
 logger = logging.getLogger(__name__)
@@ -115,6 +118,7 @@ class PostExtractionConfig:
     output_file: Optional[Path] = None
     custom_transform_function: Optional[Callable] = None
     buffer_file: Optional[Path] = None
+    response_format: Optional[str] = None  # 'json' or 'text'
     def __post_init__(self):
         if self.output_file:
@@ -181,26 +185,48 @@ class PostExtractionAgent:
             for attempt in range(max_retries):
                 try:
                     # Call completion synchronously
-                    response = completion(
-                        model=self.config.model,
-                        messages=messages,
-                        max_tokens=self.config.max_tokens,
-                        temperature=self.config.temperature,
-                        api_key=self.config.api_key,
-                        api_base=self.config.base_url
-                    )
+                    # Add response_format if specified
+                    completion_args = {
+                        "model": self.config.model,
+                        "messages": messages,
+                        "max_tokens": self.config.max_tokens,
+                        "temperature": self.config.temperature,
+                        "api_key": self.config.api_key,
+                    }
+                    if self.config.base_url:
+                        completion_args["api_base"] = self.config.base_url
+                    if self.config.response_format:
+                        completion_args["response_format"] = {"type": self.config.response_format}
+                    response = completion(**completion_args)
                     # Log raw response for debugging
                     raw_content = response.choices[0].message.content
                     logger.debug(f"Raw LLM response for {url}: {raw_content}")
                     try:
-                        extracted_data = json.loads(raw_content)
+                        # First try direct JSON parsing
+                        try:
+                            extracted_data = json.loads(raw_content)
+                        except json.JSONDecodeError:
+                            # Look for JSON in markdown code blocks
+                            json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
+                            if json_match:
+                                json_content = json_match.group(1).strip()
+                                extracted_data = json.loads(json_content)
+                            else:
+                                # If no JSON found, try to extract structured data in any format
+                                extracted_data = {
+                                    "raw_content": raw_content,
+                                    "format": "text",
+                                    "timestamp": datetime.now().isoformat()
+                                }
                         self.buffer.remove_request(url)  # Remove from buffer if successful
                         return extracted_data
-                    except json.JSONDecodeError as e:
+                    except Exception as e:
                         error_msg = (
-                            f"Invalid JSON response from LLM for {url}:\n"
+                            f"Error processing LLM response for {url}:\n"
                             f"Error: {str(e)}\n"
                             f"Raw content: {raw_content[:500]}..."  # First 500 chars of response
                         )

{spiderforce4ai-2.4.6 → spiderforce4ai-2.4.8}/spiderforce4ai.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.6
+Version: 2.4.8
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz