PyPI - spiderforce4ai - Versions diffs - 2.4.7__tar.gz → 2.4.9__tar.gz - Mend

spiderforce4ai 2.4.7.tar.gz → 2.4.9.tar.gz

Files changed (14) hide show

{spiderforce4ai-2.4.7 → spiderforce4ai-2.4.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.7
+Version: 2.4.9
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz

{spiderforce4ai-2.4.7 → spiderforce4ai-2.4.9}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "spiderforce4ai"
-version = "2.4.7"
+version = "2.4.9"
 description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
 readme = "README.md"
 authors = [

{spiderforce4ai-2.4.7 → spiderforce4ai-2.4.9}/setup.py RENAMED Viewed

@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
 setup(
     name="spiderforce4ai",
-    version="2.4.7",
+    version="2.4.9",
     author="Piotr Tamulewicz",
     author_email="pt@petertam.pro",
     description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",

{spiderforce4ai-2.4.7 → spiderforce4ai-2.4.9}/spiderforce4ai/__init__.py RENAMED Viewed

@@ -870,7 +870,8 @@ class SpiderForce4AI:
                         base_url=config.post_extraction_agent.get("base_url"),
                         combine_output=bool(config.post_extraction_agent_save_to_file),
                         output_file=config.post_extraction_agent_save_to_file,
-                        custom_transform_function=config.post_agent_transformer_function
+                        custom_transform_function=config.post_agent_transformer_function,
+                        response_format=config.post_extraction_agent.get("response_format")
                     )
                     agent = PostExtractionAgent(post_config)

{spiderforce4ai-2.4.7 → spiderforce4ai-2.4.9}/spiderforce4ai/post_extraction_agent.py RENAMED Viewed

@@ -118,6 +118,7 @@ class PostExtractionConfig:
     output_file: Optional[Path] = None
     custom_transform_function: Optional[Callable] = None
     buffer_file: Optional[Path] = None
+    response_format: Optional[str] = None  # 'json' or 'text'
     def __post_init__(self):
         if self.output_file:
@@ -184,39 +185,49 @@ class PostExtractionAgent:
             for attempt in range(max_retries):
                 try:
                     # Call completion synchronously
-                    response = completion(
-                        model=self.config.model,
-                        messages=messages,
-                        max_tokens=self.config.max_tokens,
-                        temperature=self.config.temperature,
-                        api_key=self.config.api_key,
-                        api_base=self.config.base_url
-                    )
+                    # Add response_format if specified
+                    completion_args = {
+                        "model": self.config.model,
+                        "messages": messages,
+                        "max_tokens": self.config.max_tokens,
+                        "temperature": self.config.temperature,
+                        "api_key": self.config.api_key,
+                    }
+                    if self.config.base_url:
+                        completion_args["api_base"] = self.config.base_url
+                    if self.config.response_format:
+                        completion_args["response_format"] = {"type": self.config.response_format}
-                    # Log raw response for debugging
+                    response = completion(**completion_args)
                     raw_content = response.choices[0].message.content
                     logger.debug(f"Raw LLM response for {url}: {raw_content}")
+                    # Handle response based on response_format
                     try:
-                        # First try direct JSON parsing
-                        try:
-                            extracted_data = json.loads(raw_content)
-                        except json.JSONDecodeError:
-                            # Look for JSON in markdown code blocks
-                            json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
-                            if json_match:
-                                json_content = json_match.group(1).strip()
-                                extracted_data = json.loads(json_content)
-                            else:
-                                # If no JSON found, try to extract structured data in any format
-                                extracted_data = {
-                                    "raw_content": raw_content,
-                                    "format": "text",
-                                    "timestamp": datetime.now().isoformat()
-                                }
+                        if self.config.response_format == "json_object":
+                            # For json_object format, response should already be valid JSON
+                            extracted_data = raw_content if isinstance(raw_content, dict) else json.loads(raw_content)
+                        else:
+                            # For text format or unspecified, try parsing JSON or use as text
+                            try:
+                                extracted_data = json.loads(raw_content)
+                            except json.JSONDecodeError:
+                                # Look for JSON in markdown code blocks
+                                json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
+                                if json_match:
+                                    json_content = json_match.group(1).strip()
+                                    extracted_data = json.loads(json_content)
+                                else:
+                                    # If no JSON found and not json_object format, use raw content
+                                    extracted_data = {
+                                        "raw_content": raw_content,
+                                        "format": "text",
+                                        "timestamp": datetime.now().isoformat()
+                                    }
                         self.buffer.remove_request(url)  # Remove from buffer if successful
                         return extracted_data
                     except Exception as e:
                         error_msg = (
                             f"Error processing LLM response for {url}:\n"

{spiderforce4ai-2.4.7 → spiderforce4ai-2.4.9}/spiderforce4ai.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.7
+Version: 2.4.9
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz