PyPI - spiderforce4ai - Versions diffs - 2.4.8__tar.gz → 2.4.9__tar.gz - Mend

spiderforce4ai 2.4.8.tar.gz → 2.4.9.tar.gz

Files changed (14)

{spiderforce4ai-2.4.8 → spiderforce4ai-2.4.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.8
+Version: 2.4.9
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz

{spiderforce4ai-2.4.8 → spiderforce4ai-2.4.9}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "spiderforce4ai"
-version = "2.4.8"
+version = "2.4.9"
 description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
 readme = "README.md"
 authors = [

{spiderforce4ai-2.4.8 → spiderforce4ai-2.4.9}/setup.py RENAMED Viewed

@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
 setup(
     name="spiderforce4ai",
-    version="2.4.8",
+    version="2.4.9",
     author="Piotr Tamulewicz",
     author_email="pt@petertam.pro",
     description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",

{spiderforce4ai-2.4.8 → spiderforce4ai-2.4.9}/spiderforce4ai/post_extraction_agent.py RENAMED Viewed

@@ -199,31 +199,35 @@ class PostExtractionAgent:
                         completion_args["response_format"] = {"type": self.config.response_format}
                     response = completion(**completion_args)
-                    # Log raw response for debugging
                     raw_content = response.choices[0].message.content
                     logger.debug(f"Raw LLM response for {url}: {raw_content}")
+                    # Handle response based on response_format
                     try:
-                        # First try direct JSON parsing
-                        try:
-                            extracted_data = json.loads(raw_content)
-                        except json.JSONDecodeError:
-                            # Look for JSON in markdown code blocks
-                            json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
-                            if json_match:
-                                json_content = json_match.group(1).strip()
-                                extracted_data = json.loads(json_content)
-                            else:
-                                # If no JSON found, try to extract structured data in any format
-                                extracted_data = {
-                                    "raw_content": raw_content,
-                                    "format": "text",
-                                    "timestamp": datetime.now().isoformat()
-                                }
+                        if self.config.response_format == "json_object":
+                            # For json_object format, response should already be valid JSON
+                            extracted_data = raw_content if isinstance(raw_content, dict) else json.loads(raw_content)
+                        else:
+                            # For text format or unspecified, try parsing JSON or use as text
+                            try:
+                                extracted_data = json.loads(raw_content)
+                            except json.JSONDecodeError:
+                                # Look for JSON in markdown code blocks
+                                json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
+                                if json_match:
+                                    json_content = json_match.group(1).strip()
+                                    extracted_data = json.loads(json_content)
+                                else:
+                                    # If no JSON found and not json_object format, use raw content
+                                    extracted_data = {
+                                        "raw_content": raw_content,
+                                        "format": "text",
+                                        "timestamp": datetime.now().isoformat()
+                                    }
                         self.buffer.remove_request(url)  # Remove from buffer if successful
                         return extracted_data
                     except Exception as e:
                         error_msg = (
                             f"Error processing LLM response for {url}:\n"

{spiderforce4ai-2.4.8 → spiderforce4ai-2.4.9}/spiderforce4ai.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.8
+Version: 2.4.9
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz