PyPI - spiderforce4ai - Versions diffs - 2.4.7__py3-none-any.whl → 2.4.9__py3-none-any.whl - Mend

spiderforce4ai 2.4.7__py3-none-any.whl → 2.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

spiderforce4ai/__init__.py CHANGED Viewed

@@ -870,7 +870,8 @@ class SpiderForce4AI:
                         base_url=config.post_extraction_agent.get("base_url"),
                         combine_output=bool(config.post_extraction_agent_save_to_file),
                         output_file=config.post_extraction_agent_save_to_file,
-                        custom_transform_function=config.post_agent_transformer_function
+                        custom_transform_function=config.post_agent_transformer_function,
+                        response_format=config.post_extraction_agent.get("response_format")
                     )
                     agent = PostExtractionAgent(post_config)

spiderforce4ai/post_extraction_agent.py CHANGED Viewed

@@ -118,6 +118,7 @@ class PostExtractionConfig:
     output_file: Optional[Path] = None
     custom_transform_function: Optional[Callable] = None
     buffer_file: Optional[Path] = None
+    response_format: Optional[str] = None  # 'json' or 'text'
     def __post_init__(self):
         if self.output_file:
@@ -184,39 +185,49 @@ class PostExtractionAgent:
             for attempt in range(max_retries):
                 try:
                     # Call completion synchronously
-                    response = completion(
-                        model=self.config.model,
-                        messages=messages,
-                        max_tokens=self.config.max_tokens,
-                        temperature=self.config.temperature,
-                        api_key=self.config.api_key,
-                        api_base=self.config.base_url
-                    )
+                    # Add response_format if specified
+                    completion_args = {
+                        "model": self.config.model,
+                        "messages": messages,
+                        "max_tokens": self.config.max_tokens,
+                        "temperature": self.config.temperature,
+                        "api_key": self.config.api_key,
+                    }
+                    if self.config.base_url:
+                        completion_args["api_base"] = self.config.base_url
+                    if self.config.response_format:
+                        completion_args["response_format"] = {"type": self.config.response_format}
-                    # Log raw response for debugging
+                    response = completion(**completion_args)
                     raw_content = response.choices[0].message.content
                     logger.debug(f"Raw LLM response for {url}: {raw_content}")
+                    # Handle response based on response_format
                     try:
-                        # First try direct JSON parsing
-                        try:
-                            extracted_data = json.loads(raw_content)
-                        except json.JSONDecodeError:
-                            # Look for JSON in markdown code blocks
-                            json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
-                            if json_match:
-                                json_content = json_match.group(1).strip()
-                                extracted_data = json.loads(json_content)
-                            else:
-                                # If no JSON found, try to extract structured data in any format
-                                extracted_data = {
-                                    "raw_content": raw_content,
-                                    "format": "text",
-                                    "timestamp": datetime.now().isoformat()
-                                }
+                        if self.config.response_format == "json_object":
+                            # For json_object format, response should already be valid JSON
+                            extracted_data = raw_content if isinstance(raw_content, dict) else json.loads(raw_content)
+                        else:
+                            # For text format or unspecified, try parsing JSON or use as text
+                            try:
+                                extracted_data = json.loads(raw_content)
+                            except json.JSONDecodeError:
+                                # Look for JSON in markdown code blocks
+                                json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
+                                if json_match:
+                                    json_content = json_match.group(1).strip()
+                                    extracted_data = json.loads(json_content)
+                                else:
+                                    # If no JSON found and not json_object format, use raw content
+                                    extracted_data = {
+                                        "raw_content": raw_content,
+                                        "format": "text",
+                                        "timestamp": datetime.now().isoformat()
+                                    }
                         self.buffer.remove_request(url)  # Remove from buffer if successful
                         return extracted_data
                     except Exception as e:
                         error_msg = (
                             f"Error processing LLM response for {url}:\n"

{spiderforce4ai-2.4.7.dist-info → spiderforce4ai-2.4.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.7
+Version: 2.4.9
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz

spiderforce4ai-2.4.9.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
+spiderforce4ai/post_extraction_agent.py,sha256=so5Ze7Vz3konpQ0iT7ZxDGE9kIYeTwPTFyzezRc5oys,15392
+spiderforce4ai-2.4.9.dist-info/METADATA,sha256=kEq3anAkoe_wpPVzpgaJlsSuAzTQHDgXiDFpirXvUQc,9012
+spiderforce4ai-2.4.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+spiderforce4ai-2.4.9.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
+spiderforce4ai-2.4.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
+spiderforce4ai-2.4.9.dist-info/RECORD,,

spiderforce4ai-2.4.7.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
-spiderforce4ai/post_extraction_agent.py,sha256=q2ohsqw_F1e5rT2H9eSzCWzstJLbwGyCtwLsC6eMufs,14560
-spiderforce4ai-2.4.7.dist-info/METADATA,sha256=r273h2ogI76aXTd8XN9b81EWtQLuhdSjZkXD2Ks8GnM,9012
-spiderforce4ai-2.4.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-spiderforce4ai-2.4.7.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
-spiderforce4ai-2.4.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
-spiderforce4ai-2.4.7.dist-info/RECORD,,

{spiderforce4ai-2.4.7.dist-info → spiderforce4ai-2.4.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{spiderforce4ai-2.4.7.dist-info → spiderforce4ai-2.4.9.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{spiderforce4ai-2.4.7.dist-info → spiderforce4ai-2.4.9.dist-info}/top_level.txt RENAMED Viewed

File without changes

spiderforce4ai 2.4.7__py3-none-any.whl → 2.4.9__py3-none-any.whl

spiderforce4ai 2.4.7__py3-none-any.whl → 2.4.9__py3-none-any.whl