spiderforce4ai 2.4.6__py3-none-any.whl → 2.4.8__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -870,7 +870,8 @@ class SpiderForce4AI:
870
870
  base_url=config.post_extraction_agent.get("base_url"),
871
871
  combine_output=bool(config.post_extraction_agent_save_to_file),
872
872
  output_file=config.post_extraction_agent_save_to_file,
873
- custom_transform_function=config.post_agent_transformer_function
873
+ custom_transform_function=config.post_agent_transformer_function,
874
+ response_format=config.post_extraction_agent.get("response_format")
874
875
  )
875
876
  agent = PostExtractionAgent(post_config)
876
877
 
@@ -12,6 +12,9 @@ from pydantic import BaseModel, Field
12
12
  import logging
13
13
  from datetime import datetime
14
14
  import re
15
+ from rich.console import Console
16
+
17
+ console = Console()
15
18
 
16
19
  logger = logging.getLogger(__name__)
17
20
 
@@ -115,6 +118,7 @@ class PostExtractionConfig:
115
118
  output_file: Optional[Path] = None
116
119
  custom_transform_function: Optional[Callable] = None
117
120
  buffer_file: Optional[Path] = None
121
+ response_format: Optional[str] = None # 'json' or 'text'
118
122
 
119
123
  def __post_init__(self):
120
124
  if self.output_file:
@@ -181,26 +185,48 @@ class PostExtractionAgent:
181
185
  for attempt in range(max_retries):
182
186
  try:
183
187
  # Call completion synchronously
184
- response = completion(
185
- model=self.config.model,
186
- messages=messages,
187
- max_tokens=self.config.max_tokens,
188
- temperature=self.config.temperature,
189
- api_key=self.config.api_key,
190
- api_base=self.config.base_url
191
- )
188
+ # Add response_format if specified
189
+ completion_args = {
190
+ "model": self.config.model,
191
+ "messages": messages,
192
+ "max_tokens": self.config.max_tokens,
193
+ "temperature": self.config.temperature,
194
+ "api_key": self.config.api_key,
195
+ }
196
+ if self.config.base_url:
197
+ completion_args["api_base"] = self.config.base_url
198
+ if self.config.response_format:
199
+ completion_args["response_format"] = {"type": self.config.response_format}
200
+
201
+ response = completion(**completion_args)
192
202
 
193
203
  # Log raw response for debugging
194
204
  raw_content = response.choices[0].message.content
195
205
  logger.debug(f"Raw LLM response for {url}: {raw_content}")
196
206
 
197
207
  try:
198
- extracted_data = json.loads(raw_content)
208
+ # First try direct JSON parsing
209
+ try:
210
+ extracted_data = json.loads(raw_content)
211
+ except json.JSONDecodeError:
212
+ # Look for JSON in markdown code blocks
213
+ json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
214
+ if json_match:
215
+ json_content = json_match.group(1).strip()
216
+ extracted_data = json.loads(json_content)
217
+ else:
218
+ # If no JSON found, try to extract structured data in any format
219
+ extracted_data = {
220
+ "raw_content": raw_content,
221
+ "format": "text",
222
+ "timestamp": datetime.now().isoformat()
223
+ }
224
+
199
225
  self.buffer.remove_request(url) # Remove from buffer if successful
200
226
  return extracted_data
201
- except json.JSONDecodeError as e:
227
+ except Exception as e:
202
228
  error_msg = (
203
- f"Invalid JSON response from LLM for {url}:\n"
229
+ f"Error processing LLM response for {url}:\n"
204
230
  f"Error: {str(e)}\n"
205
231
  f"Raw content: {raw_content[:500]}..." # First 500 chars of response
206
232
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.6
3
+ Version: 2.4.8
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
2
+ spiderforce4ai/post_extraction_agent.py,sha256=5M7pBU8O44Khfub2jSSPboSbrcsAPw6nnp576qIA2pY,14988
3
+ spiderforce4ai-2.4.8.dist-info/METADATA,sha256=zsEmCfjL_ueJzIeTJ9BIdUEF3R-4uOPDqrRxox70Cto,9012
4
+ spiderforce4ai-2.4.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.4.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.4.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.4.8.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
- spiderforce4ai/post_extraction_agent.py,sha256=Ty9-Ai3_RVT86RrPUxKEzf4oUh-Wr7bk2aM87Je_WvE,13580
3
- spiderforce4ai-2.4.6.dist-info/METADATA,sha256=7FaEgAHdD-8a0XmuDMkpAUjAQ7ZmFTD89IqQM17nllI,9012
4
- spiderforce4ai-2.4.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.4.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.4.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.4.6.dist-info/RECORD,,