spiderforce4ai 2.4.6__py3-none-any.whl → 2.4.8__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -870,7 +870,8 @@ class SpiderForce4AI:
870
870
  base_url=config.post_extraction_agent.get("base_url"),
871
871
  combine_output=bool(config.post_extraction_agent_save_to_file),
872
872
  output_file=config.post_extraction_agent_save_to_file,
873
- custom_transform_function=config.post_agent_transformer_function
873
+ custom_transform_function=config.post_agent_transformer_function,
874
+ response_format=config.post_extraction_agent.get("response_format")
874
875
  )
875
876
  agent = PostExtractionAgent(post_config)
876
877
 
@@ -12,6 +12,9 @@ from pydantic import BaseModel, Field
12
12
  import logging
13
13
  from datetime import datetime
14
14
  import re
15
+ from rich.console import Console
16
+
17
+ console = Console()
15
18
 
16
19
  logger = logging.getLogger(__name__)
17
20
 
@@ -115,6 +118,7 @@ class PostExtractionConfig:
115
118
  output_file: Optional[Path] = None
116
119
  custom_transform_function: Optional[Callable] = None
117
120
  buffer_file: Optional[Path] = None
121
+ response_format: Optional[str] = None # sent to the LLM as {"type": <value>}; OpenAI-style APIs expect 'json_object' or 'text' (verify for your provider)
118
122
 
119
123
  def __post_init__(self):
120
124
  if self.output_file:
@@ -181,26 +185,48 @@ class PostExtractionAgent:
181
185
  for attempt in range(max_retries):
182
186
  try:
183
187
  # Call completion synchronously
184
- response = completion(
185
- model=self.config.model,
186
- messages=messages,
187
- max_tokens=self.config.max_tokens,
188
- temperature=self.config.temperature,
189
- api_key=self.config.api_key,
190
- api_base=self.config.base_url
191
- )
188
+ # Build the completion kwargs; api_base and response_format are added only when configured
189
+ completion_args = {
190
+ "model": self.config.model,
191
+ "messages": messages,
192
+ "max_tokens": self.config.max_tokens,
193
+ "temperature": self.config.temperature,
194
+ "api_key": self.config.api_key,
195
+ }
196
+ if self.config.base_url:
197
+ completion_args["api_base"] = self.config.base_url
198
+ if self.config.response_format:
199
+ completion_args["response_format"] = {"type": self.config.response_format}
200
+
201
+ response = completion(**completion_args)
192
202
 
193
203
  # Log raw response for debugging
194
204
  raw_content = response.choices[0].message.content
195
205
  logger.debug(f"Raw LLM response for {url}: {raw_content}")
196
206
 
197
207
  try:
198
- extracted_data = json.loads(raw_content)
208
+ # First try direct JSON parsing
209
+ try:
210
+ extracted_data = json.loads(raw_content)
211
+ except json.JSONDecodeError:
212
+ # Look for JSON in markdown code blocks
213
+ json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
214
+ if json_match:
215
+ json_content = json_match.group(1).strip()
216
+ extracted_data = json.loads(json_content)
217
+ else:
218
+ # If no JSON found, fall back to wrapping the raw text in a dict marked as format "text"
219
+ extracted_data = {
220
+ "raw_content": raw_content,
221
+ "format": "text",
222
+ "timestamp": datetime.now().isoformat()
223
+ }
224
+
199
225
  self.buffer.remove_request(url) # Remove from buffer if successful
200
226
  return extracted_data
201
- except json.JSONDecodeError as e:
227
+ except Exception as e:
202
228
  error_msg = (
203
- f"Invalid JSON response from LLM for {url}:\n"
229
+ f"Error processing LLM response for {url}:\n"
204
230
  f"Error: {str(e)}\n"
205
231
  f"Raw content: {raw_content[:500]}..." # First 500 chars of response
206
232
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.6
3
+ Version: 2.4.8
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
2
+ spiderforce4ai/post_extraction_agent.py,sha256=5M7pBU8O44Khfub2jSSPboSbrcsAPw6nnp576qIA2pY,14988
3
+ spiderforce4ai-2.4.8.dist-info/METADATA,sha256=zsEmCfjL_ueJzIeTJ9BIdUEF3R-4uOPDqrRxox70Cto,9012
4
+ spiderforce4ai-2.4.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.4.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.4.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.4.8.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
- spiderforce4ai/post_extraction_agent.py,sha256=Ty9-Ai3_RVT86RrPUxKEzf4oUh-Wr7bk2aM87Je_WvE,13580
3
- spiderforce4ai-2.4.6.dist-info/METADATA,sha256=7FaEgAHdD-8a0XmuDMkpAUjAQ7ZmFTD89IqQM17nllI,9012
4
- spiderforce4ai-2.4.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.4.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.4.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.4.6.dist-info/RECORD,,