spiderforce4ai 2.4.6__tar.gz → 2.4.8__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.6
3
+ Version: 2.4.8
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spiderforce4ai"
7
- version = "2.4.6"
7
+ version = "2.4.8"
8
8
  description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
7
7
 
8
8
  setup(
9
9
  name="spiderforce4ai",
10
- version="2.4.6",
10
+ version="2.4.8",
11
11
  author="Piotr Tamulewicz",
12
12
  author_email="pt@petertam.pro",
13
13
  description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",
@@ -870,7 +870,8 @@ class SpiderForce4AI:
870
870
  base_url=config.post_extraction_agent.get("base_url"),
871
871
  combine_output=bool(config.post_extraction_agent_save_to_file),
872
872
  output_file=config.post_extraction_agent_save_to_file,
873
- custom_transform_function=config.post_agent_transformer_function
873
+ custom_transform_function=config.post_agent_transformer_function,
874
+ response_format=config.post_extraction_agent.get("response_format")
874
875
  )
875
876
  agent = PostExtractionAgent(post_config)
876
877
 
@@ -12,6 +12,9 @@ from pydantic import BaseModel, Field
12
12
  import logging
13
13
  from datetime import datetime
14
14
  import re
15
+ from rich.console import Console
16
+
17
+ console = Console()
15
18
 
16
19
  logger = logging.getLogger(__name__)
17
20
 
@@ -115,6 +118,7 @@ class PostExtractionConfig:
115
118
  output_file: Optional[Path] = None
116
119
  custom_transform_function: Optional[Callable] = None
117
120
  buffer_file: Optional[Path] = None
121
+ response_format: Optional[str] = None # forwarded as {"type": <value>}; e.g. 'json_object' or 'text'
118
122
 
119
123
  def __post_init__(self):
120
124
  if self.output_file:
@@ -181,26 +185,48 @@ class PostExtractionAgent:
181
185
  for attempt in range(max_retries):
182
186
  try:
183
187
  # Call completion synchronously
184
- response = completion(
185
- model=self.config.model,
186
- messages=messages,
187
- max_tokens=self.config.max_tokens,
188
- temperature=self.config.temperature,
189
- api_key=self.config.api_key,
190
- api_base=self.config.base_url
191
- )
188
+ # Build the completion kwargs; api_base and response_format are added only when set
189
+ completion_args = {
190
+ "model": self.config.model,
191
+ "messages": messages,
192
+ "max_tokens": self.config.max_tokens,
193
+ "temperature": self.config.temperature,
194
+ "api_key": self.config.api_key,
195
+ }
196
+ if self.config.base_url:
197
+ completion_args["api_base"] = self.config.base_url
198
+ if self.config.response_format:
199
+ completion_args["response_format"] = {"type": self.config.response_format}
200
+
201
+ response = completion(**completion_args)
192
202
 
193
203
  # Log raw response for debugging
194
204
  raw_content = response.choices[0].message.content
195
205
  logger.debug(f"Raw LLM response for {url}: {raw_content}")
196
206
 
197
207
  try:
198
- extracted_data = json.loads(raw_content)
208
+ # First try direct JSON parsing
209
+ try:
210
+ extracted_data = json.loads(raw_content)
211
+ except json.JSONDecodeError:
212
+ # Look for JSON in markdown code blocks
213
+ json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
214
+ if json_match:
215
+ json_content = json_match.group(1).strip()
216
+ extracted_data = json.loads(json_content)
217
+ else:
218
+ # No fenced JSON block found: wrap the raw text in a fallback record
219
+ extracted_data = {
220
+ "raw_content": raw_content,
221
+ "format": "text",
222
+ "timestamp": datetime.now().isoformat()
223
+ }
224
+
199
225
  self.buffer.remove_request(url) # Remove from buffer if successful
200
226
  return extracted_data
201
- except json.JSONDecodeError as e:
227
+ except Exception as e:
202
228
  error_msg = (
203
- f"Invalid JSON response from LLM for {url}:\n"
229
+ f"Error processing LLM response for {url}:\n"
204
230
  f"Error: {str(e)}\n"
205
231
  f"Raw content: {raw_content[:500]}..." # First 500 chars of response
206
232
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.6
3
+ Version: 2.4.8
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
File without changes
File without changes