spiderforce4ai 2.4.8__tar.gz → 2.4.9__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.8
3
+ Version: 2.4.9
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spiderforce4ai"
7
- version = "2.4.8"
7
+ version = "2.4.9"
8
8
  description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
7
7
 
8
8
  setup(
9
9
  name="spiderforce4ai",
10
- version="2.4.8",
10
+ version="2.4.9",
11
11
  author="Piotr Tamulewicz",
12
12
  author_email="pt@petertam.pro",
13
13
  description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",
@@ -199,31 +199,35 @@ class PostExtractionAgent:
199
199
  completion_args["response_format"] = {"type": self.config.response_format}
200
200
 
201
201
  response = completion(**completion_args)
202
-
203
- # Log raw response for debugging
204
202
  raw_content = response.choices[0].message.content
205
203
  logger.debug(f"Raw LLM response for {url}: {raw_content}")
206
-
204
+
205
+ # Handle response based on response_format
207
206
  try:
208
- # First try direct JSON parsing
209
- try:
210
- extracted_data = json.loads(raw_content)
211
- except json.JSONDecodeError:
212
- # Look for JSON in markdown code blocks
213
- json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
214
- if json_match:
215
- json_content = json_match.group(1).strip()
216
- extracted_data = json.loads(json_content)
217
- else:
218
- # If no JSON found, try to extract structured data in any format
219
- extracted_data = {
220
- "raw_content": raw_content,
221
- "format": "text",
222
- "timestamp": datetime.now().isoformat()
223
- }
207
+ if self.config.response_format == "json_object":
208
+ # For json_object format, response should already be valid JSON
209
+ extracted_data = raw_content if isinstance(raw_content, dict) else json.loads(raw_content)
210
+ else:
211
+ # For text format or unspecified, try parsing JSON or use as text
212
+ try:
213
+ extracted_data = json.loads(raw_content)
214
+ except json.JSONDecodeError:
215
+ # Look for JSON in markdown code blocks
216
+ json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
217
+ if json_match:
218
+ json_content = json_match.group(1).strip()
219
+ extracted_data = json.loads(json_content)
220
+ else:
221
+ # If no JSON found and not json_object format, use raw content
222
+ extracted_data = {
223
+ "raw_content": raw_content,
224
+ "format": "text",
225
+ "timestamp": datetime.now().isoformat()
226
+ }
224
227
 
225
228
  self.buffer.remove_request(url) # Remove from buffer if successful
226
229
  return extracted_data
230
+
227
231
  except Exception as e:
228
232
  error_msg = (
229
233
  f"Error processing LLM response for {url}:\n"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.8
3
+ Version: 2.4.9
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
File without changes
File without changes