spiderforce4ai 2.4.7__py3-none-any.whl → 2.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -870,7 +870,8 @@ class SpiderForce4AI:
870
870
  base_url=config.post_extraction_agent.get("base_url"),
871
871
  combine_output=bool(config.post_extraction_agent_save_to_file),
872
872
  output_file=config.post_extraction_agent_save_to_file,
873
- custom_transform_function=config.post_agent_transformer_function
873
+ custom_transform_function=config.post_agent_transformer_function,
874
+ response_format=config.post_extraction_agent.get("response_format")
874
875
  )
875
876
  agent = PostExtractionAgent(post_config)
876
877
 
@@ -118,6 +118,7 @@ class PostExtractionConfig:
118
118
  output_file: Optional[Path] = None
119
119
  custom_transform_function: Optional[Callable] = None
120
120
  buffer_file: Optional[Path] = None
121
+ response_format: Optional[str] = None # 'json_object' or 'text'; sent to the LLM as {"type": <value>}
121
122
 
122
123
  def __post_init__(self):
123
124
  if self.output_file:
@@ -184,39 +185,49 @@ class PostExtractionAgent:
184
185
  for attempt in range(max_retries):
185
186
  try:
186
187
  # Call completion synchronously
187
- response = completion(
188
- model=self.config.model,
189
- messages=messages,
190
- max_tokens=self.config.max_tokens,
191
- temperature=self.config.temperature,
192
- api_key=self.config.api_key,
193
- api_base=self.config.base_url
194
- )
188
+ # Build the completion arguments; api_base and response_format are added only when set
189
+ completion_args = {
190
+ "model": self.config.model,
191
+ "messages": messages,
192
+ "max_tokens": self.config.max_tokens,
193
+ "temperature": self.config.temperature,
194
+ "api_key": self.config.api_key,
195
+ }
196
+ if self.config.base_url:
197
+ completion_args["api_base"] = self.config.base_url
198
+ if self.config.response_format:
199
+ completion_args["response_format"] = {"type": self.config.response_format}
195
200
 
196
- # Log raw response for debugging
201
+ response = completion(**completion_args)
197
202
  raw_content = response.choices[0].message.content
198
203
  logger.debug(f"Raw LLM response for {url}: {raw_content}")
199
-
204
+
205
+ # Handle response based on response_format
200
206
  try:
201
- # First try direct JSON parsing
202
- try:
203
- extracted_data = json.loads(raw_content)
204
- except json.JSONDecodeError:
205
- # Look for JSON in markdown code blocks
206
- json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
207
- if json_match:
208
- json_content = json_match.group(1).strip()
209
- extracted_data = json.loads(json_content)
210
- else:
211
- # If no JSON found, try to extract structured data in any format
212
- extracted_data = {
213
- "raw_content": raw_content,
214
- "format": "text",
215
- "timestamp": datetime.now().isoformat()
216
- }
207
+ if self.config.response_format == "json_object":
208
+ # For json_object format, response should already be valid JSON
209
+ extracted_data = raw_content if isinstance(raw_content, dict) else json.loads(raw_content)
210
+ else:
211
+ # For text format or unspecified, try parsing JSON or use as text
212
+ try:
213
+ extracted_data = json.loads(raw_content)
214
+ except json.JSONDecodeError:
215
+ # Look for JSON in markdown code blocks
216
+ json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
217
+ if json_match:
218
+ json_content = json_match.group(1).strip()
219
+ extracted_data = json.loads(json_content)
220
+ else:
221
+ # If no JSON found and not json_object format, use raw content
222
+ extracted_data = {
223
+ "raw_content": raw_content,
224
+ "format": "text",
225
+ "timestamp": datetime.now().isoformat()
226
+ }
217
227
 
218
228
  self.buffer.remove_request(url) # Remove from buffer if successful
219
229
  return extracted_data
230
+
220
231
  except Exception as e:
221
232
  error_msg = (
222
233
  f"Error processing LLM response for {url}:\n"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.7
3
+ Version: 2.4.9
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
2
+ spiderforce4ai/post_extraction_agent.py,sha256=so5Ze7Vz3konpQ0iT7ZxDGE9kIYeTwPTFyzezRc5oys,15392
3
+ spiderforce4ai-2.4.9.dist-info/METADATA,sha256=kEq3anAkoe_wpPVzpgaJlsSuAzTQHDgXiDFpirXvUQc,9012
4
+ spiderforce4ai-2.4.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.4.9.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.4.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.4.9.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
- spiderforce4ai/post_extraction_agent.py,sha256=q2ohsqw_F1e5rT2H9eSzCWzstJLbwGyCtwLsC6eMufs,14560
3
- spiderforce4ai-2.4.7.dist-info/METADATA,sha256=r273h2ogI76aXTd8XN9b81EWtQLuhdSjZkXD2Ks8GnM,9012
4
- spiderforce4ai-2.4.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.4.7.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.4.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.4.7.dist-info/RECORD,,