spiderforce4ai 2.4.7__py3-none-any.whl → 2.4.9__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -870,7 +870,8 @@ class SpiderForce4AI:
870
870
  base_url=config.post_extraction_agent.get("base_url"),
871
871
  combine_output=bool(config.post_extraction_agent_save_to_file),
872
872
  output_file=config.post_extraction_agent_save_to_file,
873
- custom_transform_function=config.post_agent_transformer_function
873
+ custom_transform_function=config.post_agent_transformer_function,
874
+ response_format=config.post_extraction_agent.get("response_format")
874
875
  )
875
876
  agent = PostExtractionAgent(post_config)
876
877
 
@@ -118,6 +118,7 @@ class PostExtractionConfig:
118
118
  output_file: Optional[Path] = None
119
119
  custom_transform_function: Optional[Callable] = None
120
120
  buffer_file: Optional[Path] = None
121
+ response_format: Optional[str] = None # e.g. 'json_object' or 'text'; forwarded to the LLM call as {"type": <value>}
121
122
 
122
123
  def __post_init__(self):
123
124
  if self.output_file:
@@ -184,39 +185,49 @@ class PostExtractionAgent:
184
185
  for attempt in range(max_retries):
185
186
  try:
186
187
  # Call completion synchronously
187
- response = completion(
188
- model=self.config.model,
189
- messages=messages,
190
- max_tokens=self.config.max_tokens,
191
- temperature=self.config.temperature,
192
- api_key=self.config.api_key,
193
- api_base=self.config.base_url
194
- )
188
+ # Build the completion arguments; api_base and response_format are added only if specified
189
+ completion_args = {
190
+ "model": self.config.model,
191
+ "messages": messages,
192
+ "max_tokens": self.config.max_tokens,
193
+ "temperature": self.config.temperature,
194
+ "api_key": self.config.api_key,
195
+ }
196
+ if self.config.base_url:
197
+ completion_args["api_base"] = self.config.base_url
198
+ if self.config.response_format:
199
+ completion_args["response_format"] = {"type": self.config.response_format}
195
200
 
196
- # Log raw response for debugging
201
+ response = completion(**completion_args)
197
202
  raw_content = response.choices[0].message.content
198
203
  logger.debug(f"Raw LLM response for {url}: {raw_content}")
199
-
204
+
205
+ # Handle response based on response_format
200
206
  try:
201
- # First try direct JSON parsing
202
- try:
203
- extracted_data = json.loads(raw_content)
204
- except json.JSONDecodeError:
205
- # Look for JSON in markdown code blocks
206
- json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
207
- if json_match:
208
- json_content = json_match.group(1).strip()
209
- extracted_data = json.loads(json_content)
210
- else:
211
- # If no JSON found, try to extract structured data in any format
212
- extracted_data = {
213
- "raw_content": raw_content,
214
- "format": "text",
215
- "timestamp": datetime.now().isoformat()
216
- }
207
+ if self.config.response_format == "json_object":
208
+ # For json_object format, response should already be valid JSON
209
+ extracted_data = raw_content if isinstance(raw_content, dict) else json.loads(raw_content)
210
+ else:
211
+ # For text format or unspecified, try parsing JSON or use as text
212
+ try:
213
+ extracted_data = json.loads(raw_content)
214
+ except json.JSONDecodeError:
215
+ # Look for JSON in markdown code blocks
216
+ json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
217
+ if json_match:
218
+ json_content = json_match.group(1).strip()
219
+ extracted_data = json.loads(json_content)
220
+ else:
221
+ # If no JSON found and not json_object format, use raw content
222
+ extracted_data = {
223
+ "raw_content": raw_content,
224
+ "format": "text",
225
+ "timestamp": datetime.now().isoformat()
226
+ }
217
227
 
218
228
  self.buffer.remove_request(url) # Remove from buffer if successful
219
229
  return extracted_data
230
+
220
231
  except Exception as e:
221
232
  error_msg = (
222
233
  f"Error processing LLM response for {url}:\n"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.7
3
+ Version: 2.4.9
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -0,0 +1,7 @@
1
+ spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
2
+ spiderforce4ai/post_extraction_agent.py,sha256=so5Ze7Vz3konpQ0iT7ZxDGE9kIYeTwPTFyzezRc5oys,15392
3
+ spiderforce4ai-2.4.9.dist-info/METADATA,sha256=kEq3anAkoe_wpPVzpgaJlsSuAzTQHDgXiDFpirXvUQc,9012
4
+ spiderforce4ai-2.4.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
+ spiderforce4ai-2.4.9.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
+ spiderforce4ai-2.4.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
+ spiderforce4ai-2.4.9.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
2
- spiderforce4ai/post_extraction_agent.py,sha256=q2ohsqw_F1e5rT2H9eSzCWzstJLbwGyCtwLsC6eMufs,14560
3
- spiderforce4ai-2.4.7.dist-info/METADATA,sha256=r273h2ogI76aXTd8XN9b81EWtQLuhdSjZkXD2Ks8GnM,9012
4
- spiderforce4ai-2.4.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
5
- spiderforce4ai-2.4.7.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
6
- spiderforce4ai-2.4.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
7
- spiderforce4ai-2.4.7.dist-info/RECORD,,