spiderforce4ai 2.4.5__tar.gz → 2.4.6__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.5
3
+ Version: 2.4.6
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spiderforce4ai"
7
- version = "2.4.5"
7
+ version = "2.4.6"
8
8
  description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
7
7
 
8
8
  setup(
9
9
  name="spiderforce4ai",
10
- version="2.4.5",
10
+ version="2.4.6",
11
11
  author="Piotr Tamulewicz",
12
12
  author_email="pt@petertam.pro",
13
13
  description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",
@@ -190,17 +190,29 @@ class PostExtractionAgent:
190
190
  api_base=self.config.base_url
191
191
  )
192
192
 
193
- # Parse response
194
- extracted_data = json.loads(response.choices[0].message.content)
195
- self.buffer.remove_request(url) # Remove from buffer if successful
196
- return extracted_data
193
+ # Log raw response for debugging
194
+ raw_content = response.choices[0].message.content
195
+ logger.debug(f"Raw LLM response for {url}: {raw_content}")
197
196
 
198
- except json.JSONDecodeError as e:
199
- last_error = f"Invalid JSON response from LLM: {e}"
200
- if attempt < max_retries - 1:
201
- time.sleep(retry_delay * (attempt + 1))
197
+ try:
198
+ extracted_data = json.loads(raw_content)
199
+ self.buffer.remove_request(url) # Remove from buffer if successful
200
+ return extracted_data
201
+ except json.JSONDecodeError as e:
202
+ error_msg = (
203
+ f"Invalid JSON response from LLM for {url}:\n"
204
+ f"Error: {str(e)}\n"
205
+ f"Raw content: {raw_content[:500]}..." # First 500 chars of response
206
+ )
207
+ logger.error(error_msg)
208
+ last_error = error_msg
209
+ if attempt < max_retries - 1:
210
+ time.sleep(retry_delay * (attempt + 1))
211
+
202
212
  except Exception as e:
203
- last_error = str(e)
213
+ error_msg = f"LLM processing error for {url}: {str(e)}"
214
+ logger.error(error_msg)
215
+ last_error = error_msg
204
216
  if attempt < max_retries - 1:
205
217
  time.sleep(retry_delay * (attempt + 1))
206
218
 
@@ -242,24 +254,42 @@ class PostExtractionAgent:
242
254
 
243
255
  def process_content(self, url: str, content: str) -> Optional[Dict]:
244
256
  """Process content with retry mechanism."""
257
+ logger.info(f"Starting content processing for {url}")
258
+
245
259
  for attempt in range(self.config.max_retries):
260
+ logger.info(f"Processing attempt {attempt + 1}/{self.config.max_retries} for {url}")
261
+
246
262
  result = self._process_single_content(url, content)
247
263
  if result:
264
+ logger.info(f"Successfully processed content for {url}")
265
+
248
266
  # Apply custom transformation if provided
249
267
  if self.config.custom_transform_function:
250
268
  try:
251
269
  result = self.config.custom_transform_function(result)
270
+ logger.info(f"Applied custom transformation for {url}")
252
271
  except Exception as e:
253
- logger.error(f"Error in custom transform for {url}: {str(e)}")
272
+ error_msg = f"Error in custom transform for {url}: {str(e)}"
273
+ logger.error(error_msg)
274
+ console.print(f"[red]{error_msg}[/red]")
254
275
 
255
276
  # Save result synchronously
256
- self._save_result_sync(url, result)
277
+ try:
278
+ self._save_result_sync(url, result)
279
+ logger.info(f"Saved results for {url}")
280
+ except Exception as e:
281
+ error_msg = f"Error saving results for {url}: {str(e)}"
282
+ logger.error(error_msg)
283
+ console.print(f"[red]{error_msg}[/red]")
284
+
257
285
  return result
258
286
 
259
287
  # Wait before retry
260
288
  if attempt < self.config.max_retries - 1:
289
+ logger.info(f"Attempt {attempt + 1} failed for {url}, waiting {self.config.retry_delay}s before retry")
261
290
  time.sleep(self.config.retry_delay)
262
291
 
292
+ logger.error(f"All processing attempts failed for {url}")
263
293
  return None
264
294
 
265
295
  async def process_bulk_content(self, content_map: Dict[str, str]) -> Dict[str, Optional[Dict]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.4.5
3
+ Version: 2.4.6
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
File without changes
File without changes