spiderforce4ai 2.4.5__py3-none-any.whl → 2.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spiderforce4ai/post_extraction_agent.py +41 -11
- {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.6.dist-info}/METADATA +1 -1
- spiderforce4ai-2.4.6.dist-info/RECORD +7 -0
- spiderforce4ai-2.4.5.dist-info/RECORD +0 -7
- {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.6.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.6.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.6.dist-info}/top_level.txt +0 -0
@@ -190,17 +190,29 @@ class PostExtractionAgent:
|
|
190
190
|
api_base=self.config.base_url
|
191
191
|
)
|
192
192
|
|
193
|
-
#
|
194
|
-
|
195
|
-
|
196
|
-
return extracted_data
|
193
|
+
# Log raw response for debugging
|
194
|
+
raw_content = response.choices[0].message.content
|
195
|
+
logger.debug(f"Raw LLM response for {url}: {raw_content}")
|
197
196
|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
197
|
+
try:
|
198
|
+
extracted_data = json.loads(raw_content)
|
199
|
+
self.buffer.remove_request(url) # Remove from buffer if successful
|
200
|
+
return extracted_data
|
201
|
+
except json.JSONDecodeError as e:
|
202
|
+
error_msg = (
|
203
|
+
f"Invalid JSON response from LLM for {url}:\n"
|
204
|
+
f"Error: {str(e)}\n"
|
205
|
+
f"Raw content: {raw_content[:500]}..." # First 500 chars of response
|
206
|
+
)
|
207
|
+
logger.error(error_msg)
|
208
|
+
last_error = error_msg
|
209
|
+
if attempt < max_retries - 1:
|
210
|
+
time.sleep(retry_delay * (attempt + 1))
|
211
|
+
|
202
212
|
except Exception as e:
|
203
|
-
|
213
|
+
error_msg = f"LLM processing error for {url}: {str(e)}"
|
214
|
+
logger.error(error_msg)
|
215
|
+
last_error = error_msg
|
204
216
|
if attempt < max_retries - 1:
|
205
217
|
time.sleep(retry_delay * (attempt + 1))
|
206
218
|
|
@@ -242,24 +254,42 @@ class PostExtractionAgent:
|
|
242
254
|
|
243
255
|
def process_content(self, url: str, content: str) -> Optional[Dict]:
|
244
256
|
"""Process content with retry mechanism."""
|
257
|
+
logger.info(f"Starting content processing for {url}")
|
258
|
+
|
245
259
|
for attempt in range(self.config.max_retries):
|
260
|
+
logger.info(f"Processing attempt {attempt + 1}/{self.config.max_retries} for {url}")
|
261
|
+
|
246
262
|
result = self._process_single_content(url, content)
|
247
263
|
if result:
|
264
|
+
logger.info(f"Successfully processed content for {url}")
|
265
|
+
|
248
266
|
# Apply custom transformation if provided
|
249
267
|
if self.config.custom_transform_function:
|
250
268
|
try:
|
251
269
|
result = self.config.custom_transform_function(result)
|
270
|
+
logger.info(f"Applied custom transformation for {url}")
|
252
271
|
except Exception as e:
|
253
|
-
|
272
|
+
error_msg = f"Error in custom transform for {url}: {str(e)}"
|
273
|
+
logger.error(error_msg)
|
274
|
+
console.print(f"[red]{error_msg}[/red]")
|
254
275
|
|
255
276
|
# Save result synchronously
|
256
|
-
|
277
|
+
try:
|
278
|
+
self._save_result_sync(url, result)
|
279
|
+
logger.info(f"Saved results for {url}")
|
280
|
+
except Exception as e:
|
281
|
+
error_msg = f"Error saving results for {url}: {str(e)}"
|
282
|
+
logger.error(error_msg)
|
283
|
+
console.print(f"[red]{error_msg}[/red]")
|
284
|
+
|
257
285
|
return result
|
258
286
|
|
259
287
|
# Wait before retry
|
260
288
|
if attempt < self.config.max_retries - 1:
|
289
|
+
logger.info(f"Attempt {attempt + 1} failed for {url}, waiting {self.config.retry_delay}s before retry")
|
261
290
|
time.sleep(self.config.retry_delay)
|
262
291
|
|
292
|
+
logger.error(f"All processing attempts failed for {url}")
|
263
293
|
return None
|
264
294
|
|
265
295
|
async def process_bulk_content(self, content_map: Dict[str, str]) -> Dict[str, Optional[Dict]]:
|
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=Ty9-Ai3_RVT86RrPUxKEzf4oUh-Wr7bk2aM87Je_WvE,13580
|
3
|
+
spiderforce4ai-2.4.6.dist-info/METADATA,sha256=7FaEgAHdD-8a0XmuDMkpAUjAQ7ZmFTD89IqQM17nllI,9012
|
4
|
+
spiderforce4ai-2.4.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.4.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.4.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.4.6.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=t9KxjuNw16-6kige6ULPLyykNkiGmKhpCi8QjskdaTk,11959
|
3
|
-
spiderforce4ai-2.4.5.dist-info/METADATA,sha256=q3VBuGb5wxsi9OPkzEMwFMyg9f_vT2RamWYIgu2JbLc,9012
|
4
|
-
spiderforce4ai-2.4.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.4.5.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.4.5.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.4.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|