spiderforce4ai 2.4.5__py3-none-any.whl → 2.4.7__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- spiderforce4ai/post_extraction_agent.py +60 -11
- {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.7.dist-info}/METADATA +1 -1
- spiderforce4ai-2.4.7.dist-info/RECORD +7 -0
- spiderforce4ai-2.4.5.dist-info/RECORD +0 -7
- {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.7.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.7.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.7.dist-info}/top_level.txt +0 -0
@@ -12,6 +12,9 @@ from pydantic import BaseModel, Field
|
|
12
12
|
import logging
|
13
13
|
from datetime import datetime
|
14
14
|
import re
|
15
|
+
from rich.console import Console
|
16
|
+
|
17
|
+
console = Console()
|
15
18
|
|
16
19
|
logger = logging.getLogger(__name__)
|
17
20
|
|
@@ -190,17 +193,45 @@ class PostExtractionAgent:
|
|
190
193
|
api_base=self.config.base_url
|
191
194
|
)
|
192
195
|
|
193
|
-
#
|
194
|
-
|
195
|
-
|
196
|
-
return extracted_data
|
196
|
+
# Log raw response for debugging
|
197
|
+
raw_content = response.choices[0].message.content
|
198
|
+
logger.debug(f"Raw LLM response for {url}: {raw_content}")
|
197
199
|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
200
|
+
try:
|
201
|
+
# First try direct JSON parsing
|
202
|
+
try:
|
203
|
+
extracted_data = json.loads(raw_content)
|
204
|
+
except json.JSONDecodeError:
|
205
|
+
# Look for JSON in markdown code blocks
|
206
|
+
json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
|
207
|
+
if json_match:
|
208
|
+
json_content = json_match.group(1).strip()
|
209
|
+
extracted_data = json.loads(json_content)
|
210
|
+
else:
|
211
|
+
# If no JSON found, try to extract structured data in any format
|
212
|
+
extracted_data = {
|
213
|
+
"raw_content": raw_content,
|
214
|
+
"format": "text",
|
215
|
+
"timestamp": datetime.now().isoformat()
|
216
|
+
}
|
217
|
+
|
218
|
+
self.buffer.remove_request(url) # Remove from buffer if successful
|
219
|
+
return extracted_data
|
220
|
+
except Exception as e:
|
221
|
+
error_msg = (
|
222
|
+
f"Error processing LLM response for {url}:\n"
|
223
|
+
f"Error: {str(e)}\n"
|
224
|
+
f"Raw content: {raw_content[:500]}..." # First 500 chars of response
|
225
|
+
)
|
226
|
+
logger.error(error_msg)
|
227
|
+
last_error = error_msg
|
228
|
+
if attempt < max_retries - 1:
|
229
|
+
time.sleep(retry_delay * (attempt + 1))
|
230
|
+
|
202
231
|
except Exception as e:
|
203
|
-
|
232
|
+
error_msg = f"LLM processing error for {url}: {str(e)}"
|
233
|
+
logger.error(error_msg)
|
234
|
+
last_error = error_msg
|
204
235
|
if attempt < max_retries - 1:
|
205
236
|
time.sleep(retry_delay * (attempt + 1))
|
206
237
|
|
@@ -242,24 +273,42 @@ class PostExtractionAgent:
|
|
242
273
|
|
243
274
|
def process_content(self, url: str, content: str) -> Optional[Dict]:
|
244
275
|
"""Process content with retry mechanism."""
|
276
|
+
logger.info(f"Starting content processing for {url}")
|
277
|
+
|
245
278
|
for attempt in range(self.config.max_retries):
|
279
|
+
logger.info(f"Processing attempt {attempt + 1}/{self.config.max_retries} for {url}")
|
280
|
+
|
246
281
|
result = self._process_single_content(url, content)
|
247
282
|
if result:
|
283
|
+
logger.info(f"Successfully processed content for {url}")
|
284
|
+
|
248
285
|
# Apply custom transformation if provided
|
249
286
|
if self.config.custom_transform_function:
|
250
287
|
try:
|
251
288
|
result = self.config.custom_transform_function(result)
|
289
|
+
logger.info(f"Applied custom transformation for {url}")
|
252
290
|
except Exception as e:
|
253
|
-
|
291
|
+
error_msg = f"Error in custom transform for {url}: {str(e)}"
|
292
|
+
logger.error(error_msg)
|
293
|
+
console.print(f"[red]{error_msg}[/red]")
|
254
294
|
|
255
295
|
# Save result synchronously
|
256
|
-
|
296
|
+
try:
|
297
|
+
self._save_result_sync(url, result)
|
298
|
+
logger.info(f"Saved results for {url}")
|
299
|
+
except Exception as e:
|
300
|
+
error_msg = f"Error saving results for {url}: {str(e)}"
|
301
|
+
logger.error(error_msg)
|
302
|
+
console.print(f"[red]{error_msg}[/red]")
|
303
|
+
|
257
304
|
return result
|
258
305
|
|
259
306
|
# Wait before retry
|
260
307
|
if attempt < self.config.max_retries - 1:
|
308
|
+
logger.info(f"Attempt {attempt + 1} failed for {url}, waiting {self.config.retry_delay}s before retry")
|
261
309
|
time.sleep(self.config.retry_delay)
|
262
310
|
|
311
|
+
logger.error(f"All processing attempts failed for {url}")
|
263
312
|
return None
|
264
313
|
|
265
314
|
async def process_bulk_content(self, content_map: Dict[str, str]) -> Dict[str, Optional[Dict]]:
|
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=q2ohsqw_F1e5rT2H9eSzCWzstJLbwGyCtwLsC6eMufs,14560
|
3
|
+
spiderforce4ai-2.4.7.dist-info/METADATA,sha256=r273h2ogI76aXTd8XN9b81EWtQLuhdSjZkXD2Ks8GnM,9012
|
4
|
+
spiderforce4ai-2.4.7.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.4.7.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.4.7.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.4.7.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=t9KxjuNw16-6kige6ULPLyykNkiGmKhpCi8QjskdaTk,11959
|
3
|
-
spiderforce4ai-2.4.5.dist-info/METADATA,sha256=q3VBuGb5wxsi9OPkzEMwFMyg9f_vT2RamWYIgu2JbLc,9012
|
4
|
-
spiderforce4ai-2.4.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.4.5.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.4.5.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.4.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|