spiderforce4ai-2.5.6-py3-none-any.whl → spiderforce4ai-2.5.8-py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- spiderforce4ai/post_extraction_agent.py +12 -7
- {spiderforce4ai-2.5.6.dist-info → spiderforce4ai-2.5.8.dist-info}/METADATA +1 -1
- spiderforce4ai-2.5.8.dist-info/RECORD +7 -0
- spiderforce4ai-2.5.6.dist-info/RECORD +0 -7
- {spiderforce4ai-2.5.6.dist-info → spiderforce4ai-2.5.8.dist-info}/WHEEL +0 -0
- {spiderforce4ai-2.5.6.dist-info → spiderforce4ai-2.5.8.dist-info}/entry_points.txt +0 -0
- {spiderforce4ai-2.5.6.dist-info → spiderforce4ai-2.5.8.dist-info}/top_level.txt +0 -0
@@ -167,6 +167,8 @@ class PostExtractionAgent:
|
|
167
167
|
backup_path = self.config.output_file.with_suffix(f".bak_{int(time.time())}")
|
168
168
|
self.config.output_file.rename(backup_path)
|
169
169
|
self.config.output_file.touch()
|
170
|
+
# Initialize empty results dictionary
|
171
|
+
self.results = {}
|
170
172
|
|
171
173
|
def _process_single_content(self, url: str, content: str) -> Optional[Dict]:
|
172
174
|
"""Process a single piece of content through the LLM."""
|
@@ -259,8 +261,18 @@ class PostExtractionAgent:
|
|
259
261
|
try:
|
260
262
|
if self.config.combine_output and self.config.output_file:
|
261
263
|
self.results[url] = result
|
264
|
+
# Save combined results
|
262
265
|
with open(self.config.output_file, 'w') as f:
|
263
266
|
json.dump(self.results, f, indent=2)
|
267
|
+
|
268
|
+
# Cleanup backup files after successful save
|
269
|
+
for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
|
270
|
+
try:
|
271
|
+
backup_file.unlink()
|
272
|
+
logger.info(f"Cleaned up backup file: {backup_file}")
|
273
|
+
except Exception as e:
|
274
|
+
logger.warning(f"Failed to remove backup file {backup_file}: {e}")
|
275
|
+
|
264
276
|
elif not self.config.combine_output and self.config.output_file:
|
265
277
|
individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
|
266
278
|
with open(individual_file, 'w') as f:
|
@@ -299,13 +311,6 @@ class PostExtractionAgent:
|
|
299
311
|
# Add URL to result before transformation
|
300
312
|
result['url'] = url
|
301
313
|
|
302
|
-
# Check for required fields and set to None if missing
|
303
|
-
required_fields = ['ContactInformation', 'CallToAction', 'KeyPoints', 'Title', 'Description', 'CanonicalUrl']
|
304
|
-
for field in required_fields:
|
305
|
-
if field not in result:
|
306
|
-
logger.warning(f"Missing field '{field}' in LLM response for {url}, setting to None")
|
307
|
-
result[field] = None
|
308
|
-
|
309
314
|
logger.info(f"Executing custom transformer function for {url}")
|
310
315
|
transformed_result = self.config.custom_transform_function(result)
|
311
316
|
logger.info(f"Successfully applied custom transformation for {url}")
|
@@ -0,0 +1,7 @@
|
|
1
|
+
spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
|
2
|
+
spiderforce4ai/post_extraction_agent.py,sha256=nyc6V21E3laTetFG2aEBMmFiXv5goumo8eHoupp3mKA,16430
|
3
|
+
spiderforce4ai-2.5.8.dist-info/METADATA,sha256=eitdlF0yEzoFabYliZWvNSx7hGQMBDVvKwgCh21lU18,9012
|
4
|
+
spiderforce4ai-2.5.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
+
spiderforce4ai-2.5.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
+
spiderforce4ai-2.5.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
+
spiderforce4ai-2.5.8.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
spiderforce4ai/__init__.py,sha256=BlrRIrtpDUHjmDedqgXP1KbAAsAH9vwFPncUR5VGGyM,44804
|
2
|
-
spiderforce4ai/post_extraction_agent.py,sha256=OP8_gVWzWdQ3B6d0vd54IbcopHSVpAkWrUmnrDVT5_k,16327
|
3
|
-
spiderforce4ai-2.5.6.dist-info/METADATA,sha256=TjgWAC0iHPrSaedmZL-1PZWCoLEMZ7NJUhd74tfWj6A,9012
|
4
|
-
spiderforce4ai-2.5.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
5
|
-
spiderforce4ai-2.5.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
|
6
|
-
spiderforce4ai-2.5.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
|
7
|
-
spiderforce4ai-2.5.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|