spiderforce4ai 2.5.9__tar.gz → 2.6__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/PKG-INFO +1 -1
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/pyproject.toml +1 -1
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/setup.py +1 -1
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/spiderforce4ai/post_extraction_agent.py +18 -6
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/spiderforce4ai.egg-info/PKG-INFO +1 -1
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/README.md +0 -0
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/setup.cfg +0 -0
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/spiderforce4ai/__init__.py +0 -0
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/spiderforce4ai.egg-info/SOURCES.txt +0 -0
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/spiderforce4ai.egg-info/dependency_links.txt +0 -0
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/spiderforce4ai.egg-info/entry_points.txt +0 -0
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/spiderforce4ai.egg-info/not-zip-safe +0 -0
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/spiderforce4ai.egg-info/requires.txt +0 -0
- {spiderforce4ai-2.5.9 → spiderforce4ai-2.6}/spiderforce4ai.egg-info/top_level.txt +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "spiderforce4ai"
|
7
|
-
version = "2.5.9"
|
7
|
+
version = "2.6"
|
8
8
|
description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
|
9
9
|
readme = "README.md"
|
10
10
|
authors = [
|
@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
|
|
7
7
|
|
8
8
|
setup(
|
9
9
|
name="spiderforce4ai",
|
10
|
-
version="2.5.9",
|
10
|
+
version="2.6",
|
11
11
|
author="Piotr Tamulewicz",
|
12
12
|
author_email="pt@petertam.pro",
|
13
13
|
description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",
|
@@ -260,22 +260,34 @@ class PostExtractionAgent:
|
|
260
260
|
"""Save individual or combined results synchronously."""
|
261
261
|
try:
|
262
262
|
if self.config.combine_output and self.config.output_file:
|
263
|
-
#
|
263
|
+
# Convert Path to string if needed
|
264
|
+
output_file = str(self.config.output_file) if isinstance(self.config.output_file, Path) else self.config.output_file
|
265
|
+
|
266
|
+
# Load existing results if file exists
|
267
|
+
if Path(output_file).exists():
|
268
|
+
try:
|
269
|
+
with open(output_file, 'r') as f:
|
270
|
+
self.results = json.load(f)
|
271
|
+
except json.JSONDecodeError:
|
272
|
+
self.results = {}
|
273
|
+
|
274
|
+
# Update results with new data
|
264
275
|
self.results[url] = result
|
265
276
|
|
266
277
|
# Ensure output directory exists
|
267
|
-
|
278
|
+
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
|
268
279
|
|
269
280
|
# Save combined results atomically
|
270
|
-
temp_file =
|
281
|
+
temp_file = f"{output_file}.tmp"
|
271
282
|
with open(temp_file, 'w') as f:
|
272
283
|
json.dump(self.results, f, indent=2)
|
273
|
-
temp_file.replace(self.config.output_file)
|
274
284
|
|
285
|
+
# Atomic replace
|
286
|
+
Path(temp_file).replace(output_file)
|
275
287
|
logger.info(f"Updated combined results file with {url}")
|
276
288
|
|
277
|
-
# Cleanup backup files
|
278
|
-
for backup_file in
|
289
|
+
# Cleanup backup files
|
290
|
+
for backup_file in Path(output_file).parent.glob(f"{Path(output_file).stem}.bak_*"):
|
279
291
|
try:
|
280
292
|
backup_file.unlink()
|
281
293
|
logger.info(f"Cleaned up backup file: {backup_file}")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|