spiderforce4ai 2.5.9__tar.gz → 2.6__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.5.9
3
+ Version: 2.6
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "spiderforce4ai"
7
- version = "2.5.9"
7
+ version = "2.6"
8
8
  description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
9
9
  readme = "README.md"
10
10
  authors = [
@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
7
7
 
8
8
  setup(
9
9
  name="spiderforce4ai",
10
- version="2.5.9",
10
+ version="2.6",
11
11
  author="Piotr Tamulewicz",
12
12
  author_email="pt@petertam.pro",
13
13
  description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",
@@ -260,22 +260,34 @@ class PostExtractionAgent:
260
260
  """Save individual or combined results synchronously."""
261
261
  try:
262
262
  if self.config.combine_output and self.config.output_file:
263
- # Update the results dictionary
263
+ # Convert Path to string if needed
264
+ output_file = str(self.config.output_file) if isinstance(self.config.output_file, Path) else self.config.output_file
265
+
266
+ # Load existing results if file exists
267
+ if Path(output_file).exists():
268
+ try:
269
+ with open(output_file, 'r') as f:
270
+ self.results = json.load(f)
271
+ except json.JSONDecodeError:
272
+ self.results = {}
273
+
274
+ # Update results with new data
264
275
  self.results[url] = result
265
276
 
266
277
  # Ensure output directory exists
267
- self.config.output_file.parent.mkdir(parents=True, exist_ok=True)
278
+ Path(output_file).parent.mkdir(parents=True, exist_ok=True)
268
279
 
269
280
  # Save combined results atomically
270
- temp_file = self.config.output_file.with_suffix('.tmp')
281
+ temp_file = f"{output_file}.tmp"
271
282
  with open(temp_file, 'w') as f:
272
283
  json.dump(self.results, f, indent=2)
273
- temp_file.replace(self.config.output_file)
274
284
 
285
+ # Atomic replace
286
+ Path(temp_file).replace(output_file)
275
287
  logger.info(f"Updated combined results file with {url}")
276
288
 
277
- # Cleanup backup files after successful save
278
- for backup_file in self.config.output_file.parent.glob(f"{self.config.output_file.stem}.bak_*"):
289
+ # Cleanup backup files
290
+ for backup_file in Path(output_file).parent.glob(f"{Path(output_file).stem}.bak_*"):
279
291
  try:
280
292
  backup_file.unlink()
281
293
  logger.info(f"Cleaned up backup file: {backup_file}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: spiderforce4ai
3
- Version: 2.5.9
3
+ Version: 2.6
4
4
  Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
5
5
  Home-page: https://petertam.pro
6
6
  Author: Piotr Tamulewicz
File without changes
File without changes