PyPI - spiderforce4ai - Versions diffs - 2.4.1__tar.gz → 2.4.2__tar.gz - Mend

spiderforce4ai 2.4.1tar.gz → 2.4.2tar.gz

Files changed (14) hide show

{spiderforce4ai-2.4.1 → spiderforce4ai-2.4.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.1
+Version: 2.4.2
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz

{spiderforce4ai-2.4.1 → spiderforce4ai-2.4.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "spiderforce4ai"
-version = "2.4.1"
+version = "2.4.2"
 description = "Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing"
 readme = "README.md"
 authors = [

{spiderforce4ai-2.4.1 → spiderforce4ai-2.4.2}/setup.py RENAMED Viewed

@@ -7,7 +7,7 @@ with open("README.md", encoding="utf-8") as f:
 setup(
     name="spiderforce4ai",
-    version="2.4.1",
+    version="2.4.2",
     author="Piotr Tamulewicz",
     author_email="pt@petertam.pro",
     description="Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing",

{spiderforce4ai-2.4.1 → spiderforce4ai-2.4.2}/spiderforce4ai/__init__.py RENAMED Viewed

@@ -460,28 +460,6 @@ class SpiderForce4AI:
                     if config.output_dir:
                         await _save_markdown_async(url, markdown, config)
-                    # Handle post-extraction if configured
-                    if config.post_extraction_agent and result.status == "success":
-                        try:
-                            post_config = PostExtractionConfig(
-                                model=config.post_extraction_agent["model"],
-                                messages=config.post_extraction_agent["messages"],
-                                api_key=config.post_extraction_agent["api_key"],
-                                max_tokens=config.post_extraction_agent.get("max_tokens", 1000),
-                                temperature=config.post_extraction_agent.get("temperature", 0.7),
-                                base_url=config.post_extraction_agent.get("base_url"),
-                                combine_output=bool(config.post_extraction_agent_save_to_file),
-                                output_file=config.post_extraction_agent_save_to_file,
-                                custom_transform_function=config.post_agent_transformer_function
-                            )
-                            agent = PostExtractionAgent(post_config)
-                            extraction_result = await agent.process_content(url, markdown)
-                            if extraction_result:
-                                result.extraction_result = extraction_result
-                        except Exception as e:
-                            console.print(f"[red]Error in post-extraction processing for {url}: {str(e)}[/red]")
                     await _send_webhook_async(result, config)
                 self.crawl_results.append(result)
@@ -635,10 +613,40 @@ class SpiderForce4AI:
                         except Exception as e:
                             console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")
+            # Process LLM requests sequentially after all crawling is complete
+            llm_successful = 0
+            if config.post_extraction_agent:
+                console.print("\n[cyan]Starting post-extraction processing...[/cyan]")
+                successful_results = [r for r in results if r.status == "success"]
+                llm_task = progress.add_task("[cyan]Post-extraction processing...", total=len(successful_results))
+                post_config = PostExtractionConfig(
+                    model=config.post_extraction_agent["model"],
+                    messages=config.post_extraction_agent["messages"],
+                    api_key=config.post_extraction_agent["api_key"],
+                    max_tokens=config.post_extraction_agent.get("max_tokens", 1000),
+                    temperature=config.post_extraction_agent.get("temperature", 0.7),
+                    base_url=config.post_extraction_agent.get("base_url"),
+                    combine_output=bool(config.post_extraction_agent_save_to_file),
+                    output_file=config.post_extraction_agent_save_to_file,
+                    custom_transform_function=config.post_agent_transformer_function
+                )
+                agent = PostExtractionAgent(post_config)
+                for result in successful_results:
+                    try:
+                        result.extraction_result = await agent.process_content(result.url, result.markdown)
+                        if result.extraction_result:
+                            llm_successful += 1
+                        progress.update(llm_task, advance=1)
+                    except Exception as e:
+                        console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")
+                        await asyncio.sleep(1)  # Add delay after error
+                    await asyncio.sleep(0.5)  # Rate limiting between requests
             # Calculate final statistics
             final_successful = len([r for r in results if r.status == "success"])
             final_failed = len([r for r in results if r.status == "failed"])
-            llm_successful = len([r for r in results if r.extraction_result is not None])
             # Update retry stats
             self._retry_stats = {

{spiderforce4ai-2.4.1 → spiderforce4ai-2.4.2}/spiderforce4ai.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.1
+Version: 2.4.2
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz