PyPI - spiderforce4ai - Versions diffs - 2.4.3__py3-none-any.whl → 2.4.5__py3-none-any.whl - Mend

spiderforce4ai 2.4.3__py3-none-any.whl → 2.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

spiderforce4ai/__init__.py CHANGED Viewed

@@ -586,7 +586,7 @@ class SpiderForce4AI:
                 for result in results:
                     if result.status == "success":
                         try:
-                            result.extraction_result = await agent.process_content(result.url, result.markdown)
+                            result.extraction_result = agent.process_content(result.url, result.markdown)
                             progress.update(llm_task, advance=1)
                         except Exception as e:
                             console.print(f"[red]Error in post-extraction processing for {result.url}: {str(e)}[/red]")

spiderforce4ai/post_extraction_agent.py CHANGED Viewed

@@ -164,12 +164,9 @@ class PostExtractionAgent:
                 self.config.output_file.rename(backup_path)
             self.config.output_file.touch()
-    async def _process_single_content(self, url: str, content: str) -> Optional[Dict]:
+    def _process_single_content(self, url: str, content: str) -> Optional[Dict]:
         """Process a single piece of content through the LLM."""
         try:
-            # Apply rate limiting
-            await self.rate_limiter.acquire()
             # Replace placeholder in messages with actual content
             messages = [
                 {**msg, 'content': msg['content'].replace('{here_markdown_content}', content)}
@@ -183,7 +180,8 @@ class PostExtractionAgent:
             for attempt in range(max_retries):
                 try:
-                    response = await completion(
+                    # Call completion synchronously
+                    response = completion(
                         model=self.config.model,
                         messages=messages,
                         max_tokens=self.config.max_tokens,
@@ -200,11 +198,11 @@ class PostExtractionAgent:
                 except json.JSONDecodeError as e:
                     last_error = f"Invalid JSON response from LLM: {e}"
                     if attempt < max_retries - 1:
-                        await asyncio.sleep(retry_delay * (attempt + 1))
+                        time.sleep(retry_delay * (attempt + 1))
                 except Exception as e:
                     last_error = str(e)
                     if attempt < max_retries - 1:
-                        await asyncio.sleep(retry_delay * (attempt + 1))
+                        time.sleep(retry_delay * (attempt + 1))
             # If we get here, all retries failed
             raise Exception(last_error)
@@ -214,6 +212,20 @@ class PostExtractionAgent:
             self.buffer.add_failed_request(url, content, str(e))
             return None
+    def _save_result_sync(self, url: str, result: Dict) -> None:
+        """Save individual or combined results synchronously."""
+        try:
+            if self.config.combine_output and self.config.output_file:
+                self.results[url] = result
+                with open(self.config.output_file, 'w') as f:
+                    json.dump(self.results, f, indent=2)
+            elif not self.config.combine_output and self.config.output_file:
+                individual_file = self.config.output_file.parent / f"{url.replace('/', '_')}.json"
+                with open(individual_file, 'w') as f:
+                    json.dump(result, f, indent=2)
+        except Exception as e:
+            logger.error(f"Error saving results for {url}: {str(e)}")
     async def _save_result(self, url: str, result: Dict) -> None:
         """Save individual or combined results."""
         try:
@@ -228,10 +240,10 @@ class PostExtractionAgent:
         except Exception as e:
             logger.error(f"Error saving results for {url}: {str(e)}")
-    async def process_content(self, url: str, content: str) -> Optional[Dict]:
+    def process_content(self, url: str, content: str) -> Optional[Dict]:
         """Process content with retry mechanism."""
         for attempt in range(self.config.max_retries):
-            result = await self._process_single_content(url, content)
+            result = self._process_single_content(url, content)
             if result:
                 # Apply custom transformation if provided
                 if self.config.custom_transform_function:
@@ -240,12 +252,13 @@ class PostExtractionAgent:
                     except Exception as e:
                         logger.error(f"Error in custom transform for {url}: {str(e)}")
-                await self._save_result(url, result)
+                # Save result synchronously
+                self._save_result_sync(url, result)
                 return result
             # Wait before retry
             if attempt < self.config.max_retries - 1:
-                await asyncio.sleep(self.config.retry_delay)
+                time.sleep(self.config.retry_delay)
         return None

{spiderforce4ai-2.4.3.dist-info → spiderforce4ai-2.4.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: spiderforce4ai
-Version: 2.4.3
+Version: 2.4.5
 Summary: Python wrapper for SpiderForce4AI HTML-to-Markdown conversion service with LLM post-processing
 Home-page: https://petertam.pro
 Author: Piotr Tamulewicz

spiderforce4ai-2.4.5.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
+spiderforce4ai/post_extraction_agent.py,sha256=t9KxjuNw16-6kige6ULPLyykNkiGmKhpCi8QjskdaTk,11959
+spiderforce4ai-2.4.5.dist-info/METADATA,sha256=q3VBuGb5wxsi9OPkzEMwFMyg9f_vT2RamWYIgu2JbLc,9012
+spiderforce4ai-2.4.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+spiderforce4ai-2.4.5.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
+spiderforce4ai-2.4.5.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
+spiderforce4ai-2.4.5.dist-info/RECORD,,

spiderforce4ai-2.4.3.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-spiderforce4ai/__init__.py,sha256=iwCLSvooHtFAo-rU52-nsFgyn99Dflpt_OpSrIW-PqA,42273
-spiderforce4ai/post_extraction_agent.py,sha256=m00-y0SCoutUnxsMwHxPaW-qRm4o5alQWjggDStUSrg,11249
-spiderforce4ai-2.4.3.dist-info/METADATA,sha256=-i_vH6DDs4xVFVdDfaFG_Xka0pqXCSQdCrKgym5r5b0,9012
-spiderforce4ai-2.4.3.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-spiderforce4ai-2.4.3.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
-spiderforce4ai-2.4.3.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
-spiderforce4ai-2.4.3.dist-info/RECORD,,

{spiderforce4ai-2.4.3.dist-info → spiderforce4ai-2.4.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{spiderforce4ai-2.4.3.dist-info → spiderforce4ai-2.4.5.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{spiderforce4ai-2.4.3.dist-info → spiderforce4ai-2.4.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

spiderforce4ai 2.4.3__py3-none-any.whl → 2.4.5__py3-none-any.whl

spiderforce4ai 2.4.3__py3-none-any.whl → 2.4.5__py3-none-any.whl