spiderforce4ai-2.4.5-py3-none-any.whl → spiderforce4ai-2.4.6-py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- spiderforce4ai/post_extraction_agent.py +41 -11
 - {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.6.dist-info}/METADATA +1 -1
 - spiderforce4ai-2.4.6.dist-info/RECORD +7 -0
 - spiderforce4ai-2.4.5.dist-info/RECORD +0 -7
 - {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.6.dist-info}/WHEEL +0 -0
 - {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.6.dist-info}/entry_points.txt +0 -0
 - {spiderforce4ai-2.4.5.dist-info → spiderforce4ai-2.4.6.dist-info}/top_level.txt +0 -0
 
| 
         @@ -190,17 +190,29 @@ class PostExtractionAgent: 
     | 
|
| 
       190 
190 
     | 
    
         
             
                                    api_base=self.config.base_url
         
     | 
| 
       191 
191 
     | 
    
         
             
                                )
         
     | 
| 
       192 
192 
     | 
    
         | 
| 
       193 
     | 
    
         
            -
                                #  
     | 
| 
       194 
     | 
    
         
            -
                                 
     | 
| 
       195 
     | 
    
         
            -
                                 
     | 
| 
       196 
     | 
    
         
            -
                                return extracted_data
         
     | 
| 
      
 193 
     | 
    
         
            +
                                # Log raw response for debugging
         
     | 
| 
      
 194 
     | 
    
         
            +
                                raw_content = response.choices[0].message.content
         
     | 
| 
      
 195 
     | 
    
         
            +
                                logger.debug(f"Raw LLM response for {url}: {raw_content}")
         
     | 
| 
       197 
196 
     | 
    
         | 
| 
       198 
     | 
    
         
            -
             
     | 
| 
       199 
     | 
    
         
            -
             
     | 
| 
       200 
     | 
    
         
            -
             
     | 
| 
       201 
     | 
    
         
            -
                                     
     | 
| 
      
 197 
     | 
    
         
            +
                                try:
         
     | 
| 
      
 198 
     | 
    
         
            +
                                    extracted_data = json.loads(raw_content)
         
     | 
| 
      
 199 
     | 
    
         
            +
                                    self.buffer.remove_request(url)  # Remove from buffer if successful
         
     | 
| 
      
 200 
     | 
    
         
            +
                                    return extracted_data
         
     | 
| 
      
 201 
     | 
    
         
            +
                                except json.JSONDecodeError as e:
         
     | 
| 
      
 202 
     | 
    
         
            +
                                    error_msg = (
         
     | 
| 
      
 203 
     | 
    
         
            +
                                        f"Invalid JSON response from LLM for {url}:\n"
         
     | 
| 
      
 204 
     | 
    
         
            +
                                        f"Error: {str(e)}\n"
         
     | 
| 
      
 205 
     | 
    
         
            +
                                        f"Raw content: {raw_content[:500]}..."  # First 500 chars of response
         
     | 
| 
      
 206 
     | 
    
         
            +
                                    )
         
     | 
| 
      
 207 
     | 
    
         
            +
                                    logger.error(error_msg)
         
     | 
| 
      
 208 
     | 
    
         
            +
                                    last_error = error_msg
         
     | 
| 
      
 209 
     | 
    
         
            +
                                    if attempt < max_retries - 1:
         
     | 
| 
      
 210 
     | 
    
         
            +
                                        time.sleep(retry_delay * (attempt + 1))
         
     | 
| 
      
 211 
     | 
    
         
            +
                                        
         
     | 
| 
       202 
212 
     | 
    
         
             
                            except Exception as e:
         
     | 
| 
       203 
     | 
    
         
            -
                                 
     | 
| 
      
 213 
     | 
    
         
            +
                                error_msg = f"LLM processing error for {url}: {str(e)}"
         
     | 
| 
      
 214 
     | 
    
         
            +
                                logger.error(error_msg)
         
     | 
| 
      
 215 
     | 
    
         
            +
                                last_error = error_msg
         
     | 
| 
       204 
216 
     | 
    
         
             
                                if attempt < max_retries - 1:
         
     | 
| 
       205 
217 
     | 
    
         
             
                                    time.sleep(retry_delay * (attempt + 1))
         
     | 
| 
       206 
218 
     | 
    
         | 
| 
         @@ -242,24 +254,42 @@ class PostExtractionAgent: 
     | 
|
| 
       242 
254 
     | 
    
         | 
| 
       243 
255 
     | 
    
         
             
                def process_content(self, url: str, content: str) -> Optional[Dict]:
         
     | 
| 
       244 
256 
     | 
    
         
             
                    """Process content with retry mechanism."""
         
     | 
| 
      
 257 
     | 
    
         
            +
                    logger.info(f"Starting content processing for {url}")
         
     | 
| 
      
 258 
     | 
    
         
            +
                    
         
     | 
| 
       245 
259 
     | 
    
         
             
                    for attempt in range(self.config.max_retries):
         
     | 
| 
      
 260 
     | 
    
         
            +
                        logger.info(f"Processing attempt {attempt + 1}/{self.config.max_retries} for {url}")
         
     | 
| 
      
 261 
     | 
    
         
            +
                        
         
     | 
| 
       246 
262 
     | 
    
         
             
                        result = self._process_single_content(url, content)
         
     | 
| 
       247 
263 
     | 
    
         
             
                        if result:
         
     | 
| 
      
 264 
     | 
    
         
            +
                            logger.info(f"Successfully processed content for {url}")
         
     | 
| 
      
 265 
     | 
    
         
            +
                            
         
     | 
| 
       248 
266 
     | 
    
         
             
                            # Apply custom transformation if provided
         
     | 
| 
       249 
267 
     | 
    
         
             
                            if self.config.custom_transform_function:
         
     | 
| 
       250 
268 
     | 
    
         
             
                                try:
         
     | 
| 
       251 
269 
     | 
    
         
             
                                    result = self.config.custom_transform_function(result)
         
     | 
| 
      
 270 
     | 
    
         
            +
                                    logger.info(f"Applied custom transformation for {url}")
         
     | 
| 
       252 
271 
     | 
    
         
             
                                except Exception as e:
         
     | 
| 
       253 
     | 
    
         
            -
                                     
     | 
| 
      
 272 
     | 
    
         
            +
                                    error_msg = f"Error in custom transform for {url}: {str(e)}"
         
     | 
| 
      
 273 
     | 
    
         
            +
                                    logger.error(error_msg)
         
     | 
| 
      
 274 
     | 
    
         
            +
                                    console.print(f"[red]{error_msg}[/red]")
         
     | 
| 
       254 
275 
     | 
    
         | 
| 
       255 
276 
     | 
    
         
             
                            # Save result synchronously
         
     | 
| 
       256 
     | 
    
         
            -
                             
     | 
| 
      
 277 
     | 
    
         
            +
                            try:
         
     | 
| 
      
 278 
     | 
    
         
            +
                                self._save_result_sync(url, result)
         
     | 
| 
      
 279 
     | 
    
         
            +
                                logger.info(f"Saved results for {url}")
         
     | 
| 
      
 280 
     | 
    
         
            +
                            except Exception as e:
         
     | 
| 
      
 281 
     | 
    
         
            +
                                error_msg = f"Error saving results for {url}: {str(e)}"
         
     | 
| 
      
 282 
     | 
    
         
            +
                                logger.error(error_msg)
         
     | 
| 
      
 283 
     | 
    
         
            +
                                console.print(f"[red]{error_msg}[/red]")
         
     | 
| 
      
 284 
     | 
    
         
            +
                            
         
     | 
| 
       257 
285 
     | 
    
         
             
                            return result
         
     | 
| 
       258 
286 
     | 
    
         | 
| 
       259 
287 
     | 
    
         
             
                        # Wait before retry
         
     | 
| 
       260 
288 
     | 
    
         
             
                        if attempt < self.config.max_retries - 1:
         
     | 
| 
      
 289 
     | 
    
         
            +
                            logger.info(f"Attempt {attempt + 1} failed for {url}, waiting {self.config.retry_delay}s before retry")
         
     | 
| 
       261 
290 
     | 
    
         
             
                            time.sleep(self.config.retry_delay)
         
     | 
| 
       262 
291 
     | 
    
         | 
| 
      
 292 
     | 
    
         
            +
                    logger.error(f"All processing attempts failed for {url}")
         
     | 
| 
       263 
293 
     | 
    
         
             
                    return None
         
     | 
| 
       264 
294 
     | 
    
         | 
| 
       265 
295 
     | 
    
         
             
                async def process_bulk_content(self, content_map: Dict[str, str]) -> Dict[str, Optional[Dict]]:
         
     | 
| 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
         
     | 
| 
      
 2 
     | 
    
         
            +
            spiderforce4ai/post_extraction_agent.py,sha256=Ty9-Ai3_RVT86RrPUxKEzf4oUh-Wr7bk2aM87Je_WvE,13580
         
     | 
| 
      
 3 
     | 
    
         
            +
            spiderforce4ai-2.4.6.dist-info/METADATA,sha256=7FaEgAHdD-8a0XmuDMkpAUjAQ7ZmFTD89IqQM17nllI,9012
         
     | 
| 
      
 4 
     | 
    
         
            +
            spiderforce4ai-2.4.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
         
     | 
| 
      
 5 
     | 
    
         
            +
            spiderforce4ai-2.4.6.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
         
     | 
| 
      
 6 
     | 
    
         
            +
            spiderforce4ai-2.4.6.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
         
     | 
| 
      
 7 
     | 
    
         
            +
            spiderforce4ai-2.4.6.dist-info/RECORD,,
         
     | 
| 
         @@ -1,7 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            spiderforce4ai/__init__.py,sha256=PPpJLowJhgoRijsF2ebmdkFbIriI_yIFlCi1wL6hSP8,42267
         
     | 
| 
       2 
     | 
    
         
            -
            spiderforce4ai/post_extraction_agent.py,sha256=t9KxjuNw16-6kige6ULPLyykNkiGmKhpCi8QjskdaTk,11959
         
     | 
| 
       3 
     | 
    
         
            -
            spiderforce4ai-2.4.5.dist-info/METADATA,sha256=q3VBuGb5wxsi9OPkzEMwFMyg9f_vT2RamWYIgu2JbLc,9012
         
     | 
| 
       4 
     | 
    
         
            -
            spiderforce4ai-2.4.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
         
     | 
| 
       5 
     | 
    
         
            -
            spiderforce4ai-2.4.5.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
         
     | 
| 
       6 
     | 
    
         
            -
            spiderforce4ai-2.4.5.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
         
     | 
| 
       7 
     | 
    
         
            -
            spiderforce4ai-2.4.5.dist-info/RECORD,,
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     |