spiderforce4ai 2.4.8__py3-none-any.whl → 2.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spiderforce4ai/post_extraction_agent.py +23 -19
 - {spiderforce4ai-2.4.8.dist-info → spiderforce4ai-2.4.9.dist-info}/METADATA +1 -1
 - spiderforce4ai-2.4.9.dist-info/RECORD +7 -0
 - spiderforce4ai-2.4.8.dist-info/RECORD +0 -7
 - {spiderforce4ai-2.4.8.dist-info → spiderforce4ai-2.4.9.dist-info}/WHEEL +0 -0
 - {spiderforce4ai-2.4.8.dist-info → spiderforce4ai-2.4.9.dist-info}/entry_points.txt +0 -0
 - {spiderforce4ai-2.4.8.dist-info → spiderforce4ai-2.4.9.dist-info}/top_level.txt +0 -0
 
| 
         @@ -199,31 +199,35 @@ class PostExtractionAgent: 
     | 
|
| 
       199 
199 
     | 
    
         
             
                                    completion_args["response_format"] = {"type": self.config.response_format}
         
     | 
| 
       200 
200 
     | 
    
         | 
| 
       201 
201 
     | 
    
         
             
                                response = completion(**completion_args)
         
     | 
| 
       202 
     | 
    
         
            -
                                
         
     | 
| 
       203 
     | 
    
         
            -
                                # Log raw response for debugging
         
     | 
| 
       204 
202 
     | 
    
         
             
                                raw_content = response.choices[0].message.content
         
     | 
| 
       205 
203 
     | 
    
         
             
                                logger.debug(f"Raw LLM response for {url}: {raw_content}")
         
     | 
| 
       206 
     | 
    
         
            -
             
     | 
| 
      
 204 
     | 
    
         
            +
             
     | 
| 
      
 205 
     | 
    
         
            +
                                # Handle response based on response_format
         
     | 
| 
       207 
206 
     | 
    
         
             
                                try:
         
     | 
| 
       208 
     | 
    
         
            -
                                     
     | 
| 
       209 
     | 
    
         
            -
             
     | 
| 
       210 
     | 
    
         
            -
                                        extracted_data = json.loads(raw_content)
         
     | 
| 
       211 
     | 
    
         
            -
                                     
     | 
| 
       212 
     | 
    
         
            -
                                        #  
     | 
| 
       213 
     | 
    
         
            -
                                         
     | 
| 
       214 
     | 
    
         
            -
             
     | 
| 
       215 
     | 
    
         
            -
             
     | 
| 
       216 
     | 
    
         
            -
                                             
     | 
| 
       217 
     | 
    
         
            -
             
     | 
| 
       218 
     | 
    
         
            -
                                             
     | 
| 
       219 
     | 
    
         
            -
             
     | 
| 
       220 
     | 
    
         
            -
                                                 
     | 
| 
       221 
     | 
    
         
            -
             
     | 
| 
       222 
     | 
    
         
            -
                                                 
     | 
| 
       223 
     | 
    
         
            -
             
     | 
| 
      
 207 
     | 
    
         
            +
                                    if self.config.response_format == "json_object":
         
     | 
| 
      
 208 
     | 
    
         
            +
                                        # For json_object format, response should already be valid JSON
         
     | 
| 
      
 209 
     | 
    
         
            +
                                        extracted_data = raw_content if isinstance(raw_content, dict) else json.loads(raw_content)
         
     | 
| 
      
 210 
     | 
    
         
            +
                                    else:
         
     | 
| 
      
 211 
     | 
    
         
            +
                                        # For text format or unspecified, try parsing JSON or use as text
         
     | 
| 
      
 212 
     | 
    
         
            +
                                        try:
         
     | 
| 
      
 213 
     | 
    
         
            +
                                            extracted_data = json.loads(raw_content)
         
     | 
| 
      
 214 
     | 
    
         
            +
                                        except json.JSONDecodeError:
         
     | 
| 
      
 215 
     | 
    
         
            +
                                            # Look for JSON in markdown code blocks
         
     | 
| 
      
 216 
     | 
    
         
            +
                                            json_match = re.search(r'```(?:json)?\s*\n([\s\S]*?)\n```', raw_content)
         
     | 
| 
      
 217 
     | 
    
         
            +
                                            if json_match:
         
     | 
| 
      
 218 
     | 
    
         
            +
                                                json_content = json_match.group(1).strip()
         
     | 
| 
      
 219 
     | 
    
         
            +
                                                extracted_data = json.loads(json_content)
         
     | 
| 
      
 220 
     | 
    
         
            +
                                            else:
         
     | 
| 
      
 221 
     | 
    
         
            +
                                                # If no JSON found and not json_object format, use raw content
         
     | 
| 
      
 222 
     | 
    
         
            +
                                                extracted_data = {
         
     | 
| 
      
 223 
     | 
    
         
            +
                                                    "raw_content": raw_content,
         
     | 
| 
      
 224 
     | 
    
         
            +
                                                    "format": "text",
         
     | 
| 
      
 225 
     | 
    
         
            +
                                                    "timestamp": datetime.now().isoformat()
         
     | 
| 
      
 226 
     | 
    
         
            +
                                                }
         
     | 
| 
       224 
227 
     | 
    
         | 
| 
       225 
228 
     | 
    
         
             
                                    self.buffer.remove_request(url)  # Remove from buffer if successful
         
     | 
| 
       226 
229 
     | 
    
         
             
                                    return extracted_data
         
     | 
| 
      
 230 
     | 
    
         
            +
                                    
         
     | 
| 
       227 
231 
     | 
    
         
             
                                except Exception as e:
         
     | 
| 
       228 
232 
     | 
    
         
             
                                    error_msg = (
         
     | 
| 
       229 
233 
     | 
    
         
             
                                        f"Error processing LLM response for {url}:\n"
         
     | 
| 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
         
     | 
| 
      
 2 
     | 
    
         
            +
            spiderforce4ai/post_extraction_agent.py,sha256=so5Ze7Vz3konpQ0iT7ZxDGE9kIYeTwPTFyzezRc5oys,15392
         
     | 
| 
      
 3 
     | 
    
         
            +
            spiderforce4ai-2.4.9.dist-info/METADATA,sha256=kEq3anAkoe_wpPVzpgaJlsSuAzTQHDgXiDFpirXvUQc,9012
         
     | 
| 
      
 4 
     | 
    
         
            +
            spiderforce4ai-2.4.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
         
     | 
| 
      
 5 
     | 
    
         
            +
            spiderforce4ai-2.4.9.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
         
     | 
| 
      
 6 
     | 
    
         
            +
            spiderforce4ai-2.4.9.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
         
     | 
| 
      
 7 
     | 
    
         
            +
            spiderforce4ai-2.4.9.dist-info/RECORD,,
         
     | 
| 
         @@ -1,7 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            spiderforce4ai/__init__.py,sha256=DUPOKF7-vCVQi7JimsStU1qjk5x3yVUoMnUVOJxOrGk,42360
         
     | 
| 
       2 
     | 
    
         
            -
            spiderforce4ai/post_extraction_agent.py,sha256=5M7pBU8O44Khfub2jSSPboSbrcsAPw6nnp576qIA2pY,14988
         
     | 
| 
       3 
     | 
    
         
            -
            spiderforce4ai-2.4.8.dist-info/METADATA,sha256=zsEmCfjL_ueJzIeTJ9BIdUEF3R-4uOPDqrRxox70Cto,9012
         
     | 
| 
       4 
     | 
    
         
            -
            spiderforce4ai-2.4.8.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
         
     | 
| 
       5 
     | 
    
         
            -
            spiderforce4ai-2.4.8.dist-info/entry_points.txt,sha256=ibARQxOlDiL1ho12zbDZt4Uq5RKSIk_qk159ZlZ46hc,59
         
     | 
| 
       6 
     | 
    
         
            -
            spiderforce4ai-2.4.8.dist-info/top_level.txt,sha256=Kth7A21Js7DCp0j5XBBi-FE45SCLouZkeNZU__Yr9Yk,15
         
     | 
| 
       7 
     | 
    
         
            -
            spiderforce4ai-2.4.8.dist-info/RECORD,,
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     |