vision-agent 0.2.34__py3-none-any.whl → 0.2.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +5 -1
 - vision_agent/tools/tools.py +11 -9
 - {vision_agent-0.2.34.dist-info → vision_agent-0.2.36.dist-info}/METADATA +1 -1
 - {vision_agent-0.2.34.dist-info → vision_agent-0.2.36.dist-info}/RECORD +6 -6
 - {vision_agent-0.2.34.dist-info → vision_agent-0.2.36.dist-info}/LICENSE +0 -0
 - {vision_agent-0.2.34.dist-info → vision_agent-0.2.36.dist-info}/WHEEL +0 -0
 
| 
        vision_agent/agent/vision_agent.py
    CHANGED
    
         @@ -264,15 +264,19 @@ def retrieve_tools( 
     | 
|
| 
       264 
264 
     | 
    
         
             
                )
         
     | 
| 
       265 
265 
     | 
    
         
             
                tool_info = []
         
     | 
| 
       266 
266 
     | 
    
         
             
                tool_desc = []
         
     | 
| 
      
 267 
     | 
    
         
            +
                tool_list: List[Dict[str, str]] = []
         
     | 
| 
       267 
268 
     | 
    
         
             
                for task in plan:
         
     | 
| 
       268 
269 
     | 
    
         
             
                    tools = tool_recommender.top_k(task["instructions"], k=2, thresh=0.3)
         
     | 
| 
       269 
270 
     | 
    
         
             
                    tool_info.extend([e["doc"] for e in tools])
         
     | 
| 
       270 
271 
     | 
    
         
             
                    tool_desc.extend([e["desc"] for e in tools])
         
     | 
| 
      
 272 
     | 
    
         
            +
                    tool_list.extend(
         
     | 
| 
      
 273 
     | 
    
         
            +
                        {"description": e["desc"], "documentation": e["doc"]} for e in tools
         
     | 
| 
      
 274 
     | 
    
         
            +
                    )
         
     | 
| 
       271 
275 
     | 
    
         
             
                log_progress(
         
     | 
| 
       272 
276 
     | 
    
         
             
                    {
         
     | 
| 
       273 
277 
     | 
    
         
             
                        "type": "tools",
         
     | 
| 
       274 
278 
     | 
    
         
             
                        "status": "completed",
         
     | 
| 
       275 
     | 
    
         
            -
                        "payload":  
     | 
| 
      
 279 
     | 
    
         
            +
                        "payload": tool_list,
         
     | 
| 
       276 
280 
     | 
    
         
             
                    }
         
     | 
| 
       277 
281 
     | 
    
         
             
                )
         
     | 
| 
       278 
282 
     | 
    
         
             
                if verbosity == 2:
         
     | 
    
        vision_agent/tools/tools.py
    CHANGED
    
    | 
         @@ -58,9 +58,10 @@ def grounding_dino( 
     | 
|
| 
       58 
58 
     | 
    
         
             
                box_threshold: float = 0.20,
         
     | 
| 
       59 
59 
     | 
    
         
             
                iou_threshold: float = 0.20,
         
     | 
| 
       60 
60 
     | 
    
         
             
            ) -> List[Dict[str, Any]]:
         
     | 
| 
       61 
     | 
    
         
            -
                """'grounding_dino' is a tool that can detect and count objects given a text 
     | 
| 
       62 
     | 
    
         
            -
                such as category names or referring expressions.  
     | 
| 
       63 
     | 
    
         
            -
                 
     | 
| 
      
 61 
     | 
    
         
            +
                """'grounding_dino' is a tool that can detect and count multiple objects given a text
         
     | 
| 
      
 62 
     | 
    
         
            +
                prompt such as category names or referring expressions. The categories in text prompt
         
     | 
| 
      
 63 
     | 
    
         
            +
                are separated by commas or periods. It returns a list and count of bounding boxes,
         
     | 
| 
      
 64 
     | 
    
         
            +
                label names and associated probability scores.
         
     | 
| 
       64 
65 
     | 
    
         | 
| 
       65 
66 
     | 
    
         
             
                Parameters:
         
     | 
| 
       66 
67 
     | 
    
         
             
                    prompt (str): The prompt to ground to the image.
         
     | 
| 
         @@ -111,9 +112,10 @@ def grounding_sam( 
     | 
|
| 
       111 
112 
     | 
    
         
             
                box_threshold: float = 0.20,
         
     | 
| 
       112 
113 
     | 
    
         
             
                iou_threshold: float = 0.20,
         
     | 
| 
       113 
114 
     | 
    
         
             
            ) -> List[Dict[str, Any]]:
         
     | 
| 
       114 
     | 
    
         
            -
                """'grounding_sam' is a tool that can detect and segment objects given a 
     | 
| 
       115 
     | 
    
         
            -
                prompt such as category names or referring expressions.  
     | 
| 
       116 
     | 
    
         
            -
                 
     | 
| 
      
 115 
     | 
    
         
            +
                """'grounding_sam' is a tool that can detect and segment multiple objects given a
         
     | 
| 
      
 116 
     | 
    
         
            +
                text prompt such as category names or referring expressions. The categories in text
         
     | 
| 
      
 117 
     | 
    
         
            +
                prompt are separated by commas or periods. It returns a list of bounding boxes,
         
     | 
| 
      
 118 
     | 
    
         
            +
                label names, mask file names and associated probability scores.
         
     | 
| 
       117 
119 
     | 
    
         | 
| 
       118 
120 
     | 
    
         
             
                Parameters:
         
     | 
| 
       119 
121 
     | 
    
         
             
                    prompt (str): The prompt to ground to the image.
         
     | 
| 
         @@ -343,9 +345,9 @@ def image_question_answering(image: np.ndarray, prompt: str) -> str: 
     | 
|
| 
       343 
345 
     | 
    
         | 
| 
       344 
346 
     | 
    
         | 
| 
       345 
347 
     | 
    
         
             
            def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
         
     | 
| 
       346 
     | 
    
         
            -
                """'clip' is a tool that can classify an image  
     | 
| 
       347 
     | 
    
         
            -
                It returns the same list of the input classes along with 
     | 
| 
       348 
     | 
    
         
            -
                based on image content.
         
     | 
| 
      
 348 
     | 
    
         
            +
                """'clip' is a tool that can classify an image or a cropped detection given a list
         
     | 
| 
      
 349 
     | 
    
         
            +
                of input classes or tags. It returns the same list of the input classes along with
         
     | 
| 
      
 350 
     | 
    
         
            +
                their probability scores based on image content.
         
     | 
| 
       349 
351 
     | 
    
         | 
| 
       350 
352 
     | 
    
         
             
                Parameters:
         
     | 
| 
       351 
353 
     | 
    
         
             
                    image (np.ndarray): The image to classify or tag
         
     | 
| 
        {vision_agent-0.2.34.dist-info → vision_agent-0.2.36.dist-info}/RECORD
    CHANGED
    
         @@ -11,7 +11,7 @@ vision_agent/agent/easytool_v2.py,sha256=CjY-sSj3abxnSq3ZHZMt-7YvRWDXEZsC6RN8FFI 
     | 
|
| 
       11 
11 
     | 
    
         
             
            vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
         
     | 
| 
       12 
12 
     | 
    
         
             
            vision_agent/agent/reflexion.py,sha256=AlM5AvBJvCslXlYQdZiadq4oVHsNBm3IF_03DglTxRo,10506
         
     | 
| 
       13 
13 
     | 
    
         
             
            vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
         
     | 
| 
       14 
     | 
    
         
            -
            vision_agent/agent/vision_agent.py,sha256= 
     | 
| 
      
 14 
     | 
    
         
            +
            vision_agent/agent/vision_agent.py,sha256=WMClrV5qhPyqnQscNgek2vVes6-A1jNwaHH0vzgy6Zk,16802
         
     | 
| 
       15 
15 
     | 
    
         
             
            vision_agent/agent/vision_agent_prompts.py,sha256=0YbiS59IEWbiE43gCvOqfWrpudIAhTn8FHzXW0Y-Gaw,8201
         
     | 
| 
       16 
16 
     | 
    
         
             
            vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         
     | 
| 
       17 
17 
     | 
    
         
             
            vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
         
     | 
| 
         @@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=oZa_sslb1UqEgpdWROChDcz5JHdB475ejJX78FMLYv 
     | 
|
| 
       23 
23 
     | 
    
         
             
            vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
         
     | 
| 
       24 
24 
     | 
    
         
             
            vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
         
     | 
| 
       25 
25 
     | 
    
         
             
            vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
         
     | 
| 
       26 
     | 
    
         
            -
            vision_agent/tools/tools.py,sha256= 
     | 
| 
      
 26 
     | 
    
         
            +
            vision_agent/tools/tools.py,sha256=RVBuaP8KZrBVppEPaqP0Tey3Am6O5zoNIaZpBSW125c,23523
         
     | 
| 
       27 
27 
     | 
    
         
             
            vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
         
     | 
| 
       28 
28 
     | 
    
         
             
            vision_agent/utils/execute.py,sha256=8_SfK-IkHH4lXF0JVyV7sDFszZn9HKsh1bFITKGCJ1g,3881
         
     | 
| 
       29 
29 
     | 
    
         
             
            vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
         
     | 
| 
       30 
30 
     | 
    
         
             
            vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
         
     | 
| 
       31 
31 
     | 
    
         
             
            vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
         
     | 
| 
       32 
32 
     | 
    
         
             
            vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
         
     | 
| 
       33 
     | 
    
         
            -
            vision_agent-0.2. 
     | 
| 
       34 
     | 
    
         
            -
            vision_agent-0.2. 
     | 
| 
       35 
     | 
    
         
            -
            vision_agent-0.2. 
     | 
| 
       36 
     | 
    
         
            -
            vision_agent-0.2. 
     | 
| 
      
 33 
     | 
    
         
            +
            vision_agent-0.2.36.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
         
     | 
| 
      
 34 
     | 
    
         
            +
            vision_agent-0.2.36.dist-info/METADATA,sha256=aKsyRoGvUDMRtezD5RCoJP3aTn6cky5DFO5t2vVaHx4,6698
         
     | 
| 
      
 35 
     | 
    
         
            +
            vision_agent-0.2.36.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
         
     | 
| 
      
 36 
     | 
    
         
            +
            vision_agent-0.2.36.dist-info/RECORD,,
         
     | 
| 
         
            File without changes
         
     | 
| 
         
            File without changes
         
     |