vision-agent 0.2.185__py3-none-any.whl → 0.2.186__py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- vision_agent/tools/tools.py +13 -15
- {vision_agent-0.2.185.dist-info → vision_agent-0.2.186.dist-info}/METADATA +3 -3
- {vision_agent-0.2.185.dist-info → vision_agent-0.2.186.dist-info}/RECORD +5 -5
- {vision_agent-0.2.185.dist-info → vision_agent-0.2.186.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.185.dist-info → vision_agent-0.2.186.dist-info}/WHEEL +0 -0
    
        vision_agent/tools/tools.py
    CHANGED
    
    | @@ -1057,23 +1057,25 @@ def video_temporal_localization( | |
| 1057 1057 | 
             
                prompt: str,
         | 
| 1058 1058 | 
             
                frames: List[np.ndarray],
         | 
| 1059 1059 | 
             
                model: str = "qwen2vl",
         | 
| 1060 | 
            -
                chunk_length: Optional[float] = None,
         | 
| 1061 | 
            -
                chunk_length_seconds: Optional[float] = None,
         | 
| 1062 1060 | 
             
                chunk_length_frames: Optional[int] = 2,
         | 
| 1063 1061 | 
             
            ) -> List[float]:
         | 
| 1064 | 
            -
                """'video_temporal_localization'  | 
| 1065 | 
            -
                 | 
| 1062 | 
            +
                """'video_temporal_localization' will run qwen2vl on each chunk_length_frames
         | 
| 1063 | 
            +
                value selected for the video. It can detect multiple objects independently per
         | 
| 1064 | 
            +
                chunk_length_frames given a text prompt such as a referring expression
         | 
| 1065 | 
            +
                but does not track objects across frames.
         | 
| 1066 | 
            +
                It returns a list of floats with a value of 1.0 if the objects are found in a given
         | 
| 1067 | 
            +
                chunk_length_frames of the video.
         | 
| 1066 1068 |  | 
| 1067 1069 | 
             
                Parameters:
         | 
| 1068 1070 | 
             
                    prompt (str): The question about the video
         | 
| 1069 1071 | 
             
                    frames (List[np.ndarray]): The reference frames used for the question
         | 
| 1070 | 
            -
                    model (str): The model to use for the inference. Valid values are | 
| 1071 | 
            -
             | 
| 1072 | 
            -
                    chunk_length_seconds (Optional[float]): alternative length for chunk in seconds
         | 
| 1072 | 
            +
                    model (str): The model to use for the inference. Valid values are
         | 
| 1073 | 
            +
                        'qwen2vl', 'gpt4o', 'internlm-xcomposer'
         | 
| 1073 1074 | 
             
                    chunk_length_frames (Optional[int]): length of each chunk in frames
         | 
| 1074 1075 |  | 
| 1075 1076 | 
             
                Returns:
         | 
| 1076 | 
            -
                    List[float]: A list of floats with a value of 1.0 if the  | 
| 1077 | 
            +
                    List[float]: A list of floats with a value of 1.0 if the objects to be found
         | 
| 1078 | 
            +
                        are present in the chunk_length_frames of the video.
         | 
| 1077 1079 |  | 
| 1078 1080 | 
             
                Example
         | 
| 1079 1081 | 
             
                -------
         | 
| @@ -1088,10 +1090,6 @@ def video_temporal_localization( | |
| 1088 1090 | 
             
                    "model": model,
         | 
| 1089 1091 | 
             
                    "function_name": "video_temporal_localization",
         | 
| 1090 1092 | 
             
                }
         | 
| 1091 | 
            -
                if chunk_length is not None:
         | 
| 1092 | 
            -
                    payload["chunk_length"] = chunk_length
         | 
| 1093 | 
            -
                if chunk_length_seconds is not None:
         | 
| 1094 | 
            -
                    payload["chunk_length_seconds"] = chunk_length_seconds
         | 
| 1095 1093 | 
             
                if chunk_length_frames is not None:
         | 
| 1096 1094 | 
             
                    payload["chunk_length_frames"] = chunk_length_frames
         | 
| 1097 1095 |  | 
| @@ -1790,9 +1788,8 @@ def flux_image_inpainting( | |
| 1790 1788 | 
             
                        where 1 indicates areas to be inpainted and 0 indicates areas to be preserved.
         | 
| 1791 1789 |  | 
| 1792 1790 | 
             
                Returns:
         | 
| 1793 | 
            -
                    np.ndarray:
         | 
| 1794 | 
            -
                         | 
| 1795 | 
            -
                        with values ranging from 0 to 255.
         | 
| 1791 | 
            +
                    np.ndarray: The generated image(s) as a numpy array in RGB format with values
         | 
| 1792 | 
            +
                        ranging from 0 to 255.
         | 
| 1796 1793 |  | 
| 1797 1794 | 
             
                -------
         | 
| 1798 1795 | 
             
                Example:
         | 
| @@ -2351,6 +2348,7 @@ FUNCTION_TOOLS = [ | |
| 2351 2348 | 
             
                closest_box_distance,
         | 
| 2352 2349 | 
             
                qwen2_vl_images_vqa,
         | 
| 2353 2350 | 
             
                qwen2_vl_video_vqa,
         | 
| 2351 | 
            +
                video_temporal_localization,
         | 
| 2354 2352 | 
             
            ]
         | 
| 2355 2353 |  | 
| 2356 2354 | 
             
            UTIL_TOOLS = [
         | 
| @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            Metadata-Version: 2.1
         | 
| 2 2 | 
             
            Name: vision-agent
         | 
| 3 | 
            -
            Version: 0.2.185 | 
| 3 | 
            +
            Version: 0.2.186
         | 
| 4 4 | 
             
            Summary: Toolset for Vision Agent
         | 
| 5 5 | 
             
            Author: Landing AI
         | 
| 6 6 | 
             
            Author-email: dev@landing.ai
         | 
| @@ -63,10 +63,10 @@ code to solve the task for them. Check out our discord for updates and roadmaps! | |
| 63 63 | 
             
            ## Table of Contents
         | 
| 64 64 | 
             
            - [🚀Quick Start](#quick-start)
         | 
| 65 65 | 
             
            - [📚Documentation](#documentation)
         | 
| 66 | 
            -
            - [🔍🤖VisionAgent](# | 
| 66 | 
            +
            - [🔍🤖VisionAgent](#visionagent-basic-usage)
         | 
| 67 67 | 
             
            - [🛠️Tools](#tools)
         | 
| 68 68 | 
             
            - [🤖LMMs](#lmms)
         | 
| 69 | 
            -
            - [💻🤖VisionAgent Coder](# | 
| 69 | 
            +
            - [💻🤖VisionAgent Coder](#visionagent-coder)
         | 
| 70 70 | 
             
            - [🏗️Additional Backends](#additional-backends)
         | 
| 71 71 |  | 
| 72 72 | 
             
            ## Quick Start
         | 
| @@ -20,7 +20,7 @@ vision_agent/tools/__init__.py,sha256=KVP4_6qxOb2lpFdQgQtyDfdkMLL1O6wVZNK19MXp-x | |
| 20 20 | 
             
            vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
         | 
| 21 21 | 
             
            vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
         | 
| 22 22 | 
             
            vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
         | 
| 23 | 
            -
            vision_agent/tools/tools.py,sha256 | 
| 23 | 
            +
            vision_agent/tools/tools.py,sha256=-oq8jzITi-yVYJ3ut5MuGJ65jd3ESRtHfw4SCAruMps,83059
         | 
| 24 24 | 
             
            vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
         | 
| 25 25 | 
             
            vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
         | 
| 26 26 | 
             
            vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
         | 
| @@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd | |
| 29 29 | 
             
            vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
         | 
| 30 30 | 
             
            vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
         | 
| 31 31 | 
             
            vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
         | 
| 32 | 
            -
            vision_agent-0.2.185.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357 | 
| 33 | 
            -
            vision_agent-0.2.185.dist-info/METADATA,sha256=… (hash and size truncated in this rendering) | 
| 34 | 
            -
            vision_agent-0.2.185.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88 | 
| 35 | 
            -
            vision_agent-0.2.185.dist-info/RECORD,, | 
| 32 | 
            +
            vision_agent-0.2.186.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
         | 
| 33 | 
            +
            vision_agent-0.2.186.dist-info/METADATA,sha256=NQfESIRsq9-QWyPzNkyv6dSuRS6TGe5D2tZH4iJpeBU,18328
         | 
| 34 | 
            +
            vision_agent-0.2.186.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
         | 
| 35 | 
            +
            vision_agent-0.2.186.dist-info/RECORD,,
         | 
| 
            File without changes
         | 
| 
            File without changes
         |