vision-agent 0.2.188__py3-none-any.whl → 0.2.190__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/__init__.py +1 -0
- vision_agent/tools/tools.py +43 -0
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/METADATA +1 -1
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/RECORD +6 -6
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/WHEEL +0 -0
    
        vision_agent/tools/__init__.py
    CHANGED
    
    
    
        vision_agent/tools/tools.py
    CHANGED
    
    | @@ -1845,6 +1845,48 @@ def flux_image_inpainting( | |
| 1845 1845 | 
             
                return output_image
         | 
| 1846 1846 |  | 
| 1847 1847 |  | 
| 1848 | 
            +
            def siglip_classification(image: np.ndarray, labels: List[str]) -> Dict[str, Any]:
         | 
| 1849 | 
            +
                """'siglip_classification' is a tool that can classify an image or a cropped detection given a list
         | 
| 1850 | 
            +
                of input labels or tags. It returns the same list of the input labels along with
         | 
| 1851 | 
            +
                their probability scores based on image content.
         | 
| 1852 | 
            +
             | 
| 1853 | 
            +
                Parameters:
         | 
| 1854 | 
            +
                    image (np.ndarray): The image to classify or tag
         | 
| 1855 | 
            +
                    labels (List[str]): The list of labels or tags that is associated with the image
         | 
| 1856 | 
            +
             | 
| 1857 | 
            +
                Returns:
         | 
| 1858 | 
            +
                    Dict[str, Any]: A dictionary containing the labels and scores. One key
         | 
| 1859 | 
            +
                        contains the list of given labels and the other the list of scores.
         | 
| 1860 | 
            +
             | 
| 1861 | 
            +
                Example
         | 
| 1862 | 
            +
                -------
         | 
| 1863 | 
            +
                    >>> siglip_classification(image, ['dog', 'cat', 'bird'])
         | 
| 1864 | 
            +
                    {"labels": ["dog", "cat", "bird"], "scores": [0.68, 0.30, 0.02]}
         | 
| 1865 | 
            +
                """
         | 
| 1866 | 
            +
             | 
| 1867 | 
            +
                if image.shape[0] < 1 or image.shape[1] < 1:
         | 
| 1868 | 
            +
                    return {"labels": [], "scores": []}
         | 
| 1869 | 
            +
             | 
| 1870 | 
            +
                image_file = numpy_to_bytes(image)
         | 
| 1871 | 
            +
             | 
| 1872 | 
            +
                files = [("image", image_file)]
         | 
| 1873 | 
            +
             | 
| 1874 | 
            +
                payload = {
         | 
| 1875 | 
            +
                    "model": "siglip",
         | 
| 1876 | 
            +
                    "labels": labels,
         | 
| 1877 | 
            +
                }
         | 
| 1878 | 
            +
             | 
| 1879 | 
            +
                response: dict[str, Any] = send_inference_request(
         | 
| 1880 | 
            +
                    payload=payload,
         | 
| 1881 | 
            +
                    endpoint_name="classification",
         | 
| 1882 | 
            +
                    files=files,
         | 
| 1883 | 
            +
                    v2=True,
         | 
| 1884 | 
            +
                    metadata_payload={"function_name": "siglip_classification"},
         | 
| 1885 | 
            +
                )
         | 
| 1886 | 
            +
             | 
| 1887 | 
            +
                return response
         | 
| 1888 | 
            +
             | 
| 1889 | 
            +
             | 
| 1848 1890 | 
             
            # Utility and visualization functions
         | 
| 1849 1891 |  | 
| 1850 1892 |  | 
| @@ -2362,6 +2404,7 @@ FUNCTION_TOOLS = [ | |
| 2362 2404 | 
             
                qwen2_vl_images_vqa,
         | 
| 2363 2405 | 
             
                qwen2_vl_video_vqa,
         | 
| 2364 2406 | 
             
                video_temporal_localization,
         | 
| 2407 | 
            +
                flux_image_inpainting,
         | 
| 2365 2408 | 
             
            ]
         | 
| 2366 2409 |  | 
| 2367 2410 | 
             
            UTIL_TOOLS = [
         | 
| @@ -16,11 +16,11 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r | |
| 16 16 | 
             
            vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
         | 
| 17 17 | 
             
            vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
         | 
| 18 18 | 
             
            vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
         | 
| 19 | 
            -
            vision_agent/tools/__init__.py,sha256= | 
| 19 | 
            +
            vision_agent/tools/__init__.py,sha256=uhqE2bo9q1F7andb2zEzp-OYVmYOElPdapIGK9bQ63U,2825
         | 
| 20 20 | 
             
            vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
         | 
| 21 21 | 
             
            vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
         | 
| 22 22 | 
             
            vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
         | 
| 23 | 
            -
            vision_agent/tools/tools.py,sha256= | 
| 23 | 
            +
            vision_agent/tools/tools.py,sha256=nTeWePteTltKEvDcr2t3nSDtdUL0xdXEIjeolvXNxGg,84945
         | 
| 24 24 | 
             
            vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
         | 
| 25 25 | 
             
            vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
         | 
| 26 26 | 
             
            vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
         | 
| @@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd | |
| 29 29 | 
             
            vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
         | 
| 30 30 | 
             
            vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
         | 
| 31 31 | 
             
            vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
         | 
| 32 | 
            -
            vision_agent-0.2. | 
| 33 | 
            -
            vision_agent-0.2. | 
| 34 | 
            -
            vision_agent-0.2. | 
| 35 | 
            -
            vision_agent-0.2. | 
| 32 | 
            +
            vision_agent-0.2.190.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
         | 
| 33 | 
            +
            vision_agent-0.2.190.dist-info/METADATA,sha256=AvlPUByWfCJlMgl2ZvWDfnp9ila0lsCv6x3tZ41DY8k,18328
         | 
| 34 | 
            +
            vision_agent-0.2.190.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
         | 
| 35 | 
            +
            vision_agent-0.2.190.dist-info/RECORD,,
         | 
| 
            File without changes
         | 
| 
            File without changes
         |